#!/opt/bin/perl # ________________________________________________________________ # /\ /\ The Web Developer's Virtual Library # -{-<*>-}- World-Wide Web # __\/_\/_________________________________________________________ # Author : Alan Richmond # Purpose : Check a list of HTML files for problematic links. # Usage : vlinks.pl < [list of filenames] # Comment : Output in broken.html # Disclaimer: This software is provided freely on the understanding # that the Author will not be held responsible for any # problems arising from it's use, and that there is no support. # ________________________________________________________________ # use HTML::Parse; use LWP::Simple; use URI::URL; %link_elements = ( 'a' => 'href', 'img' => 'src', 'form' => 'action', 'link' => 'href', ); $base = "/www/wdvl/wdvl"; # $debug = 1; while (<>) { chop ; &parse ($_) unless /\/x\//; } foreach (keys %link) { print "check $_\n" if $debug; if ($_ =~ /^http:/) { if (!head($_)) { if (!$ft) { $ft = 1; open (OUT, ">broken.html")||die$!; print OUT "Broken Links
    \n"; } print OUT "
  1. $_:\n$link{$_}\n"; } } } if (!ft) { print OUT "
"; close (OUT); } sub parse { ( $file ) = @_; print "file: $file:\n" if $debug; my $h = parse_htmlfile($file); $BASE = "http://WDVL.Internet.com/$file"; print "BASE: $BASE:\n" if $debug; $h->traverse(\&expand_urls, 1); # print $h->as_HTML; for (@{ $h->extract_links(qw(a img form)) }) { ($link, $linkelem) = @$_; $link{$link} .= " $file" unless $link =~ /^mailto/; print " $link\n" if $debug; } } sub expand_urls { my($e, $start) = @_; return 1 unless $start; my $attr = $link_elements{$e->tag}; return 1 unless defined $attr; my $url = $e->attr($attr); return 1 unless defined $url; $e->attr($attr, url($url, $BASE)->abs->as_string); }