#!/opt/bin/perl
use CGI;
use LWP::Simple;
use HTML::TokeParser;
# ------------------------------------------------------------------
# Main script.
#   1. Emit the HTTP header, the page preamble and the URL entry form.
#   2. Fetch the page named by the "url" parameter (falling back to a
#      default URL when the parameter is empty).
#   3. Print a banner naming the page, then run each report section.
# $cgiobject and $webPage are package globals on purpose: the parse_*
# subs below read them.
# ------------------------------------------------------------------
$cgiobject = CGI->new;
# NOTE(review): use_named_parameters belongs to an old CGI.pm API; kept
# unchanged here -- confirm it is still provided by the installed CGI.pm.
$cgiobject->use_named_parameters;
print $cgiobject->header;
print $cgiobject->start_html(
    -title   => 'Page Parser',
    -bgcolor => 'white',
);
print $cgiobject->startform(
    -method => 'get',
    -action => 'parsepage.cgi',
);
print "URL to Analyze:" . $cgiobject->textfield(
    -name => 'url',
    -size => '40',
);
print "\n" . $cgiobject->submit(-value => 'Analyze');
print $cgiobject->endform;
print "\n";

#retrieve web page
$fetchURL = $cgiobject->param("url");
unless ($fetchURL) {
    $fetchURL = "http://www.wdvl.com";
}

# The URL comes straight from the request, so it must be HTML-escaped
# before it is echoed back into the page.
my $shownURL = $cgiobject->escapeHTML($fetchURL);

# NOTE(review): get() is handed a caller-controlled URL. LWP can handle
# schemes other than http (for example file:), so consider restricting the
# accepted schemes before deploying this publicly.
$webPage = get($fetchURL);
unless (defined $webPage) {
    # LWP::Simple::get returns undef on failure; report it and carry on
    # with an empty document so the report sections still render.
    print "Unable to retrieve $shownURL\n";
    $webPage = '';
}

# Banner. The heredoc opener had been lost from this statement, which made
# the script fail to compile; it is restored here.
print <<ENDHTML;
$shownURL
has been sliced and diced,
thus revealing:
ENDHTML

# Report sections, in display order. Called with explicit parentheses
# rather than the legacy "&name;" form, which forwards the caller's @_.
parse_title();
parse_meta_description();
parse_meta_keywords();
parse_images();
parse_hyperlinks();
print $cgiobject->end_html;
# Print the "Page title" section of the report.
# Reads the fetched document from the global $webPage and prints the
# trimmed text that follows its first <title> tag (empty when the document
# has no <title>). Takes no arguments; output goes to STDOUT.
sub parse_title{
    #parse and output page title
    # Lexical parser: each report section builds its own, so nothing needs
    # to be shared through a package global.
    my $parser = HTML::TokeParser->new(\$webPage);

    my $title = '';
    if ($parser->get_tag("title")) {
        $title = $parser->get_trimmed_text;
    }

    # The title comes from a remote page and TokeParser hands back text
    # with entities decoded, so escape it before writing it into our HTML.
    print "Page title\n" . $cgiobject->escapeHTML($title) . "";
}
# Print a "Meta Keywords" entry for every <meta> tag in the fetched
# document (global $webPage) whose name attribute matches /keywords/i.
# Takes no arguments; output goes to STDOUT.
sub parse_meta_keywords{
    #parse and output meta data
    my $parser = HTML::TokeParser->new(\$webPage);

    while (my $token = $parser->get_tag("meta")) {
        my $attr = $token->[1];

        # <meta http-equiv=...> and similar tags carry no name attribute.
        my $name = defined $attr->{name} ? $attr->{name} : '';
        next unless $name =~ /keywords/i;

        my $content = defined $attr->{content} ? $attr->{content} : '';

        # Attribute values originate from the remote page; escape them
        # before embedding them in our own HTML output.
        print "Meta Keywords\n" . $cgiobject->escapeHTML($content) . "";
    }
}
# Print a "Meta Description" entry for every <meta> tag in the fetched
# document (global $webPage) whose name attribute matches /description/i.
# Takes no arguments; output goes to STDOUT.
sub parse_meta_description{
    #parse and output meta data
    my $parser = HTML::TokeParser->new(\$webPage);

    while (my $token = $parser->get_tag("meta")) {
        my $attr = $token->[1];

        # <meta http-equiv=...> and similar tags carry no name attribute.
        my $name = defined $attr->{name} ? $attr->{name} : '';
        next unless $name =~ /description/i;

        my $content = defined $attr->{content} ? $attr->{content} : '';

        # Attribute values originate from the remote page; escape them
        # before embedding them in our own HTML output.
        print "Meta Description\n" . $cgiobject->escapeHTML($content) . "";
    }
}
# Report how many <img> tags the fetched document (global $webPage)
# contains. Takes no arguments; prints the "Image Count" section to STDOUT.
sub parse_images{
    #parse and count images
    my $seen = 0;
    $parser = HTML::TokeParser->new(\$webPage);

    # get_tag returns false once no further <img> tag is found.
    $seen += 1 while $parser->get_tag("img");

    print "Image Count
" . "Total = $seen";
}
# Print the "Hyperlink Summary" section: one line per <a> tag in the
# fetched document (global $webPage), giving the anchor text and the href
# it points at ("-" when the tag has no usable href).
# Takes no arguments; output goes to STDOUT.
sub parse_hyperlinks{
    #parse and output hyperlinks
    my $parser = HTML::TokeParser->new(\$webPage);

    print "Hyperlink Summary\n";

    while (my $token = $parser->get_tag("a")) {
        my $linkURL  = $token->[1]{href} || "-";
        my $linkText = $parser->get_trimmed_text("/a");

        # NOTE(review): this spot originally held "if ($linkText=~/..." but
        # the pattern and its body were lost, leaving a statement that does
        # not compile. Only the print below could be reconstructed from the
        # surviving fragments -- restore the conditional from the upstream
        # script if it is available.

        # Both values come from the remote page, so escape them before
        # writing them into our own HTML.
        print $cgiobject->escapeHTML($linkText) . " " .
              "links to " . $cgiobject->escapeHTML($linkURL) . "\n";
    }
}