#!/opt/bin/perl
#
# parsepage.cgi -- fetch a web page and print a short summary of it:
# the page title, the "description" and "keywords" meta tags, the number
# of images, and every hyperlink together with its link text.
#
# CGI parameters:
#   url   Address of the page to analyze. Defaults to http://www.wdvl.com
#         when the parameter is missing or empty.
#
# NOTE(review): the HTML markup in the output strings below was
# reconstructed; only the visible text is known from the previous revision.
# Adjust the tags to match the intended layout.

use strict;
use warnings;

use CGI;
use LWP::Simple;
use HTML::TokeParser;

my $cgiobject = CGI->new;

# Raw HTML of the fetched page; read by every parse_* routine below.
my $webPage;

print $cgiobject->header;
print $cgiobject->start_html(-title => 'Page Parser', -bgcolor => 'white');

# Input form. Every argument list starts with a dash-prefixed name, so
# CGI.pm recognises the named-parameter style without the obsolete
# use_named_parameters() call.
print $cgiobject->start_form(-method => 'get', -action => 'parsepage.cgi');
print "URL to Analyze:" . $cgiobject->textfield(-name => 'url', -size => '40');
print "<br>\n" . $cgiobject->submit(-value => 'Analyze');
print $cgiobject->end_form;
print "<hr>\n";

# Retrieve the web page.
my $fetchURL = $cgiobject->param('url') || 'http://www.wdvl.com';
my $safeURL  = _html($fetchURL);

if ($fetchURL !~ m{\A https?:// }xmsi) {
    # LWP::Simple::get also handles file: and other schemes, so an
    # unrestricted URL would let any visitor read local files.
    print "<p>Only http:// and https:// URLs can be analyzed.</p>\n";
}
elsif (!defined($webPage = get($fetchURL))) {
    # get() returns undef when the document cannot be fetched.
    print "<p>Could not retrieve $safeURL</p>\n";
}
else {
    print <<"ENDHTML";
<h1>$safeURL</h1>
<p>has been sliced and diced, thus revealing:</p>
ENDHTML

    parse_title();
    parse_meta_description();
    parse_meta_keywords();
    parse_images();
    parse_hyperlinks();
}

print $cgiobject->end_html;

# Escape text for safe inclusion in the generated HTML.
# Returns the empty string for undef.
sub _html {
    my ($text) = @_;
    return defined $text ? $cgiobject->escapeHTML($text) : '';
}

# Print $heading followed by the content attribute of every <meta> tag
# whose name attribute matches $pattern (a qr// object).
sub _print_meta {
    my ($pattern, $heading) = @_;

    my $parser = HTML::TokeParser->new(\$webPage);
    while (my $token = $parser->get_tag('meta')) {
        # Many <meta> tags (http-equiv, charset, ...) carry no name at all.
        my $name = $token->[1]{name};
        next unless defined $name && $name =~ $pattern;

        print "<h2>$heading</h2>\n<p>" . _html($token->[1]{content}) . "</p>\n";
    }
    return;
}

# Parse and output the page title; prints an empty title when the page
# has no <title> element.
sub parse_title {
    my $parser = HTML::TokeParser->new(\$webPage);
    my $title  = $parser->get_tag('title') ? $parser->get_trimmed_text : '';

    print "<h2>Page title</h2>\n<p>" . _html($title) . "</p>\n";
    return;
}

# Parse and output the "keywords" meta data.
sub parse_meta_keywords {
    _print_meta(qr/keywords/i, 'Meta Keywords');
    return;
}

# Parse and output the "description" meta data.
sub parse_meta_description {
    _print_meta(qr/description/i, 'Meta Description');
    return;
}

# Count the <img> tags on the page and output the total.
sub parse_images {
    my $parser     = HTML::TokeParser->new(\$webPage);
    my $imageTotal = 0;
    $imageTotal++ while $parser->get_tag('img');

    print "<h2>Image Count</h2>\n<p>Total = $imageTotal</p>\n";
    return;
}

# Output every hyperlink on the page as "<link text> links to <href>".
# Anchors without an href attribute are listed with "-" as the target.
sub parse_hyperlinks {
    my $parser = HTML::TokeParser->new(\$webPage);

    print "<h2>Hyperlink Summary</h2>\n<ul>\n";
    while (my $token = $parser->get_tag('a')) {
        my $linkURL  = $token->[1]{href} || '-';
        my $linkText = $parser->get_trimmed_text('/a');

        # NOTE(review): the previous revision tested $linkText against a
        # pattern at this point, but the pattern and the branch body were
        # lost. The text is printed as returned by the parser -- confirm
        # whether a special label for some links was intended.
        print '<li>' . _html($linkText) . ' links to ' . _html($linkURL) . "</li>\n";
    }
    print "</ul>\n";
    return;
}