);
}
# Zero out every thing
$Total = $L200 = $L400 = $L500 = $Lother = $NotChecked = $LEmpty = 0;
%L4 = ("", "");
%L5 = ("", ""); # Empty the Hashes
# Get the page whose links we want to check:
#my $response = $browser->get($doc_url);
my $response = $ua->get($doc_url);
if($response->is_success) {
$document = $response->content; # get the Web page contents
$doc_url = $response->base();
#$ua = LWP::UserAgent->new;
# Remove all the line breaks and extra spaces. This makes it easier to
# display the links. Especially if they've been broken over 2 lines
# the order is important do not reverse these 2 lines
$document =~ s/\n/ /g; # replace newline with space
$document =~ s/\s{2,}/ /g; # replace multiple spaces with 1
# Parse the document
my $parser = HTML::TokeParser->new(\$document);
# Grab the title for reference
if($document =~ m{
(.*?)}i) {
$Title = $1;
} else {
$Title =" *** Unknown ***";
}
# Search for href (URL) and description text (let the parser handle it)
# ARGH!
while(my $token = $parser->get_tag("a")){
# This handles any of the
has a since we're
# not looking for it anyway.
# We're ignoring name= anchors, is name= & href= legal? that will hose
# this up
if(!($token->[1]{name})) {
my $text = $parser->get_trimmed_text("/a");
#print "\nText is: $text\n";
if($text ne "") {
my $url = $token->[1]{href} || "-";
#print "URL: $url\n";
#print "Text is: $text\n";
if($url ne "-") {
$Total++;
my $absolute_url = absolutize($url, $doc_url);
check_url($absolute_url, $text);
} else {
$LEmpty++;
printf "(
XXX)
%s A missing href\n", $text;
}
} else {
if(($n =$token->[1]{name})) {
print " *** NAME = $n (Oops)\n";
} else {
$LEmpty++;
printf "(
XXX)
A missing href and text.\n";
}
}
}
}
print "/==============================================================================\n";
print "* Title: $Title\n";
print "* Link: $doc_url\n\n";
print "* ============ =======\n";
print "* Total: $Total\n";
print "* 200's: $L200\n";
print "* 400's: $L400\n";
print "* 500's: $L500\n";
print "* Other: $Lother\n";
print "* Not checked: $NotChecked\n";
print "* Empty: $LEmpty\n";
print "* ============ =======\n\n";
if($L400 > 0) {
print "* -> 4xx list\n";
while(($lnk, $code) = each(%L4)) {
print qq(* -> (
$code )
$lnk\n);
}
print "*\n";
}
if($L500 > 0) {
print "* -> 5xx list\n";
while(($lnk, $code) = each(%L5)) {
if($lnk) {
print qq(* -> (
$code )
$lnk\n);
}
}
print "*\n";
}
print "