);
}
# Zero out every thing
$Total = $L200 = $L400 = $L500 = $Lother = $NotChecked = $LEmpty = 0;
%L4 = ("", "");
%L5 = ("", ""); # Empty the Hashes
# Get the page whose links we want to check:
my $response = $ua->get($doc_url);
if($response->is_success) {
$document = $response->content; # get the Web page contents
$doc_url = $response->base();
# Remove all the line breaks and extra spaces. This makes it easier to
# display the links. Especially if they've been broken over 2 lines
# the order is important do not reverse these 2 lines
$document =~ s/\n/ /g; # replace newline with space
$document =~ s/\s{2,}/ /g; # replace multiple spaces with 1
# Parse the document
my $parser = HTML::TokeParser->new(\$document);
# Grab the title for reference
if($document =~ m{
(.*?)}i) {
$Title = $1;
} else {
$Title =" *** Unknown ***";
}
# Search for href (URL) and description text (let the parser handle it)
# ARGH!
while(my $token = $parser->get_tag("a")){
# This handles any of the
has a since we're
# not looking for it anyway.
# We're ignoring name= anchors, is name= & href= legal? that will hose
# this up
if(!($token->[1]{name})) {
my $text = $parser->get_trimmed_text("/a");
if($text ne "") {
my $url = $token->[1]{href} || "-";
print STDERR "URL: $url ($text)\n" if($dbgme);
if($url ne "-") {
$Total++;
my $absolute_url = absolutize($url, $doc_url);
check_url($absolute_url, $text);
} else {
$LEmpty++;
$output .= sprintf "(
XXX)
%s A missing href\n", $text;
}
} else {
if(($n = $token->[1]{name})) {
$output .= " *** NAME = $n (Oops)\n";
} else {
$LEmpty++;
$output .= sprintf "(
XXX)
A missing href and text.\n";
}
}
}
}
$output .= "/==============================================================================\n";
$output .= "* Title: $Title\n";
$output .= "* Link: $doc_url\n\n";
$output .= "* ============ =======\n";
$output .= "* 200's: $L200\n";
$output .= "* 400's: $L400\n";
$output .= "* 500's: $L500\n";
$output .= "* Other: $Lother\n";
$output .= "* Empty: $LEmpty\n";
$output .= "* Total: $Total\n";
$output .= "* ------------ -------\n";
$output .= "* Not checked: $NotChecked\n";
$output .= "* ============ =======\n\n";
if($L400 > 0) {
$output .= "* -> 4xx list\n";
while(($lnk, $code) = each(%L4)) {
$output .= qq(* -> (
$code )
$lnk\n);
}
$output .= "*\n";
}
if($L500 > 0) {
$output .= "* -> 5xx list\n";
while(($lnk, $code) = each(%L5)) {
if($lnk) {
$output .= qq(* -> (
$code )
$lnk\n);
}
}
$output .= "*\n";
}
$output .= "