Test email

Checking: $t

$doc_url\n

); } # Zero out every thing $Total = $L200 = $L400 = $L500 = $Lother = $NotChecked = $LEmpty = 0; %L4 = ("", ""); %L5 = ("", ""); # Empty the Hashes # Get the page whose links we want to check: my $response = $ua->get($doc_url); if($response->is_success) { # Parsing of undecoded UTF-8 will give garbage when decoding entities at # /usr/lib/perl5/vendor_perl/5.8.8/LWP/Protocol.pm line 114, line 20. # if ($response->header('Content-Type') && $response->header('Content-Type') =~ m/charset=(\S+),/xms) { my $encoding = "$1"; print "Encoding: $encoding\n" if ($dbgme); $document = $response->content(Encode::decode($encoding, $response->content)); #$document = $response->content; } else { $document = $response->content; # get the Web page contents } $doc_url = $response->base(); #$ua = LWP::UserAgent->new; # Remove all the line breaks and extra spaces. This makes it easier to # display the links. Especially if they've been broken over 2 lines # the order is important do not reverse these 2 lines $document =~ s/\n/ /g; # replace newline with space $document =~ s/\s{2,}/ /g; # replace multiple spaces with 1 # Parse the document my $parser = HTML::TokeParser->new(\$document); # Grab the title for reference if($document =~ m{(.*?)}i) { $Title = $1; } else { $Title =" *** Unknown ***"; } # Search for href (URL) and description text (let the parser handle it) # ARGH! while(my $token = $parser->get_tag("a")){ # This handles any of the has a since we're # not looking for it anyway. # We're ignoring name= anchors, is name= & href= legal? that will hose # this up if(!($token->[1]{name})) { my $text = $parser->get_trimmed_text("/a"); if($text ne "") { my $url = $token->[1]{href} || "-"; print STDERR "URL: $url\n" if($dbgme); #print "Text is: $text\n"; if($url ne "-") { $Total++; my $absolute_url = absolutize($url, $doc_url); check_url($absolute_url, $text); } else { $LEmpty++; $output .= sprintf "(XXX) %s A missing href\n", $text; } } else { if(($n =$token->[1]{name})) { $output .= " *** NAME = $n (Oops)\n"; } else { $LEmpty++; $output .= sprintf "(XXX) A missing href and text.\n"; } } } } $output .= "/==============================================================================\n"; $output .= "* Title: $Title\n"; $output .= "* Link: $doc_url\n\n"; $output .= "* ============ =======\n"; $output .= "* Total: $Total\n"; $output .= "* 200's: $L200\n"; $output .= "* 400's: $L400\n"; $output .= "* 500's: $L500\n"; $output .= "* Other: $Lother\n"; $output .= "* Not checked: $NotChecked\n"; $output .= "* Empty: $LEmpty\n"; $output .= "* ============ =======\n\n"; if($L400 > 0) { $output .= "* -> 4xx list\n"; while(($lnk, $code) = each(%L4)) { $output .= qq(* -> ( $code ) $lnk\n); } $output .= "*\n"; } if($L500 > 0) { $output .= "* -> 5xx list\n"; while(($lnk, $code) = each(%L5)) { if($lnk) { $output .= qq(* -> ( $code ) $lnk\n); } } $output .= "*\n"; } $output .= "