#!/usr/public/bin/perl # $Id: testlinks,v 1.2 1994/09/21 01:23:18 fielding Exp $ # --------------------------------------------------------------------------- # GET and extract the links from the URLs passed as arguments, test them # using HEAD requests, and output an HTML index fragment describing the # results. Relative links are resolved relative to the URL $base. # # Note that this is a non-recursive, completely inefficient version # of MOMspider's index without the visual cues for problem links. See # for more information. # # 21 Apr 1994 (RTF): Initial Version # 12 Jul 1994 (RTF): Rewritten to work with libwww-perl # 20 Jul 1994 (RTF): The default From header is now set by www.pl # and &www'set_def_header() is called to set User-Agent. # Added to libwww-perl distribution. # 20 Sep 1994 (RTF): Added initialization of $headers # # Created by Roy Fielding to test MOMspider and the libwww-perl system #----------------------------------------------------------------- if ($libloc = $ENV{'LIBWWW_PERL'}) { unshift(@INC, $libloc); } require "www.pl"; require "wwwurl.pl"; require "wwwhtml.pl"; require "wwwerror.pl"; require "wwwdates.pl"; $pname = $0; # Method = program name $pname =~ s#^.*/([^/]+)$#$1#; # lose the path &www'set_def_header('http', 'User-Agent', "$pname/0.3"); # Set up User-Agent: header $pwd = ( $ENV{'PWD'} || $ENV{'cwd'} || '' ); $base = "file://localhost$pwd/"; # Set up initial Base URL $vidx = 'tl0001'; #----------------------------------------------------------------- while ($ARGV[0]) { $rel = shift; $url = &wwwurl'absolute($base, $rel); $content = ''; $headers = ''; %headers = (); $response = &www'request('GET', $url, *headers, *content, 30); @TestLinks = (); @TestAbs = (); @TestOrig = (); @TestType = (); &wwwhtml'extract_links($url, *headers, *content, *TestLinks, *TestAbs, *TestOrig, *TestType); # Now print out the index entry for this URL $nextbit = ($headers{'title'} || $url); print "

$nextbit

\n"; $vidx++; print "$response $wwwerror'RespMessage{$response}\n", "\n$url"; if ($nextbit = ($headers{'uri'} || $headers{'location'})) { print "
\nURI: $nextbit"; } if ($nextbit = $headers{'last-modified'}) { print "
\nLast-modified: $nextbit"; } if ($nextbit = $headers{'expires'}) { print "
\nExpires: $nextbit"; } if ($nextbit = $headers{'reply-to'}) { print "
\nReply-to: $nextbit"; } print "\n"; undef $content; undef $headers; undef %headers; if ($TestLinks[0]) { print "\n"; } print "\n"; undef @TestLinks; undef @TestAbs; undef @TestOrig; undef @TestType; } exit(0); sub test_child { local($parent, $link, $labs, $lorig) = @_; local($response, $nextbit) = 0; local($content) = ''; local($headers) = ''; local(%headers) = (); if ($parent) { $headers{'Referer'} = $parent; } if ($link =~ /^http/) { sleep(20); } $response = &www'request('HEAD', $link, *headers, *content, 30); print " $response $wwwerror'RespMessage{$response}\n", " \n $lorig"; if ($nextbit = ($headers{'uri'} || $headers{'location'})) { print "
\n URI: $nextbit"; } if ($nextbit = $headers{'last-modified'}) { print "
\n Last-modified: $nextbit"; } if ($nextbit = $headers{'expires'}) { print "
\n Expires: $nextbit"; } if ($nextbit = $headers{'reply-to'}) { print "
\n Reply-To: $nextbit"; } print "\n"; }