The following code automatically extracts the links from a local web page; with some modification you could turn it into a web crawler for the Internet.
use HTTP::Request;
use LWP::UserAgent;
$content = getContent('http://127.0.0.1/index.htm');
# Save the page locally (backslashes must be doubled inside a double-quoted Windows path)
#open(HTML, ">C:\\temp\\index.htm");
#print HTML $content;
#close HTML;
@html = split(/\n/, $content);
for ( @html ) {
    # Collect every HREF link found on this line
    while ( m{
        < \s*
        A \s+ HREF \s* = \s* (["']) (.*?) \1
        \s* >
    }gsix ) {
        push @link, $2;
    }
}
print "本页面中所有的链接为:n" ;
for ( @link ) {
print $_,"n";
}
print "访问本页面中所有的本地链接:n" ;
for ( @link ) {
if ( !(m~^http://~) ) {
$url='http://127.0.0.1/'.$_;
$content=getContent($url);
print <<"EOF";
$url的内容是:
$content
EOF
}
if ((m~^http://127.0.0.1~) or (m~^http://localhost~) ) {
$content=getContent($_);
print <<"EOF";
$url的内容是:
$content
EOF
}
}
############################################################################
sub getContent #8/8/00 1:45PM
############################################################################
{
    my $url      = shift;
    my $ua       = LWP::UserAgent->new();
    my $request  = HTTP::Request->new( 'GET', $url );
    my $response = $ua->request($request);
    my $content  = $response->content;
    return $content;
} ##getContent
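
As noted at the top, the script can be adapted into a small Internet crawler. The sketch below is one illustrative way to do it and is not part of the original program: it reuses the getContent subroutine defined above together with the same HREF regex, keeps a queue of pages still to visit and a hash of pages already seen, follows only absolute http:// links, and stops after a fixed number of requests. The start URL and the request limit are assumed example values.

# Illustrative crawler sketch (assumes the getContent subroutine defined above)
my @queue = ('http://127.0.0.1/index.htm');   # pages still to fetch; start URL is an example
my %visited;                                  # pages already fetched
my $limit = 20;                               # illustrative cap on the number of requests

while ( @queue and scalar(keys %visited) < $limit ) {
    my $url = shift @queue;
    next if $visited{$url}++;                 # skip pages we have already fetched
    my $content = getContent($url);
    print "Fetched $url\n";
    # Extract links with the same pattern used above
    while ( $content =~ m{ < \s* A \s+ HREF \s* = \s* (["']) (.*?) \1 \s* > }gsix ) {
        my $link = $2;
        push @queue, $link if $link =~ m~^http://~;   # follow absolute links only
    }
}

A real crawler would also have to resolve relative URLs against the current page, restrict itself to certain hosts or depths, and respect robots.txt, but the queue-plus-visited-hash loop above is the core of the modification.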