# Crawl the Sina Vdisk (vdisk.weibo.com) search results for a keyword,
# collect the detail-page link and title of every hit, then resolve each
# detail page to its real download URL and save the file locally under the
# hit's title.
#
# Flow:
#   1. Build the search-result URLs (one per result page).
#   2. Fetch every result page and harvest (detail link, file name) pairs.
#   3. For each pair, fetch the detail page, extract the real file URL from
#      the embedded "fileDown.init" data and download it.
use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;
use LWP::Simple;
use URI;
use Encode;

# Searching for "perl" on Sina Vdisk yielded 16 result pages when this
# script was written.
my $keyword    = 'perl';       # keyword used for testing
my $sortby     = 'default';
my $last_page  = 16;

# --- 1. Build the list of search-result URLs -------------------------------
my @list_urls;
for my $page (1 .. $last_page) {
    my $url = URI->new('http://vdisk.weibo.com/search/');
    $url->query_form(
        # All form pairs:
        'keyword' => $keyword,
        'sortby'  => $sortby,
        'page'    => $page,
    );
    push @list_urls, $url;
}

# --- 2. Harvest detail links and file names from every result page ---------
my $ua = LWP::UserAgent->new;

# Each entry is [ $detail_page_url, $local_file_name ].  Pairs are kept as
# array refs (not as a joined string that is split again later) so that
# titles containing whitespace survive intact.
my @downloads;

for my $list_url (@list_urls) {
    my $response = $ua->get($list_url);
    unless ($response->is_success) {
        warn "error : cannot fetch $list_url: " . $response->status_line . "\n";
        next;
    }

    # Raw bytes on purpose: the title attribute is decoded from UTF-8 below.
    my $html = $response->content;

    my $tree = HTML::TreeBuilder->new;    # empty tree
    $tree->parse($html) or print "error : parse html ";
    $tree->eof;                           # flush pending input, finish the tree

    my @name_nodes = $tree->find_by_attribute("class", "sort_name_intro")
        or print "error : cannot find pdf_name ";

    for my $name_node (@name_nodes) {
        my $link = $name_node->look_down(_tag => 'a');
        next unless defined $link;

        my $href  = $link->attr('href');
        my $title = $link->attr('title');
        next unless defined $href  && length $href;
        next unless defined $title && length $title;

        # Resolve relative links against the page they were found on.
        my $detail_url = URI->new_abs($href, $response->base)->as_string;

        # The title comes from a remote page and is used as a local path:
        # neutralise path separators so it cannot escape the working
        # directory.  This is done on the decoded text, before the cp936
        # encoding, because "\" can be the trail byte of a cp936 character.
        my $name = decode("utf-8", $title);
        $name =~ s{[\\/]}{_}g;
        next unless length $name;

        # File names are written in cp936 (the original author's local
        # encoding) -- kept as in the original script.
        my $file_name = encode("cp936", $name);

        # Add the target link and its file name to the download list.
        push @downloads, [ $detail_url, $file_name ];
    }

    # Release the parse tree explicitly; older HTML::Element versions hold
    # circular references that are never freed otherwise.
    $tree->delete;
}

# --- 3. Resolve every detail page to the real file URL and download it -----
for my $item (@downloads) {
    my ($detail_url, $file_name) = @{$item};

    my $html = get($detail_url);
    unless (defined $html) {
        warn "error : cannot fetch $detail_url\n";
        next;
    }

    # This is the key step: the real download address is embedded in the
    # page inside the fileDown.init(...) data as a JSON "url" value.
    unless ($html =~ /fileDown\.init.*?\"url\":\"(.*?)\",/) {
        warn "error : no download url found in $detail_url\n";
        next;
    }
    my $file_url = $1;
    $file_url =~ s/\\//g;    # drop JSON escaping backslashes ("\/" -> "/")

    print $file_url;

    my $status = getstore($file_url, $file_name);
    warn "error : download of $file_url failed (status $status)\n"
        unless is_success($status);
}