"""Download every JPG/GIF/PNG image linked from a range of forum pages.

Pages ``BASE_URL + "1"`` .. ``BASE_URL + "1999"`` are fetched in order.  For
the n-th page a directory ``D:/<n>`` is created and each image URL found in
the page is saved into it under the last path component of the URL.

Pages or images that cannot be fetched are reported on stderr and skipped,
so a single failure never aborts the whole crawl.
"""
import http.client
import os
import re
import shutil
import sys
import urllib.request
from collections import deque

BASE_URL = "http://www.tuigirl8.com/forum/view/"
FIRST_PAGE = 1
LAST_PAGE = 1999
TARGET_DIR_PATTERN = "D:/%d"
TIMEOUT_SECONDS = 2

# An absolute http URL ending in .jpg/.gif/.png.  The lookahead requires the
# extension to be the real end of the path (followed by whitespace, a quote,
# a tag/bracket delimiter, a query/fragment, or end of text) so that a URL
# such as "http://a.com/jpgs/x.png" is not cut short at ".../jpg".
IMAGE_LINK_RE = re.compile(
    r"""http://[^\s"'<>]+?\.(?:jpg|gif|png)(?=[\s"'<>?#)]|$)"""
)

# Everything urlopen/read/file-writing can reasonably raise: URLError and
# socket timeouts are OSError subclasses; http.client raises HTTPException
# for malformed responses; ValueError covers malformed URLs.
FETCH_ERRORS = (OSError, http.client.HTTPException, ValueError)


def extract_image_links(page_text):
    """Return the distinct image URLs in *page_text*, in first-seen order."""
    # dict.fromkeys de-duplicates while preserving order, so an image that is
    # referenced several times on one page is downloaded only once.
    return list(dict.fromkeys(IMAGE_LINK_RE.findall(page_text)))


def fetch_page(url, timeout=TIMEOUT_SECONDS):
    """Return the decoded body of *url*, or ``None`` if it cannot be fetched."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            raw = response.read()
    except FETCH_ERRORS as exc:
        print("skipping page %s: %s" % (url, exc), file=sys.stderr)
        return None
    # Only the (ASCII) URLs matter, so undecodable bytes are simply replaced
    # instead of failing on pages that are not valid UTF-8.
    return raw.decode("utf-8", errors="replace")


def download_image(link, destination, timeout=TIMEOUT_SECONDS):
    """Save *link* to the file *destination*; return True on success."""
    try:
        # The response is opened first, so a failed connection does not leave
        # an empty file behind.
        with urllib.request.urlopen(link, timeout=timeout) as response, \
                open(destination, "wb") as out_file:
            shutil.copyfileobj(response, out_file)
    except FETCH_ERRORS as exc:
        print("skipping image %s: %s" % (link, exc), file=sys.stderr)
        return False
    return True


def main():
    """Crawl the configured page range and download every linked image."""
    queue = deque(
        BASE_URL + str(number) for number in range(FIRST_PAGE, LAST_PAGE + 1)
    )
    count = 0
    while queue:
        url = queue.popleft()
        print(str(count) + url)
        count += 1

        target_dir = TARGET_DIR_PATTERN % count
        os.makedirs(target_dir, exist_ok=True)

        page_text = fetch_page(url)
        if page_text is None:
            continue

        for link in extract_image_links(page_text):
            print(link)
            file_name = link.rsplit("/", 1)[-1]
            download_image(link, os.path.join(target_dir, file_name))


if __name__ == "__main__":
    main()