#!/usr/bin/env python
# encoding=utf8
"""Append the host entries published on a 360kb.com page to /etc/hosts.

On the first run (no backup present) the current hosts file is copied to
``hostsBack``.  On later runs the hosts file is first rebuilt from that
backup.  In both cases the ip/host pairs scraped from ``urlpath`` are then
appended to the hosts file.
"""
try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3
import os
import shutil

__author__ = 'sofn'

# NOTE(review): the page is fetched over plain HTTP and its content is written
# to the hosts file after only a minimal shape check on the ip.
urlpath = "http://www.360kb.com/kb/2_150.html"
hostsBack = "/etc/hosts_back"
hosts = "/etc/hosts"


class MyHTMLParser(HTMLParser):
    """Collect ip/host pairs from the hosts listing embedded in a page.

    Collection is active between a text node starting with ``#google hosts``
    and the text node ``#google hosts 2015 end``.  Inside that region, the
    first non-empty text node after an attribute-less ``<br />`` is taken as
    the ip and every following text node (until the next ``<br />``) as a
    host name.

    Attributes:
        hosts: list of dicts collected so far; complete entries have the
            keys ``"ip"`` and ``"host"``.
        got: True once at least one ``<br />`` has been seen.
        start: True while inside the marked region.
        onehost: the entry currently being assembled.
        first: True when the next text node is the ip of a new entry.
    """

    START_MARKER = "#google hosts"
    END_MARKER = "#google hosts 2015 end"

    def __init__(self):
        # Explicit base call: Python 2's HTMLParser is an old-style class,
        # so super() cannot be used there.
        HTMLParser.__init__(self)
        self.hosts = []
        self.got = False
        self.start = False
        self.onehost = {}
        self.first = False

    def handle_startendtag(self, tag, attrs):
        """Treat an attribute-less self-closing ``<br />`` as a new line."""
        if tag == "br" and not attrs:
            self.got = True
            self.first = True

    def handle_data(self, data):
        """Track the region markers and record ip/host text inside them."""
        data = data.strip()
        if not data:
            return

        if not self.start:
            # The end marker also begins with the start prefix; it must not
            # (re)open the region.
            if data.startswith(self.START_MARKER) and data != self.END_MARKER:
                self.start = True
            return

        if data == self.END_MARKER:
            # Handle the marker before recording anything so it can never be
            # stored as an ip or host; drop any half-built entry.
            self.start = False
            self.onehost = {}
            return

        if not self.got:
            return

        if self.first:
            self.onehost["ip"] = data
            self.first = False
        else:
            self.onehost["host"] = data
            self.hosts.append(self.onehost)
            self.onehost = {}


def _fetch_page(url):
    """Download ``url`` and return the response body as text."""
    # Imported here so the parser can be imported without the third-party
    # ``requests`` package being installed.
    import requests

    # A timeout keeps the script from hanging forever on a dead connection.
    return requests.get(url, timeout=30).text


def _parse_hosts(text):
    """Return the list of entry dicts that MyHTMLParser extracts from text."""
    parser = MyHTMLParser()
    parser.feed(text)
    parser.close()
    return parser.hosts


def _prepare_hosts_file(hosts_path, backup_path):
    """Create the backup on first run, otherwise reset hosts from it."""
    if not os.path.exists(backup_path):
        print("backup hosts")
        shutil.copy(hosts_path, backup_path)
        return

    # Opening with "w" truncates (or creates) the hosts file, so no separate
    # remove step is needed before writing the backup content back.
    with open(backup_path) as backup_file:
        with open(hosts_path, "w") as hosts_file:
            print("write base")
            hosts_file.write(backup_file.read())


def _append_entries(hosts_path, entries):
    """Append every entry with a host and a four-part dotted ip."""
    with open(hosts_path, "a") as hosts_file:
        for entry in entries:
            ip = entry.get("ip")
            host = entry.get("host")
            if ip and host and len(ip.split(".")) == 4:
                print("append " + ip + " " + host)
                hosts_file.write(ip + " " + host + "\n")


def main():
    """Fetch the page, then rebuild and extend the hosts file."""
    # Fetch and parse first so a network failure leaves the hosts file
    # untouched.
    entries = _parse_hosts(_fetch_page(urlpath))
    _prepare_hosts_file(hosts, hostsBack)
    _append_entries(hosts, entries)


if __name__ == "__main__":
    main()