#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Copy numerically named song files to "<author> - <name>.mp3".

Every file below the current directory whose base name is a bare integer is
treated as a xiami.com song ID.  The song page is fetched, a name and an
author are parsed out of its <title> tag, and the file is copied into the
current working directory under the new name.  Processed IDs are remembered
in ``cache.txt`` so that later runs skip them.

The script targets Python 2: page contents, titles and file names are all
handled as byte strings.
"""

import os
import re
import shutil
import threading
import traceback

try:
    import httplib
except ImportError:
    # Only keeps the module importable where httplib was renamed; the
    # byte-string handling below is still written for Python 2.
    import http.client as httplib

title_re = re.compile('<title>(.+?)</title>', re.DOTALL)
# Matches the album cell of a song page (the Chinese label reads "album").
# Not referenced by any function in this file.
album_re = re.compile('<td class="item" valign="top">所属专辑:</td>.*?title="(?P<album>.+?)">', re.DOTALL)
# Not used by the current sequential implementation of main().
max_threads = 20
cache_filename = 'cache.txt'
# Maps a song ID (the source file's base name) to the file name it was
# copied to.
cache = {}
# Seconds to wait on the HTTP connection, so that one stalled request cannot
# hang the whole batch.
http_timeout = 30


def listFiles(rootDir, ext = None):
    """Return the paths of all files below *rootDir*, found recursively.

    Args:
        rootDir: directory to walk.
        ext: optional suffix; when given, only file names ending with it
            are returned.

    Returns:
        A list of paths, each joined onto the directory it was found in.
    """
    return [
        os.path.join(root, f)
        for root, dirs, files in os.walk(rootDir)
        for f in files
        if ext is None or f.endswith(ext)
    ]


def filterFiles(lst):
    """Return the entries of *lst* that still have to be processed.

    A path is kept when its base name contains no '.', parses as an integer,
    and is not already in ``cache`` with a target file that exists.
    """
    def _pending(path):
        song_id = os.path.basename(path)
        if '.' in song_id:
            return False
        try:
            int(song_id)
        except ValueError:
            return False
        # A cached entry only counts while the file it points to exists.
        if song_id in cache and os.path.exists(cache[song_id]):
            return False
        return True

    return [path for path in lst if _pending(path)]


def readHTTPInfo(uri):
    """Fetch the xiami.com page of song *uri* and parse its <title> tag.

    Returns:
        A ``(name, author)`` tuple of stripped strings.  ``('', '')`` is
        returned when the request fails, the response status is not 200,
        the page has no <title>, or the title cannot be split on a '-'.
        Failures are printed, never raised.
    """
    try:
        conn = httplib.HTTPConnection("www.xiami.com", timeout=http_timeout)
        try:
            conn.request("GET", '/song/%s' % uri)
            r = conn.getresponse()
            if r.status != 200:
                print('%s %s' % (r.status, r.reason))
                return '', ''
            data = r.read()
        finally:
            # Release the socket on every path, including errors.
            conn.close()

        match = title_re.search(data)
        if not match:
            print('no title')
            return '', ''

        result = match.group(1)
        # Drop everything from the first ',' that follows the first '-'.
        comma = result.find(',', result.find('-'))
        if comma > 0:
            result = result[:comma]
        # The text left of the last '-' is the name, the rest the author.
        dash = result.rfind('-')
        if dash > 0:
            return result[:dash].strip(), result[dash + 1:].strip()
        print(result.strip())
        return '', ''
    except Exception:
        traceback.print_exc()
        return '', ''


def utf2gbk(s):
    """Re-encode the UTF-8 byte string *s* as GBK."""
    return s.decode('utf8').encode('gbk')


def copy2RenameMp3(filename, idx = 0):
    """Copy *filename* to "<author> - <name>.mp3" in the working directory.

    The base name of *filename* is used as the song ID for the lookup.
    Nothing is copied when the lookup yields an empty name or author.

    Args:
        filename: path of the source file.
        idx: number that is only echoed in the progress line.
    """
    uri = os.path.basename(filename)
    name, author = readHTTPInfo(uri)
    mp3name = utf2gbk('%s - %s.mp3' % (author, name))
    print('%s %s -> %s' % (filename, idx, mp3name))
    if name and author:
        shutil.copy(filename, mp3name)
        # Record the ID only once the copy has actually succeeded.
        cache[uri] = mp3name


def readCache():
    """Load ``cache_filename`` into ``cache``; an unreadable file is ignored.

    Each line has the form "<id> <file name>"; lines without a space are
    skipped.
    """
    try:
        with open(cache_filename, 'rb') as fp:
            for line in fp:
                song_id, sep, target = line.partition(' ')
                if sep:
                    cache[song_id] = target.strip()
    except IOError:
        # No cache yet (e.g. first run): start with whatever is in memory.
        pass


def writeCache():
    """Write ``cache`` to ``cache_filename``, one "<id> <file name>" per line.

    An empty cache leaves the file untouched.
    """
    if not cache:
        return
    lines = ['%s %s' % (k, v) for k, v in cache.items()]
    with open(cache_filename, 'wb') as fp:
        fp.write('\n'.join(lines))


def main():
    """Process every pending song file below the current directory."""
    readCache()
    try:
        for filename in filterFiles(listFiles('.')):
            try:
                copy2RenameMp3(filename)
            except Exception:
                # One bad file (encoding, copy error) must not stop the run.
                traceback.print_exc()
    finally:
        # Persist progress even when the run is interrupted.
        writeCache()


if __name__ == '__main__':
    main()