虾米XMusicCache歌曲批量命名

#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import re
import shutil
import httplib
import traceback
import threading


# Captures everything between <title> and </title>; DOTALL lets '.' span newlines.
title_re = re.compile('<title>(.+?)</title>', re.DOTALL)
# Captures, as group "album", the title="..." attribute that follows the
# album-label table cell. Not referenced by any code visible in this file.
album_re = re.compile('<td class="item" valign="top">所属专辑:</td>.*?title="(?P<album>.+?)">', re.DOTALL)
# Batch-size limit for threaded processing; main() does not currently start
# any threads, so this value has no effect at present.
max_threads = 20
# File that readCache()/writeCache() use to persist the cache between runs.
cache_filename = 'cache.txt'
# In-memory cache: source file basename -> generated mp3 file name.
cache = {}

def listFiles(rootDir, ext=None):
	"""Recursively collect the paths of all files below rootDir.

	rootDir -- directory to walk (sub-directories included).
	ext     -- optional suffix; when given, only files whose names end
	           with it are returned.

	Returns a list of paths built by joining each directory with the
	file name, in the order os.walk yields them.
	"""
	found = []
	for dirpath, _subdirs, names in os.walk(rootDir):
		if ext is not None:
			names = [n for n in names if n.endswith(ext)]
		found.extend(os.path.join(dirpath, n) for n in names)
	return found

def filterFiles(lst):
	"""Select the paths that still need to be converted.

	A path is kept when its basename
	  * contains no '.',
	  * parses as an integer, and
	  * has no entry in the module-level cache whose target file still
	    exists on disk.

	lst -- iterable of file paths.

	Returns a list of the kept paths, in their original order.
	"""
	def _isPending(path):
		name = os.path.basename(path)
		if '.' in name:
			return False
		try:
			int(name)
		except ValueError:
			# Only a failed integer parse means "not a cache file"; anything
			# else (e.g. KeyboardInterrupt) must not be swallowed here.
			return False
		target = cache.get(name)
		# Re-process when nothing is cached or the cached output is gone.
		return target is None or not os.path.exists(target)
	return [path for path in lst if _isPending(path)]

def _parseTitle(data):
	"""Split the page's <title> text into the two parts around its last '-'.

	data -- HTML text of the song page.

	Returns (text before the last '-', text after it), both stripped.
	Returns ('', '') when there is no <title> or no usable '-' in it;
	in those cases a diagnostic line is printed.
	"""
	match = title_re.search(data)
	if not match:
		print('no title')
		return '', ''
	result = match.group(1)
	# Drop everything from the first ',' that follows the first '-'.
	# NOTE(review): presumably this trims a site suffix after the artist - confirm.
	pos = result.find(',', result.find('-'))
	if pos > 0:
		result = result[:pos]
	pos = result.rfind('-')
	if pos > 0:
		return result[:pos].strip(), result[pos + 1:].strip()
	# A title without a separator cannot be split; show it for diagnosis.
	print(result.strip())
	return '', ''

def readHTTPInfo(uri):
	"""Fetch /song/<uri> from www.xiami.com and parse its page title.

	uri -- value appended to '/song/' to form the request path.

	Returns a 2-tuple of strings taken from the page title (callers unpack
	it as name, author). Returns ('', '') on a non-200 response, a missing
	or unparsable title, or any exception; exceptions are printed via
	traceback and never propagate.
	"""
	conn = None
	try:
		conn = httplib.HTTPConnection("www.xiami.com")
		conn.request("GET", '/song/%s' % uri)
		r = conn.getresponse()
		if r.status != 200:
			print('%s %s' % (r.status, r.reason))
			return '', ''
		return _parseTitle(r.read())
	except Exception:
		traceback.print_exc()
		return '', ''
	finally:
		# Release the socket on every path, including errors and non-200.
		if conn is not None:
			conn.close()

def utf2gbk(s):
	"""Re-encode a UTF-8 byte string as GBK and return the GBK bytes."""
	text = s.decode('utf8')
	return text.encode('gbk')

def copy2RenameMp3(filename, idx = 0):
	"""Copy one cache file to '<author> - <name>.mp3' in the working directory.

	filename -- path of the cache file; its basename is passed to
	            readHTTPInfo() to look the song up.
	idx      -- number shown in the progress output only.

	On success the generated file name is recorded in the module-level
	cache under the file's basename. When the lookup yields no name or no
	author, nothing is copied and nothing is cached.
	"""
	uri = os.path.basename(filename)
	name, author = readHTTPInfo(uri)
	if not name or not author:
		# Without both fields the target would be the meaningless ' - .mp3',
		# and every failed lookup would overwrite that same file.
		print('%s %s -> skipped (song info unavailable)' % (filename, idx))
		return
	mp3name = utf2gbk('%s - %s.mp3' % (author, name))
	print('%s %s -> %s' % (filename, idx, mp3name))
	cache[uri] = mp3name
	shutil.copy(filename, mp3name)

def readCache():
	"""Load cache entries from cache_filename into the module-level cache.

	Each line has the form '<key> <value>': the text before the first
	space is the key, the stripped remainder is the value. Lines without
	a space are ignored.

	A missing or unreadable file is not an error; the cache then keeps
	whatever entries were read before the failure.
	"""
	try:
		with open(cache_filename, 'rb') as fp:
			for line in fp:
				pos = line.find(' ')
				if pos >= 0:
					cache[line[:pos]] = line[pos + 1:].strip()
	except (IOError, OSError):
		# Best effort: typically the first run, before any cache was written.
		return

def writeCache():
	"""Write the module-level cache to cache_filename.

	One '<key> <value>' line is written per entry, joined with newlines
	and with no trailing newline. Nothing is written when the cache is
	empty, so an existing file is left untouched in that case.
	"""
	if not cache:
		return
	# Build the content first so a formatting error cannot leave the file
	# truncated.
	lines = ['%s %s' % (k, v) for k, v in cache.items()]
	with open(cache_filename, 'wb') as fp:
		fp.write('\n'.join(lines))

def main():
	"""Rename-copy every pending cache file found under the current directory.

	Loads the cache, collects all files below '.', keeps those that
	filterFiles() reports as pending, and processes them one at a time
	with copy2RenameMp3(). The cache is written back at the end, even if
	processing stops with an exception.
	"""
	readCache()
	pending = filterFiles(listFiles('.'))
	try:
		# Files are handled sequentially; no worker threads are started.
		for filename in pending:
			copy2RenameMp3(filename)
	finally:
		# Keep the names resolved so far so the next run can skip them.
		writeCache()

if __name__ == '__main__':
	main()

编程技巧