# import the necessary packages import pandas as pd from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import SGDClassifier from sklearn.svm import LinearSVC from sklearn.neighbors import KNeighborsClassifier from sklearn.cross_validation import train_test_split from sklearn.metrics import classification_report from sklearn import datasets from sklearn.decomposition import PCA as pca from nolearn.dbn import DBN from matplotlib import pyplot from PIL import Image import numpy as np import scipy STANDARD_SIZE = (28, 28) class DigitProphet(object): def __init__(self): # load train.csv # train = pd.read_csv("data/train.csv") # data_train=train.as_matrix() # values_train=data_train[:,0] # images_train=data_train[:,1:] # trainX, _trainX, trainY, _trainY = train_test_split(images_train/255.,values_train,test_size=0.5) # #load test.csv # test = pd.read_csv("data/test.csv") # data_test=test.as_matrix() # testX, _testX = train_test_split(data_test/255.,test_size=0.99) # Random Forest # self.clf = RandomForestClassifier() # Stochastic Gradient Descent # self.clf = SGDClassifier() # Support Vector Machine # self.clf = LinearSVC() # Nearest Neighbors # self.clf = KNeighborsClassifier(n_neighbors=13) train = pd.read_csv("data/train.csv") data_train=train.as_matrix() values_train=data_train[:,0] images_train=data_train[:,1:] trainX, _trainX, trainY, _trainY = train_test_split(images_train/255.,values_train,test_size=0.995) # Neural Network self.clf = DBN([trainX.shape[1], 300, 10],learn_rates=0.3,learn_rate_decays=0.9,epochs=10,verbose = 1) #Training self.clf.fit(trainX, trainY) pass def predictImage(self,array): image=np.atleast_2d(array) return self.clf.predict(image)[0] def trim(image): image_data = np.array(image) image_data_bw = image_data.min(axis=2) row_min = np.where(image_data_bw.min(axis=1)<255)[0].min() row_max = np.where(image_data_bw.min(axis=1)<255)[0].max() col_min = np.where(image_data_bw.min(axis=0)<255)[0].min() col_max = np.where(image_data_bw.min(axis=0)<255)[0].max() size=int((max(row_max-row_min,col_max-col_min))*1.3) cropBox = (row_min, row_max, col_min, col_max) image_data_new = image_data[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :] new_image = Image.fromarray(image_data_new) img_w, img_h = new_image.size background = Image.new('RGBA', (size, size), (255, 255, 255, 255)) bg_w, bg_h = background.size offset = ((bg_w-img_w)/2,(bg_h-img_h)/2) background.paste(new_image, offset) return background def getimgdata(filename): img = Image.open(filename) img=alpha_to_color(img) img = trim(img) img = img.convert('L') img = img.getdata() img = img.resize(STANDARD_SIZE) img = np.array(img)/255. img = [1-i for i in img] return img def alpha_to_color(image, color=(255, 255, 255)): x = np.array(image) r, g, b, a = np.rollaxis(x, axis=-1) r[a == 0] = color[0] g[a == 0] = color[1] b[a == 0] = color[2] x = np.dstack([r, g, b, a]) return Image.fromarray(x, 'RGBA') def saveImage(array,path='outfile.jpg'): # Get the training data back to its original form. matrix = np.reshape(array, (STANDARD_SIZE)) # Get the original pixel values. matrix = matrix*255. # pyplot.imshow(sample, cmap = pyplot.cm.gray) # result=predictImg(clf,image) scipy.misc.imsave(path, matrix) dp=DigitProphet() pointer=0 def main(): # filename="imageToSave.png" # data=getimgdata(filename) # saveImage(data) # preds=dp.predictImage(data) # print preds pass if __name__ == '__main__': main()