# -*- coding: utf-8 -*-
"""
PCA + nearest-neighbour classification of grayscale images.

Created on Fri Nov 10 11:18:31 2017

@author: Shiro
"""

import math

import cv2
import numpy as np
from sklearn.decomposition import PCA


def _read_listing(file):
    """Return the whitespace-split fields of every non-blank line of *file*.

    Each line is expected to look like ``<image path> <integer label>``.
    """
    # The context manager closes the handle even if parsing fails.
    with open(file, 'r') as f:
        return [line.split() for line in f if line.strip()]


def _read_gray(path):
    """Read *path* as a single-channel image, failing loudly if unreadable."""
    img = cv2.imread(path, 0)  # flag 0 == cv2.IMREAD_GRAYSCALE
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cannot read image: %s' % path)
    return img


def _load_listing(file, shape_label):
    """Load the images and labels listed in *file*.

    Prints ``shape_label`` followed by the label array shape, then returns
    ``(images, Y)`` where *images* is a list of 2-D grayscale arrays and *Y*
    is a 1-D integer array of labels in listing order.
    """
    rows = _read_listing(file)
    Y = np.array([int(row[1]) for row in rows], dtype=int)
    print(shape_label, Y.shape)
    images = [_read_gray(row[0]) for row in rows]
    return images, Y


def load_train_from_file(file):
    """Load the training set described by the listing *file*.

    Returns:
        ``(X, Y, height, width)``: *X* holds one flattened grayscale image
        per row, *Y* the matching integer labels, and *height* / *width* are
        the dimensions of the first listed image.

    Raises:
        ValueError: if the listing contains no samples.
        IOError: if a listed image cannot be read.
    """
    images, Y = _load_listing(file, 'Shape Y : ')
    if not images:
        raise ValueError('no samples listed in %s' % file)
    height, width = images[0].shape
    X = np.asarray([img.reshape(-1) for img in images])
    return X, Y, height, width


def load_data_from_file(file):
    """Load the data set described by the listing *file*.

    Returns:
        ``(X, Y)``: *X* holds one flattened grayscale image per row and *Y*
        the matching integer labels.

    Raises:
        IOError: if a listed image cannot be read.
    """
    images, Y = _load_listing(file, 'Shape Y test : ')
    return np.asarray([img.reshape(-1) for img in images]), Y


def get_model(X_train, n_components=9):
    """Fit a whitened PCA on *X_train* and project the training data.

    Args:
        X_train: 2-D array with one flattened image per row.
        n_components: number of principal components to keep.

    Returns:
        ``(pca, X_train_pca)``: the fitted PCA and the projected training
        data.
    """
    pca = PCA(n_components=n_components, whiten=True).fit(X_train)
    X_train_pca = pca.transform(X_train)
    print('X train shape', X_train_pca.shape)
    return pca, X_train_pca


def predict(X_train_pca, y, X_test_pca):
    """Return the label of the training sample closest to *X_test_pca*.

    Args:
        X_train_pca: projected training samples, one per row.
        y: labels, indexable in the same order as *X_train_pca*.
        X_test_pca: a single projected test sample.

    Returns:
        The entry of *y* whose training sample has the smallest Euclidean
        distance to the test sample (the first one on ties).

    Raises:
        ValueError: if *X_train_pca* is empty.
    """
    # Cast to float so integer inputs cannot wrap around on subtraction.
    train = np.asarray(X_train_pca, dtype=float)
    if len(train) == 0:
        raise ValueError('cannot predict from an empty training set')
    train = train.reshape(len(train), -1)
    query = np.asarray(X_test_pca, dtype=float).ravel()
    # One broadcasted subtraction instead of a Python loop per sample.
    distances = np.linalg.norm(train - query, axis=1)
    return y[int(np.argmin(distances))]


def evaluate(pca, X_train_pca, y, X_test, y_test):
    """Return the fraction of *X_test* samples that are classified correctly.

    Each test sample is projected with *pca* and labelled by ``predict``;
    the result is the number of predictions equal to the matching entry of
    *y_test*, divided by ``len(y_test)``. An empty *y_test* ends in a
    ZeroDivisionError.
    """
    X_test_pca = pca.transform(X_test)
    tot = len(y_test)
    correct = sum(
        1
        for i, img_test in enumerate(X_test_pca)
        if int(predict(X_train_pca, y, img_test)) == y_test[i]
    )
    return float(correct) / tot


def __main__():
    """Train on 'train10.txt', evaluate on 'test10.txt', print the accuracy."""
    X_train, Y_train, _height, _width = load_train_from_file('train10.txt')
    x_test, y_test = load_data_from_file('test10.txt')
    pca, x_train_pca = get_model(X_train)
    acc = evaluate(pca, x_train_pca, Y_train, x_test, y_test)
    print('accuracy : ', acc)


# Run the experiment only when executed as a script, not on import.
if __name__ == '__main__':
    __main__()