In [1]:
#imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn 
from scipy.io import loadmat
import csv
import random
from sklearn.utils import shuffle
from sklearn import preprocessing
In [2]:
# load and shuffle the data

letters_dict = loadmat('letters_data', appendmat=True)
letters_train = letters_dict['train_x']
letters_label = letters_dict['train_y']
letters_train, letters_label = shuffle(letters_train, letters_label)

test_x=letters_dict['test_x']
In [3]:
# scale each training sample to unit L2 norm
letters_train = letters_train.astype(float)
num_rows = len(letters_train[:, 0])
for x in range(0, num_rows):
    l2_norm = (np.dot(letters_train[x, :], letters_train[x, :].T))**.5
    letters_train[x, :] = letters_train[x, :]/l2_norm
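The loop above can also be written as a single vectorized step. A minimal equivalent sketch (an alternative to the loop, not meant to be run in addition to it; assumes no all-zero rows):

In [ ]:
# vectorized equivalent of the per-row L2 normalization above
row_norms = np.linalg.norm(letters_train, axis=1, keepdims=True)
letters_train = letters_train / row_norms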
In [4]:
# Some useful functions
def visualize_sample(sample):
    # each sample is a flattened 28x28 image
    pic = sample.reshape(28, 28)
    plt.imshow(pic, cmap='hot', interpolation='nearest')
    plt.show()

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sec_squared(x):
    # sech^2(x) = 1/cosh^2(x), the derivative of tanh(x)
    return 2/(np.cosh(2*x)+1)

def loss_function(prediction, correct):
    # cross-entropy loss against a one-hot encoding of the correct label (1..26)
    y = np.zeros(26)
    y[correct-1] = 1

    return (-1*np.dot(y, np.log(prediction).T) - np.dot((1-y), np.log(1-prediction).T))
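sec_squared is used later as the derivative of tanh during backpropagation. A quick finite-difference sanity check (the test points and step size are arbitrary, not from the original notebook):

In [ ]:
# check that sec_squared(x) matches d/dx tanh(x) numerically
xs = np.linspace(-3, 3, 7)
eps = 1e-6
numeric = (np.tanh(xs + eps) - np.tanh(xs - eps)) / (2*eps)
print(np.max(np.abs(numeric - sec_squared(xs))))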
In [5]:
class neural_net(object):
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # each weight matrix has one extra row for the bias term
        self.w1 = np.random.normal(0, 0.01, (self.input_size+1, self.hidden_size))
        self.w2 = np.random.normal(0, 0.01, (self.hidden_size+1, self.output_size))
        
    def label_samples(self, input_data):
        # append a constant-1 column so the bias rows of the weights are applied
        ones = np.ones((len(input_data[:, 0]), 1))
        input_data = np.c_[input_data, ones]

        # hidden layer with tanh activation
        layer1 = np.dot(input_data, self.w1)
        layer1 = np.tanh(layer1)

        ones = np.ones((len(layer1[:, 0]), 1))
        layer1 = np.c_[layer1, ones]

        # output layer with sigmoid activation
        layer2 = np.dot(layer1, self.w2)
        layer2 = sigmoid(layer2)

        # labels are 1-indexed (1..26)
        return np.argmax(layer2, 1) + 1
    
    def compute_sample(self, sample):
        # forward pass for a single sample; returns the vector of sigmoid outputs
        sample_with_one = np.zeros(self.input_size+1)
        sample_with_one[0:self.input_size] = sample
        sample_with_one[-1] = 1

        sample = sample_with_one

        h_values = np.dot(sample, self.w1)
        h_values = np.tanh(h_values)

        h_values_with_one = np.zeros(self.hidden_size+1)
        h_values_with_one[0:self.hidden_size] = h_values
        h_values_with_one[-1] = 1

        h_values = h_values_with_one

        output_values = np.dot(h_values, self.w2)
        z = sigmoid(output_values)

        return z
    
    def compute_gradient(self, sample, correct):
        # forward pass (same as compute_sample, keeping the intermediate values)
        sample_with_one = np.zeros(self.input_size+1)
        sample_with_one[0:self.input_size] = sample
        sample_with_one[-1] = 1

        sample = sample_with_one

        h_values = np.dot(sample, self.w1)
        h_values = np.tanh(h_values)

        h_values_with_one = np.zeros(self.hidden_size+1)
        h_values_with_one[0:self.hidden_size] = h_values
        h_values_with_one[-1] = 1

        h_values = h_values_with_one

        output_values = np.dot(h_values, self.w2)
        z = sigmoid(output_values)

        # one-hot encoding of the correct label
        y = np.zeros(self.output_size)
        y[correct-1] = 1

        # gradient of the cross-entropy loss w.r.t. w2: outer(h, z - y)
        w2_derivative = np.outer(h_values, (y-z))*-1

        current_loss = loss_function(z, correct)

        # backpropagate to w1: chain rule through the output weights and tanh' = sech^2
        w1_derivative = np.dot(self.w2, (y-z.T))
        w1_derivative = w1_derivative[0:-1]  # drop the bias component of the hidden layer
        sec_square = sec_squared(np.dot(sample, self.w1))

        w1_derivative = np.multiply(w1_derivative, sec_square)
        w1_derivative = np.outer(sample, w1_derivative)*-1

        # drop the bias rows; gradient_descent_train only updates the non-bias weights
        w1_derivative = w1_derivative[0:-1, :]
        w2_derivative = w2_derivative[0:-1, :]

        return (w1_derivative, w2_derivative, current_loss)
    
    def gradient_descent_train(self, training_set, training_labels, validation_set, validation_labels, numb_passes, alpha=0.05):
        # stochastic gradient descent; the validation arguments are accepted but not used here
        for y in range(0, numb_passes):
            for x in range(0, len(training_set[:, 0])):
                # decay the learning rate slightly every 1000 samples
                if (x % 1000 == 0):
                    alpha = alpha/1.005
                w1_prime, w2_prime, _ = self.compute_gradient(training_set[x], training_labels[x])

                # only the non-bias rows are updated (compute_gradient drops the bias rows)
                self.w1[0:-1, :] = self.w1[0:-1, :] - (w1_prime*alpha)
                self.w2[0:-1, :] = self.w2[0:-1, :] - (w2_prime*alpha)

    
    def test_accuracy(self, validation_set, labels):
        # fraction of samples whose predicted label matches the true label
        labels = labels[:, 0]
        sum_elements = len(labels)
        predictions = self.label_samples(validation_set)
        cancelled_array = predictions - labels

        return (sum_elements - np.count_nonzero(cancelled_array))/float(sum_elements)
   
        
    
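Since the backpropagation in compute_gradient is hand-derived, a finite-difference gradient check is a cheap sanity test. A minimal sketch (the fresh network, perturbed weight index, and step size are arbitrary choices, not part of the original run):

In [ ]:
# finite-difference check of compute_gradient against the loss on one sample
check_net = neural_net(784, 100, 26)
sample = letters_train[0]
label = int(letters_label[0])

w1_grad, w2_grad, _ = check_net.compute_gradient(sample, label)

eps = 1e-5
i, j = 10, 3  # an arbitrary non-bias weight of w1
old = check_net.w1[i, j]
check_net.w1[i, j] = old + eps
loss_plus = loss_function(check_net.compute_sample(sample), label)
check_net.w1[i, j] = old - eps
loss_minus = loss_function(check_net.compute_sample(sample), label)
check_net.w1[i, j] = old

numeric = (loss_plus - loss_minus) / (2*eps)
print(numeric, w1_grad[i, j])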
In [8]:
first_net= neural_net(784,100,26)
first_net.gradient_descent_train(letters_train[0:10000,:],letters_label[0:10000],letters_train[80000:-1,:],letters_label[80000:-1],1,0.1)
In [9]:
first_net.test_accuracy(letters_train[80000:-1,:],letters_label[80000:-1])
Out[9]:
0.5942320141074577
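gradient_descent_train does not use its validation arguments, so one way to watch training progress is to call it one pass at a time and record validation accuracy between passes. A rough sketch (the pass count and data slices are illustrative; note that alpha restarts at 0.1 on every call):

In [ ]:
# record validation accuracy after each training pass
curve_net = neural_net(784, 100, 26)
val_x, val_y = letters_train[80000:-1, :], letters_label[80000:-1]
accuracies = []
for epoch in range(3):
    curve_net.gradient_descent_train(letters_train[0:10000, :], letters_label[0:10000], val_x, val_y, 1, 0.1)
    accuracies.append(curve_net.test_accuracy(val_x, val_y))

plt.plot(accuracies)
plt.xlabel('pass')
plt.ylabel('validation accuracy')
plt.show()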
In [38]:
# NOTE: z is not defined at this point in the notebook; it was presumably created
# in an earlier interactive session, so this cell will not run from a clean kernel
plt.plot(z[1])
plt.show()
In [12]:
print(first_net.test_accuracy(letters_train[80000:-1,:],letters_label[80000:-1]))
0.675595437398
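test_x was loaded at the top of the notebook but never used. A sketch of how predictions for it could be produced and written out with the already-imported csv module (scaling the test rows to unit L2 norm like the training rows is an assumption, as are the file name and column headers):

In [ ]:
# predict labels for the test set and write them to a CSV file
test_float = test_x.astype(float)
test_float = test_float / np.linalg.norm(test_float, axis=1, keepdims=True)
test_predictions = first_net.label_samples(test_float)

with open('test_predictions.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Id', 'Category'])
    for i, p in enumerate(test_predictions):
        writer.writerow([i+1, p])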
In [ ]: