import numpy as np


"""
################################################################################################
##################   METHODS: SIGMOID AND DERIVATIVE OF SIGMOID   ##############################
################################################################################################
"""


def sigmoid(vec):
    # Element-wise logistic function: 1 / (1 + exp(-x)).
    evec = 1 + np.exp(-vec)
    return 1/evec


def d_sigmoid(output_of_gate):
    # Derivative of the sigmoid expressed in terms of its *output* s: s * (1 - s).
    return output_of_gate*(1-output_of_gate)

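
# A small sanity check for the pair above (a sketch; the helper name, test
# points, and tolerance are assumptions for illustration): d_sigmoid takes the
# sigmoid *output*, so d_sigmoid(sigmoid(x)) should match a central finite
# difference of sigmoid itself.
def _check_sigmoid_grad(eps=1e-5):
    x = np.array([-2.0, 0.0, 3.0])
    analytic = d_sigmoid(sigmoid(x))
    numeric = (sigmoid(x + eps) - sigmoid(x - eps))/(2*eps)
    assert np.allclose(analytic, numeric, atol=1e-6)
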
| """ |
| ################################################################################################ |
| ################### METHODs: ReLU AND DERIVATE OF ReLU ######################################## |
| ################################################################################################ |
| """ |
|
|
def relu(vec_x):
    # ReLU: max(0, x) element-wise; operates on a copy so the input is untouched.
    relu_x = vec_x.copy()
    relu_x[vec_x < 0] = 0
    return relu_x


def lrelu(vec_x):
    # Leaky ReLU: identity for x > 0, slope 1/100 for x < 0.
    relu_x = vec_x.copy()
    relu_x[vec_x < 0] = relu_x[vec_x < 0]/100
    return relu_x


def d_relu(vec_x):
    # Derivative of relu: 1 where x > 0, 0 elsewhere (the kink at 0 maps to 0).
    d_relu_x = vec_x.copy()
    d_relu_x[vec_x > 0] = 1
    d_relu_x[vec_x <= 0] = 0
    return d_relu_x


def d_lrelu(vec_x):
    # Derivative of lrelu: 1 where x > 0, 0.01 elsewhere.
    # Note: assumes a float array; on integer input the 0.01 would truncate to 0.
    d_relu_x = vec_x.copy()
    d_relu_x[vec_x > 0] = 1
    d_relu_x[vec_x <= 0] = 0.01
    return d_relu_x

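
# An analogous check for the leaky-ReLU pair (again a sketch with assumed names
# and test points): away from the kink at 0 the analytic derivative should
# match a finite difference, with slope 1 on the positive side and 1/100 on the
# negative side.
def _check_lrelu_grad(eps=1e-5):
    x = np.array([-3.0, -0.5, 0.7, 2.0])    # float inputs, away from the kink
    analytic = d_lrelu(x)
    numeric = (lrelu(x + eps) - lrelu(x - eps))/(2*eps)
    assert np.allclose(analytic, numeric, atol=1e-6)
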
| """ |
| ################################################################################################ |
| ################## IMPLEMENTATION OF NEURAL NETWORK ########################################## |
| ################################################################################################ |
| """ |
|
|
class NN:
    def __init__(self, input_dimension, hidden_layer_size, outer_relu = True, keep_prob = 1.0):
        # Single-hidden-layer network computing s = U^T * lrelu(W x + B1) + B2,
        # with optional dropout on the input and hidden units.
        self.n = hidden_layer_size
        self.d = input_dimension

        # Hidden-layer parameters, small uniform initialisation.
        rand_init_range = 1e-2
        self.W = np.random.uniform(-rand_init_range, rand_init_range, (self.n, self.d))
        self.B1 = np.random.uniform(-rand_init_range, rand_init_range, (self.n, 1))

        # Output-layer parameters.
        rand_init_range = 1e-1
        self.U = np.random.uniform(-rand_init_range, rand_init_range, (self.n, 1))
        self.B2 = np.random.uniform(-rand_init_range, rand_init_range, (1, 1))

        # If False, a sigmoid output would be applied; that path has been removed.
        self.outer_relu = outer_relu

        # Per-parameter learning rates; must be set before calling Back_Prop.
        self.etaW = None
        self.etaB1 = None
        self.etaU = None
        self.etaB2 = None

        self.version = 'h1'

        # Dropout state: r1 masks the input, r2 masks the hidden activations.
        self.keep_prob = keep_prob
        self.dropout_prob = 1 - keep_prob
        self.r1 = np.ones((input_dimension, 1))
        self.r2 = np.ones(self.B1.shape)

        self.training_time = True

    def new_dropout(self):
        # Resample Bernoulli dropout masks (1 = keep) for input and hidden units.
        self.r1 = np.random.binomial(1, self.keep_prob, size=self.r1.shape)
        self.r2 = np.random.binomial(1, self.keep_prob, size=self.r2.shape)

    def ForTraining(self):
        self.training_time = True

    def ForTesting(self):
        self.training_time = False

    def Forward_Prop(self, x):
        if self.training_time:
            # Training: apply the current dropout masks.
            z2 = np.matmul(self.W, x*self.r1) + self.B1
            a2 = lrelu(z2)*self.r2
            o = np.matmul(self.U.transpose(), a2) + self.B2
        else:
            # Testing: standard dropout rescaling, weights scaled by keep_prob.
            z2 = np.matmul(self.keep_prob*self.W, x) + self.B1
            a2 = lrelu(z2)
            o = np.matmul(self.keep_prob*self.U.transpose(), a2) + self.B2

        if self.outer_relu:
            s = o
        else:
            raise Exception('Support for Non-Outer_Relu removed')

        return (z2, a2, s)

    def Get_Energy(self, x):
        # Dropout-free forward pass; the feature vector is truncated to its
        # first 1500 entries (the expected input dimension is hard-coded here).
        x = x[0:1500]

        z2 = np.matmul(self.W, x) + self.B1
        a2 = lrelu(z2)
        o = np.matmul(self.U.transpose(), a2) + self.B2
        if self.outer_relu:
            s = o
        else:
            raise Exception('Support for Non-Outer_Relu removed')
        return s

    def Back_Prop(self, dLdOut, nodeLen, featVMat, _debug = True):
        # Accumulate gradients over every (i, j) pair that contributes to the
        # loss, then take one averaged gradient step per parameter.
        N = nodeLen
        dLdU = np.zeros(self.U.shape)
        dLdB2 = np.zeros(self.B2.shape)

        dLdW = np.zeros(self.W.shape)
        dLdB1 = np.zeros(self.B1.shape)

        if not self.outer_relu:
            raise Exception('Support for Non-Outer_Relu removed')

        etaW = self.etaW
        etaB1 = self.etaB1
        etaU = self.etaU
        etaB2 = self.etaB2

        if (etaW is None) or (etaB1 is None) or (etaU is None) or (etaB2 is None):
            raise Exception('Learning Rates Not Set...')

        batch_size = 0
        for i in range(N):
            for j in range(N):
                if dLdOut[i, j] != 0 and (featVMat[i][j] is not None):
                    batch_size += 1
                    x = featVMat[i][j][0:1500]
                    (z2, a2, s) = self.Forward_Prop(x)

                    # Output layer: o = U^T a2 + B2, so dL/dU = dL/do * a2.
                    dLdU += dLdOut[i, j]*a2
                    dLdB2 += dLdOut[i, j]

                    # Hidden layer: a2 = lrelu(z2)*r2 and z2 = W (x*r1) + B1, so
                    # the r2 dropout mask enters the chain rule, and dL/dB1 is
                    # the element-wise product U * lrelu'(z2) * r2.
                    dRelu = d_lrelu(z2)*self.r2
                    dLdW += (dLdOut[i, j])*np.matmul((self.U*dRelu), (x*self.r1).transpose())
                    dLdB1 += dLdOut[i, j]*self.U*dRelu

        if batch_size > 0:
            delW = etaW*dLdW/batch_size
            delU = etaU*dLdU/batch_size
            delB1 = etaB1*dLdB1/batch_size
            delB2 = etaB2*dLdB2/batch_size
            if _debug:
                print('Max(delW): %10.6f\tMax(delU): %10.6f'%(np.max(np.abs(delW)), np.max(np.abs(delU))))
            self.W -= delW
            self.B1 -= delB1

            self.U -= delU
            self.B2 -= delB2

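
# Two illustrative sketches for the NN class above; the helper names, sizes,
# learning rates, and random data are assumptions for demonstration, not part
# of the original training setup.

# With keep_prob = 1.0 the masks are all ones and the keep_prob rescaling is a
# no-op, so the training and testing forward passes should agree exactly.
def _check_dropout_consistency(seed=0):
    rng = np.random.RandomState(seed)
    net = NN(input_dimension=10, hidden_layer_size=8, keep_prob=1.0)
    x = rng.randn(10, 1)
    net.ForTraining()
    (_, _, s_train) = net.Forward_Prop(x)
    net.ForTesting()
    (_, _, s_test) = net.Forward_Prop(x)
    assert np.allclose(s_train, s_test)


# Minimal usage: Back_Prop expects dLdOut as an N x N array of per-pair loss
# gradients and featVMat as an N x N table of feature column vectors (or None).
def _demo_nn(seed=0):
    rng = np.random.RandomState(seed)
    net = NN(input_dimension=1500, hidden_layer_size=64, keep_prob=0.9)
    net.etaW, net.etaB1, net.etaU, net.etaB2 = 1e-3, 1e-3, 1e-3, 1e-3

    net.ForTraining()
    net.new_dropout()                      # resample dropout masks for this step

    N = 2
    dLdOut = np.ones((N, N))               # stand-in loss gradient per pair
    featVMat = [[rng.randn(1500, 1) for _ in range(N)] for _ in range(N)]
    net.Back_Prop(dLdOut, N, featVMat, _debug=False)
    return net

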
class NN_2:
    def __init__(self, input_dimension, hidden_layer_1_size, hidden_layer_2_size = None, outer_relu = True):
        # Two-hidden-layer network computing
        #   s = U^T * lrelu(W2 * lrelu(W1 x + B1) + B2) + B3.
        # The second hidden layer defaults to the size of the first.
        if hidden_layer_2_size is None:
            hidden_layer_2_size = hidden_layer_1_size

        self.h1 = hidden_layer_1_size
        self.h2 = hidden_layer_2_size
        self.d = input_dimension

        # Hidden-layer parameters, small uniform initialisation.
        rand_init_range = 1e-2
        self.W1 = np.random.uniform(-rand_init_range, rand_init_range, (self.h1, self.d))
        self.B1 = np.random.uniform(-rand_init_range, rand_init_range, (self.h1, 1))
        self.W2 = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, self.h1))
        self.B2 = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, 1))

        # Output-layer parameters.
        rand_init_range = 1e-1
        self.U = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, 1))
        self.B3 = np.random.uniform(-rand_init_range, rand_init_range, (1, 1))

        # If False, a sigmoid output would be applied; that path has been removed.
        self.outer_relu = outer_relu

        # Per-parameter learning rates; must be set before calling Back_Prop.
        self.etaW1 = None
        self.etaB1 = None
        self.etaW2 = None
        self.etaB2 = None
        self.etaU = None
        self.etaB3 = None

        self.version = 'h2'

    def Forward_Prop(self, x):
        z2 = np.matmul(self.W1, x) + self.B1
        a2 = lrelu(z2)

        z3 = np.matmul(self.W2, a2) + self.B2
        a3 = lrelu(z3)

        o = np.matmul(self.U.transpose(), a3) + self.B3
        if self.outer_relu:
            s = o
        else:
            raise Exception('Support for Non-Outer_Relu removed')
        return (z3, a3, z2, a2, s)

    def Get_Energy(self, x):
        # Same forward pass as above, returning only the scalar energy.
        (z3, a3, z2, a2, s) = self.Forward_Prop(x)
        return s

    def Back_Prop(self, dLdOut, nodeLen, featVMat, _debug = True):
        # Accumulate gradients over every (i, j) pair that contributes to the
        # loss, then take one averaged gradient step per parameter.
        N = nodeLen
        dLdU = np.zeros(self.U.shape)
        dLdB3 = np.zeros(self.B3.shape)

        dLdW2 = np.zeros(self.W2.shape)
        dLdB2 = np.zeros(self.B2.shape)

        dLdW1 = np.zeros(self.W1.shape)
        dLdB1 = np.zeros(self.B1.shape)

        if not self.outer_relu:
            raise Exception('Support for Non-Outer_Relu removed')

        etaW1 = self.etaW1
        etaB1 = self.etaB1
        etaW2 = self.etaW2
        etaB2 = self.etaB2
        etaU = self.etaU
        etaB3 = self.etaB3

        if (etaW1 is None) or (etaB1 is None) or (etaW2 is None) or (etaB2 is None) or (etaU is None) or (etaB3 is None):
            raise Exception('Learning Rates Not Set...')

        batch_size = 0
        for i in range(N):
            for j in range(N):
                if dLdOut[i, j] != 0 and (featVMat[i][j] is not None):
                    batch_size += 1
                    (z3, a3, z2, a2, s) = self.Forward_Prop(featVMat[i][j])

                    # Output layer: o = U^T a3 + B3.
                    dLdU += dLdOut[i, j]*a3
                    dLdB3 += dLdOut[i, j]

                    # Second hidden layer: a3 = lrelu(z3), z3 = W2 a2 + B2.
                    dRelu_z3 = d_lrelu(z3)
                    dLdW2 += (dLdOut[i, j])*np.matmul((self.U*dRelu_z3), a2.transpose())
                    dLdB2 += dLdOut[i, j]*self.U*dRelu_z3

                    # First hidden layer: a2 = lrelu(z2), z2 = W1 x + B1; the
                    # back-propagated signal is W2^T (U * lrelu'(z3)) * lrelu'(z2).
                    dRelu_z2 = d_lrelu(z2)
                    dLdW1 += (dLdOut[i, j])*np.matmul(np.matmul(self.W2.transpose(), self.U*dRelu_z3)*dRelu_z2, featVMat[i][j].transpose())
                    dLdB1 += (dLdOut[i, j])*np.matmul(self.W2.transpose(), self.U*dRelu_z3)*dRelu_z2

        if batch_size > 0:
            delW1 = etaW1*dLdW1/batch_size
            delW2 = etaW2*dLdW2/batch_size   # each parameter uses its own learning rate
            delU = etaU*dLdU/batch_size
            delB1 = etaB1*dLdB1/batch_size
            delB2 = etaB2*dLdB2/batch_size
            delB3 = etaB3*dLdB3/batch_size
            if _debug:
                print('Max(delW2): %10.6f\tMax(delW1): %10.6f\tMax(delU): %10.6f'%(np.max(np.abs(delW2)), np.max(np.abs(delW1)), np.max(np.abs(delU))))

            self.W1 -= delW1
            self.B1 -= delB1

            self.W2 -= delW2
            self.B2 -= delB2

            self.U -= delU
            self.B3 -= delB3
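

# A finite-difference gradient check for NN_2 (a sketch; the helper name,
# sizes, eps, and tolerance are assumptions). With a single feature vector and
# dLdOut = 1, the energy s plays the role of the loss, so dL/dU = a3 should
# match the numerical gradient of Get_Energy with respect to the entries of U.
def _grad_check_nn2(eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    net = NN_2(input_dimension=10, hidden_layer_1_size=8, hidden_layer_2_size=6)
    x = rng.randn(10, 1)

    (z3, a3, z2, a2, s) = net.Forward_Prop(x)
    analytic_dLdU = a3                       # dL/dU for L = s = U^T a3 + B3

    numeric_dLdU = np.zeros(net.U.shape)
    for k in range(net.U.shape[0]):
        net.U[k, 0] += eps
        s_plus = net.Get_Energy(x)
        net.U[k, 0] -= 2*eps
        s_minus = net.Get_Energy(x)
        net.U[k, 0] += eps                   # restore the original weight
        numeric_dLdU[k, 0] = (s_plus - s_minus).item()/(2*eps)

    assert np.allclose(analytic_dLdU, numeric_dLdU, atol=1e-5)


# Optional entry point (an assumption, not in the original file): run the
# sketches above when the module is executed directly.
if __name__ == '__main__':
    _check_sigmoid_grad()
    _check_lrelu_grad()
    _check_dropout_consistency()
    _demo_nn()
    _grad_check_nn2()
    print('All sanity sketches passed.')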