import numpy as np
"""
################################################################################################
################### METHODS: SIGMOID AND DERIVATIVE OF SIGMOID ################################
################################################################################################
"""
def sigmoid(vec):
evec = 1 + np.exp(-vec)
return 1/evec
def d_sigmoid(output_of_gate):
return output_of_gate*(1-output_of_gate)
"""
################################################################################################
################### METHODS: ReLU AND DERIVATIVE OF ReLU ######################################
################################################################################################
"""
def relu(vec_x):
relu_x = vec_x.copy()
relu_x[vec_x < 0] = 0
return relu_x
def lrelu(vec_x):
relu_x = vec_x.copy()
relu_x[vec_x < 0] = relu_x[vec_x < 0]/100
return relu_x
def d_relu(vec_x):
d_relu_x = vec_x.copy()
d_relu_x[vec_x > 0] = 1
d_relu_x[vec_x <= 0] = 0
return d_relu_x
def d_lrelu(vec_x):
d_relu_x = vec_x.copy()
d_relu_x[vec_x > 0] = 1
d_relu_x[vec_x <= 0] = 0.01
return d_relu_x
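# Note: all of the activation helpers above operate element-wise on NumPy arrays and
# return modified copies of their input. d_lrelu's slope of 0.01 on non-positive values
# matches lrelu's division of negative inputs by 100, and d_sigmoid expects the *output*
# of the sigmoid (not its pre-activation input) as its argument.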
"""
################################################################################################
################## IMPLEMENTATION OF NEURAL NETWORK ##########################################
################################################################################################
"""
class NN:
def __init__(self, input_dimension, hidden_layer_size, outer_relu = True, keep_prob = 1.0):
# d: Input feature dimension i.e. the dimension of the edge feature vectors
# n: Hidden layer size
self.n = hidden_layer_size
self.d = input_dimension
rand_init_range = 1e-2
self.W = np.random.uniform(-rand_init_range, rand_init_range, (self.n, self.d))
self.B1 = np.random.uniform(-rand_init_range, rand_init_range, (self.n, 1))
rand_init_range = 1e-1
self.U = np.random.uniform(-rand_init_range, rand_init_range, (self.n, 1))
self.B2 = np.random.uniform(-rand_init_range, rand_init_range, (1, 1))
        # Apply ReLU or sigmoid at the output layer.
        # If ReLU is used, the log is assumed to have already been applied to the
        # feature before it is passed to the network; with an outer sigmoid, the
        # log is applied after the network instead.
self.outer_relu = outer_relu
# Learning Rates
self.etaW = None
self.etaB1 = None
self.etaU = None
self.etaB2 = None
self.version = 'h1'
# Dropout
self.keep_prob = keep_prob
self.dropout_prob = 1 - keep_prob
        self.r1 = np.ones((input_dimension, 1)) # Bernoulli keep-mask for the input layer
        self.r2 = np.ones(self.B1.shape) # Bernoulli keep-mask for the hidden layer
self.training_time = True
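    # Parameter shapes: W is (n, d), B1 and U are (n, 1), B2 is (1, 1), so for a column
    # feature vector x of shape (d, 1) the network computes (ignoring dropout)
    #     o = U^T * lrelu(W x + B1) + B2.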
def new_dropout(self):
self.r1 = np.random.binomial(1, self.keep_prob, size=self.r1.shape)
self.r2 = np.random.binomial(1, self.keep_prob, size=self.r2.shape)
def ForTraining(self):
self.training_time = True
def ForTesting(self):
self.training_time = False
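    # Forward_Prop: during training the input and hidden activations are masked by the
    # Bernoulli vectors r1 and r2; at test time the masks are dropped and W and U are
    # scaled by keep_prob instead (the standard weight-scaling view of dropout).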
def Forward_Prop(self, x):
if self.training_time:
z2 = np.matmul(self.W, x*self.r1) + self.B1
a2 = lrelu(z2)*self.r2
o = np.matmul(self.U.transpose(), a2) + self.B2
else:
z2 = np.matmul(self.keep_prob*self.W, x) + self.B1
a2 = lrelu(z2)
o = np.matmul(self.keep_prob*self.U.transpose(), a2) + self.B2
if self.outer_relu:
# s = relu(o)
s = o
else:
raise Exception('Support for Non-Outer_Relu removed')
s = sigmoid(o)
return (z2, a2, s)
'''
def Forward_Prop(self, x):
z2 = np.matmul(self.keep_prob*self.W, x) + self.B1
a2 = lrelu(z2)
o = np.matmul(self.keep_prob*self.U.transpose(), a2) + self.B2
if self.outer_relu:
# s = relu(o)
s = o
else:
raise Exception('Support for Non-Outer_Relu removed')
s = sigmoid(o)
return (z2, a2, s)
'''
def Get_Energy(self, x):
        # The input feature vector is truncated to its first 1500 components before
        # being passed through the network.
        x = x[0:1500]
        z2 = np.matmul(self.W, x) + self.B1
        a2 = lrelu(z2)
o = np.matmul(self.U.transpose(), a2) + self.B2
if self.outer_relu:
# s = relu(o)
s = o
else:
raise Exception('Support for Non-Outer_Relu removed')
s = sigmoid(o)
return s
    # Back-propagate the gradient of the loss L, assuming s is the direct output of the network.
def Back_Prop(self, dLdOut, nodeLen, featVMat, _debug = True):
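        # With o = U^T * lrelu(W x + B1) + B2 (dropout masks omitted for clarity) and
        # upstream gradient g = dLdOut[i, j], the gradients accumulated below are
        #   dL/dU  = g * a2,   dL/dB2 = g,
        #   dL/dW  = g * (U * lrelu'(z2)) x^T,   dL/dB1 = g * (U * lrelu'(z2)),
        # where U * lrelu'(z2) is an element-wise product; the sums are averaged over
        # the batch before the parameter update.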
N = nodeLen
dLdU = np.zeros(self.U.shape)
dLdB2 = np.zeros(self.B2.shape)
dLdW = np.zeros(self.W.shape)
dLdB1 = np.zeros(self.B1.shape)
if not self.outer_relu:
raise Exception('Support for Non-Outer_Relu removed')
return
else:
etaW = self.etaW
etaB1 = self.etaB1
etaU = self.etaU
etaB2 = self.etaB2
if (etaW is None) or (etaB1 is None) or (etaU is None) or (etaB2 is None):
raise Exception('Learning Rates Not Set...')
batch_size = 0
for i in range(N):
for j in range(N):
if dLdOut[i, j] != 0 and (featVMat[i][j] is not None):
batch_size += 1
x = featVMat[i][j][0:1500]
(z2, a2, s) = self.Forward_Prop(x)
# print(a2.transpose())
# print('o')
# print(np.matmul(self.U.transpose(), a2))
dLdU += dLdOut[i, j]*a2
dLdB2 += dLdOut[i, j]
dRelu = d_lrelu(z2)
dLdW += (dLdOut[i, j])*np.matmul((self.U*dRelu), (x*self.r1).transpose())
                        # Element-wise product, shape (n, 1), matching the two-layer implementation below
                        dLdB1 += dLdOut[i, j]*(self.U*dRelu)
if batch_size > 0:
delW = etaW*dLdW/(batch_size)
delU = etaU*dLdU/(batch_size)
delB1 = etaB1*dLdB1/batch_size
delB2 = etaB2*dLdB2/batch_size
if _debug:
print('Max(delW): %10.6f\tMax(delU): %10.6f'%(np.max(np.abs(delW)), np.max(np.abs(delU))))
self.W -= delW
self.B1 -= delB1
self.U -= delU
self.B2 -= delB2
class NN_2:
def __init__(self, input_dimension, hidden_layer_1_size, hidden_layer_2_size = None, outer_relu = True):
# d: Input feature dimension i.e. the dimension of the edge feature vectors
# n: Hidden layer size
if hidden_layer_2_size is None:
hidden_layer_2_size = hidden_layer_1_size
self.h1 = hidden_layer_1_size
self.h2 = hidden_layer_2_size
self.d = input_dimension
rand_init_range = 1e-2
self.W1 = np.random.uniform(-rand_init_range, rand_init_range, (self.h1, self.d))
self.B1 = np.random.uniform(-rand_init_range, rand_init_range, (self.h1, 1))
self.W2 = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, self.h1))
self.B2 = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, 1))
rand_init_range = 1e-1
self.U = np.random.uniform(-rand_init_range, rand_init_range, (self.h2, 1))
self.B3 = np.random.uniform(-rand_init_range, rand_init_range, (1, 1))
        # Apply ReLU or sigmoid at the output layer.
        # If ReLU is used, the log is assumed to have already been applied to the
        # feature before it is passed to the network; with an outer sigmoid, the
        # log is applied after the network instead.
self.outer_relu = outer_relu
# Learning Rates
self.etaW1 = None
self.etaB1 = None
self.etaW2 = None
self.etaB2 = None
self.etaU = None
self.etaB3 = None
self.version = 'h2'
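    # Parameter shapes: W1 (h1, d), B1 (h1, 1), W2 (h2, h1), B2 (h2, 1), U (h2, 1), B3 (1, 1),
    # so the network computes o = U^T * lrelu(W2 * lrelu(W1 x + B1) + B2) + B3.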
def Forward_Prop(self, x):
z2 = np.matmul(self.W1, x) + self.B1
a2 = lrelu(z2)
z3 = np.matmul(self.W2, a2) + self.B2
a3 = lrelu(z3)
o = np.matmul(self.U.transpose(), a3) + self.B3
if self.outer_relu:
# s = relu(o)
s = o
else:
raise Exception('Support for Non-Outer_Relu removed')
s = sigmoid(o)
return (z3, a3, z2, a2, s)
def Get_Energy(self, x):
z2 = np.matmul(self.W1, x) + self.B1
a2 = lrelu(z2)
z3 = np.matmul(self.W2, a2) + self.B2
a3 = lrelu(z3)
o = np.matmul(self.U.transpose(), a3) + self.B3
if self.outer_relu:
# s = relu(o)
s = o
else:
raise Exception('Support for Non-Outer_Relu removed')
s = sigmoid(o)
return s
    # Back-propagate the gradient of the loss L, assuming s is the direct output of the network.
def Back_Prop(self, dLdOut, nodeLen, featVMat, _debug = True):
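        # Chain rule for the two-hidden-layer network, with g = dLdOut[i, j],
        # d3 = U * lrelu'(z3) and d2 = (W2^T d3) * lrelu'(z2) (element-wise products):
        #   dL/dU  = g * a3,       dL/dB3 = g,
        #   dL/dW2 = g * d3 a2^T,  dL/dB2 = g * d3,
        #   dL/dW1 = g * d2 x^T,   dL/dB1 = g * d2.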
N = nodeLen
dLdU = np.zeros(self.U.shape)
dLdB3 = np.zeros(self.B3.shape)
dLdW2 = np.zeros(self.W2.shape)
dLdB2 = np.zeros(self.B2.shape)
dLdW1 = np.zeros(self.W1.shape)
dLdB1 = np.zeros(self.B1.shape)
if not self.outer_relu:
raise Exception('Support for Non-Outer_Relu removed')
return
else:
etaW1 = self.etaW1
etaB1 = self.etaB1
etaW2 = self.etaW2
etaB2 = self.etaB2
etaU = self.etaU
etaB3 = self.etaB3
if (etaW1 is None) or (etaB1 is None) or (etaW2 is None) or (etaB2 is None) or (etaU is None) or (etaB3 is None):
raise Exception('Learning Rates Not Set...')
batch_size = 0
for i in range(N):
for j in range(N):
if dLdOut[i, j] != 0 and (featVMat[i][j] is not None):
batch_size += 1
(z3, a3, z2, a2, s) = self.Forward_Prop(featVMat[i][j])
# print(a2.transpose())
# print('o')
# print(np.matmul(self.U.transpose(), a2))
dLdU += dLdOut[i, j]*a3
dLdB3 += dLdOut[i, j]
dRelu_z3 = d_lrelu(z3)
dLdW2 += (dLdOut[i, j])*np.matmul((self.U*dRelu_z3), a2.transpose())
dLdB2 += dLdOut[i, j]*self.U*dRelu_z3
dRelu_z2 = d_lrelu(z2)
dLdW1 += (dLdOut[i, j])*np.matmul(np.matmul(self.W2.transpose(), self.U*dRelu_z3)*dRelu_z2, featVMat[i][j].transpose())
dLdB1 += (dLdOut[i, j])*np.matmul(self.W2.transpose(), self.U*dRelu_z3)*dRelu_z2
if batch_size > 0:
delW1 = etaW1*dLdW1/(batch_size)
                delW2 = etaW2*dLdW2/(batch_size)
delU = etaU*dLdU/(batch_size)
delB1 = etaB1*dLdB1/batch_size
delB2 = etaB2*dLdB2/batch_size
                delB3 = etaB3*dLdB3/batch_size
if _debug:
print('Max(delW2): %10.6f\tMax(delW1): %10.6f\tMax(delU): %10.6f'%(np.max(np.abs(delW2)), np.max(np.abs(delW1)), np.max(np.abs(delU))))
# Layer 1
self.W1 -= delW1
self.B1 -= delB1
# Layer 2
self.B2 -= delB2
self.W2 -= delW2
# Layer 3
self.U -= delU
self.B3 -= delB3
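"""
################################################################################################
################## EXAMPLE USAGE (ILLUSTRATIVE SKETCH) ########################################
################################################################################################
"""
# A minimal, hedged sketch of how the single-hidden-layer NN might be driven. The feature
# dimension, graph size, learning rates and the dummy dLdOut matrix below are illustrative
# placeholders, not values taken from the rest of this project.
if __name__ == "__main__":
    np.random.seed(0)
    d, n, N = 4, 8, 3  # feature dimension, hidden size, number of nodes (assumed)
    net = NN(input_dimension=d, hidden_layer_size=n, keep_prob=0.8)
    # Learning rates must be set before Back_Prop is called.
    net.etaW = net.etaB1 = net.etaU = net.etaB2 = 1e-2
    # Symmetric-looking matrix of edge feature column vectors, each of shape (d, 1);
    # diagonal entries are None and are skipped by Back_Prop.
    featVMat = [[np.random.rand(d, 1) if i != j else None for j in range(N)] for i in range(N)]
    print('Energy before update:', net.Get_Energy(featVMat[0][1]))
    # One illustrative gradient step with a dummy upstream gradient.
    net.ForTraining()
    net.new_dropout()
    dLdOut = np.random.randn(N, N)
    net.Back_Prop(dLdOut, N, featVMat, _debug=True)
    net.ForTesting()
    print('Energy after update:', net.Get_Energy(featVMat[0][1]))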