ShamanChingChong committed on
Commit
a43ef41
·
1 Parent(s): 027680f
Files changed (1) hide show
  1. RNN.py +86 -0
RNN.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Convention: batch_first=True, i.e. inputs have shape (batch_size, sequence_len, input_size)
2
+
3
+ import numpy as np
4
+
5
+
6
class Layer:
    """Single-layer vanilla RNN with a softmax readout at every time step.

    Inputs are batch-first: ``x_train`` has shape
    (batch_size, seq_len, input_size).  Internally, hidden states and logits
    are stored column-wise, i.e. (hidden_size, batch_size) and
    (output_size, batch_size).

    Recurrence implemented by ``feedforward``::

        s_t = tanh(w @ s_{t-1} + u @ x_t + b)
        y_t = softmax(V @ s_t + c)
    """

    def __init__(self, input_size, output_size, hidden_size, learning_rate):
        """Create randomly initialised parameters.

        Args:
            input_size: Number of features per time step.
            output_size: Number of output classes per time step.
            hidden_size: Dimension of the recurrent hidden state.
            learning_rate: Step size used by ``backward`` for the SGD update.
        """
        self.input = None
        self.output = None
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.w = np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
        self.u = np.random.randn(hidden_size, input_size)   # input  -> hidden
        self.b = np.zeros((hidden_size, 1))                 # hidden bias
        self.V = np.random.randn(output_size, hidden_size)  # hidden -> output
        self.c = np.zeros((output_size, 1))                 # output bias
        self.hidden_size = hidden_size

    @staticmethod
    def softmax(x, axis=0):
        """Return the softmax of ``x`` along ``axis``.

        The default ``axis=0`` matches the (output_size, batch_size) layout
        used by ``feedforward``: every column (one sample) is normalised
        independently instead of normalising over the whole batch.
        """
        # Subtracting the per-slice maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large logits.
        shifted = x - np.max(x, axis=axis, keepdims=True)
        x_exp = np.exp(shifted)
        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

    @staticmethod
    def relu(x):
        """Return the element-wise maximum of ``x`` and 0."""
        return np.maximum(0, x)

    @staticmethod
    def softmax_derivative(x):
        """Return ``x * (1 - x)`` element-wise.

        NOTE(review): for ``x`` being a softmax output this is only the
        diagonal of the softmax Jacobian; it is not used by this class.
        """
        return x * (1 - x)

    @staticmethod
    def relu_derivative(x):
        """Return 1 where ``x > 0`` and 0 elsewhere."""
        return np.where(x > 0, 1, 0)

    def feedforward(self, x_train):
        """Run the RNN over a batch of sequences.

        Args:
            x_train: Array of shape (batch_size, seq_len, input_size).

        Returns:
            A tuple ``(yhat, s_list)`` where ``yhat`` has shape
            (batch_size, seq_len, output_size) and ``s_list`` is a list of
            ``seq_len`` hidden states, each of shape (hidden_size, batch_size).
        """
        batch_size, seq_len, _ = x_train.shape
        s = np.zeros((self.hidden_size, batch_size))  # initial hidden state
        y = []
        s_list = []

        for t in range(seq_len):
            x_t = x_train[:, t, :]  # (batch, input)
            s = np.tanh(np.dot(self.w, s) + np.dot(self.u, x_t.T) + self.b)
            z = np.dot(self.V, s) + self.c  # (output, batch)
            # Normalise per sample (axis 0 = classes), then transpose to
            # (batch, output).
            y.append(self.softmax(z, axis=0).T)
            s_list.append(s)

        # Stacking on axis 1 yields (batch_size, seq_len, output_size).
        return np.stack(y, axis=1), s_list

    def backward(self, x_train, y_train, yhat, s_list):
        """Back-propagate through time and apply one gradient-descent step.

        The output error ``yhat - y_train`` is the gradient of the softmax
        cross-entropy loss with respect to the logits, averaged over the batch
        and summed over time steps.

        Args:
            x_train: Inputs, shape (batch_size, seq_len, input_size).
            y_train: Targets, shape (batch_size, seq_len, output_size).
            yhat: Predictions returned by ``feedforward``.
            s_list: Hidden states returned by ``feedforward``.

        Returns:
            The gradients ``(dLdW, dLdU, dLdb, dLdV, dLdc)`` that were applied.
        """
        batch_size, seq_len, _ = y_train.shape

        # Gradient accumulators, one per parameter.
        dLdW = np.zeros_like(self.w)
        dLdU = np.zeros_like(self.u)
        dLdb = np.zeros_like(self.b)
        dLdV = np.zeros_like(self.V)
        dLdc = np.zeros_like(self.c)
        # Gradient flowing into s_t from time step t + 1.
        dLds_next = np.zeros_like(s_list[0])

        for t in reversed(range(seq_len)):
            x_t = x_train[:, t, :].T  # (input, batch)
            s_t = s_list[t]           # (hidden, batch)
            # The state before the first step is the all-zero initial state.
            s_prev = s_list[t - 1] if t > 0 else np.zeros_like(s_t)

            dz = (yhat[:, t, :].T - y_train[:, t, :].T) / batch_size  # (output, batch)
            # Gradient w.r.t. s_t: contribution from the readout plus the
            # contribution carried back from the following time step.
            dLds = np.dot(self.V.T, dz) + dLds_next  # (hidden, batch)
            # Through tanh: d tanh(a)/da = 1 - tanh(a)^2 = 1 - s_t^2.
            da = (1 - s_t ** 2) * dLds               # (hidden, batch)

            dLdV += np.dot(dz, s_t.T)                  # (output, hidden)
            dLdc += np.sum(dz, axis=1, keepdims=True)  # (output, 1)
            dLdU += np.dot(da, x_t.T)                  # (hidden, input)
            dLdW += np.dot(da, s_prev.T)               # (hidden, hidden)
            dLdb += np.sum(da, axis=1, keepdims=True)  # (hidden, 1)
            # s_{t-1} influences a_t through the recurrent matrix w (not u).
            dLds_next = np.dot(self.w.T, da)           # (hidden, batch)

        # Plain SGD update; without it training never changes the parameters.
        self.w -= self.learning_rate * dLdW
        self.u -= self.learning_rate * dLdU
        self.b -= self.learning_rate * dLdb
        self.V -= self.learning_rate * dLdV
        self.c -= self.learning_rate * dLdc

        return dLdW, dLdU, dLdb, dLdV, dLdc

    @staticmethod
    def compute_loss(yhat, y_true):
        """Return the mean squared error between ``yhat`` and ``y_true``.

        NOTE(review): ``backward`` uses the softmax cross-entropy gradient, so
        this value is a monitoring metric rather than the exact objective
        being minimised.
        """
        loss = np.mean((yhat - y_true) ** 2)
        return loss

    def train(self, x_train, y_train, epochs=100):
        """Run ``epochs`` full-batch forward/backward passes.

        The loss is printed every 10th epoch (computed before that epoch's
        parameter update).
        """
        for epoch in range(epochs):
            yhat, s_list = self.feedforward(x_train)
            loss = self.compute_loss(yhat, y_train)
            self.backward(x_train, y_train, yhat, s_list)
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')