Spaces:
No application file
No application file
Commit ·
a43ef41
1
Parent(s): 027680f
Add RNN
Browse files
RNN.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Convention: batch_first=True, i.e. inputs have shape (batch_size, sequence_len, input_size).
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Layer:
    """Single-layer vanilla (Elman) RNN with a softmax read-out at every step.

    For each time step ``t`` the layer computes::

        s_t = tanh(w @ s_{t-1} + u @ x_t + b)
        y_t = softmax(V @ s_t + c)

    Inputs and outputs are batch-first, ``(batch_size, seq_len, features)``.
    Hidden states are stored column-wise as ``(hidden_size, batch_size)``.
    """

    def __init__(self, input_size, output_size, hidden_size, learning_rate):
        """Create the layer and randomly initialise its parameters.

        Args:
            input_size: Number of features of each input vector ``x_t``.
            output_size: Number of classes produced by the softmax read-out.
            hidden_size: Dimension of the recurrent state ``s_t``.
            learning_rate: Step size used by :meth:`backward` for the
                gradient-descent update.
        """
        self.input = None
        self.output = None
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate

        # Recurrent (hidden -> hidden) and input (input -> hidden) weights.
        self.w = np.random.randn(hidden_size, hidden_size)
        self.u = np.random.randn(hidden_size, input_size)
        self.b = np.zeros((hidden_size, 1))
        # Read-out (hidden -> output) weights.
        self.V = np.random.randn(output_size, hidden_size)
        self.c = np.zeros((output_size, 1))

    @staticmethod
    def softmax(x, axis=0):
        """Return the softmax of ``x`` normalised along ``axis``.

        The default ``axis=0`` matches the ``(output_size, batch_size)``
        layout used in :meth:`feedforward`, so every column (one sample)
        sums to 1. The per-slice maximum is subtracted before
        exponentiating to avoid overflow; this does not change the result.
        """
        shifted = x - np.max(x, axis=axis, keepdims=True)
        x_exp = np.exp(shifted)
        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

    @staticmethod
    def relu(x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def softmax_derivative(x):
        """Return ``x * (1 - x)`` element-wise.

        When ``x`` is a softmax output this is the diagonal of the softmax
        Jacobian (cross terms are not included).
        """
        return x * (1 - x)

    @staticmethod
    def relu_derivative(x):
        """Return 1 where ``x > 0`` and 0 elsewhere."""
        return np.where(x > 0, 1, 0)

    def feedforward(self, x_train):
        """Run the forward pass over a whole batch of sequences.

        Args:
            x_train: Array of shape ``(batch_size, seq_len, input_size)``.

        Returns:
            A tuple ``(yhat, s_list)`` where ``yhat`` has shape
            ``(batch_size, seq_len, output_size)`` and ``s_list`` is a list
            of ``seq_len`` hidden states, each ``(hidden_size, batch_size)``.

        Raises:
            ValueError: If the last dimension of ``x_train`` does not match
                the layer's ``input_size``.
        """
        batch_size, seq_len, input_size = x_train.shape
        if input_size != self.input_size:
            raise ValueError(
                f"expected input_size={self.input_size}, got {input_size}"
            )

        s = np.zeros((self.hidden_size, batch_size))
        y = []
        s_list = []

        for t in range(seq_len):
            x_t = x_train[:, t, :]  # (batch, input)
            s = np.tanh(np.dot(self.w, s) + np.dot(self.u, x_t.T) + self.b)
            z = np.dot(self.V, s) + self.c  # (output, batch)
            # Normalise per sample (column), then go back to batch-first.
            y.append(self.softmax(z, axis=0).T)
            s_list.append(s)

        # Stacking on axis 1 yields (batch_size, seq_len, output_size).
        return np.stack(y, axis=1), s_list

    def backward(self, x_train, y_train, yhat, s_list):
        """Back-propagate through time and apply one gradient-descent step.

        The output error ``yhat - y_train`` is the gradient of the softmax
        cross-entropy loss with respect to the logits, averaged over the
        batch and summed over time steps.

        Args:
            x_train: Inputs, ``(batch_size, seq_len, input_size)``.
            y_train: Targets, ``(batch_size, seq_len, output_size)``; each
                target vector is expected to sum to 1 (e.g. one-hot).
            yhat: Predictions returned by :meth:`feedforward`.
            s_list: Hidden states returned by :meth:`feedforward`.

        Returns:
            The gradients ``(dLdW, dLdU, dLdb, dLdV, dLdc)`` that were used
            for the update.
        """
        batch_size, seq_len, _ = y_train.shape

        dLdW = np.zeros_like(self.w)
        dLdU = np.zeros_like(self.u)
        dLdb = np.zeros_like(self.b)
        dLdV = np.zeros_like(self.V)
        dLdc = np.zeros_like(self.c)
        # Gradient flowing into s_t from step t + 1 (zero at the last step).
        dLds_next = np.zeros_like(s_list[0])

        for t in reversed(range(seq_len)):
            x_t = x_train[:, t, :]  # (batch, input)
            s_t = s_list[t]  # (hidden, batch)
            s_prev = s_list[t - 1] if t > 0 else np.zeros_like(s_t)

            dz = (yhat[:, t, :] - y_train[:, t, :]).T / batch_size  # (output, batch)
            dLds = np.dot(self.V.T, dz) + dLds_next  # (hidden, batch)
            ds = (1 - s_t ** 2) * dLds  # through tanh: d tanh(a)/da = 1 - s^2

            dLdV += np.dot(dz, s_t.T)  # (output, hidden)
            dLdc += np.sum(dz, axis=1, keepdims=True)  # (output, 1)
            dLdU += np.dot(ds, x_t)  # (hidden, input)
            dLdW += np.dot(ds, s_prev.T)  # (hidden, hidden)
            dLdb += np.sum(ds, axis=1, keepdims=True)  # (hidden, 1)

            # s_{t-1} reaches s_t only through the recurrent weights w,
            # so the gradient is carried back with w.T (not u.T).
            dLds_next = np.dot(self.w.T, ds)

        # Plain gradient-descent update, done in place.
        self.w -= self.learning_rate * dLdW
        self.u -= self.learning_rate * dLdU
        self.b -= self.learning_rate * dLdb
        self.V -= self.learning_rate * dLdV
        self.c -= self.learning_rate * dLdc

        return dLdW, dLdU, dLdb, dLdV, dLdc

    @staticmethod
    def compute_loss(yhat, y_true):
        """Return the mean squared error between ``yhat`` and ``y_true``.

        NOTE: :meth:`backward` uses the softmax cross-entropy gradient, so
        this value is a monitoring metric rather than the exact objective
        being minimised.
        """
        loss = np.mean((yhat - y_true) ** 2)
        return loss

    def train(self, x_train, y_train, epochs=100):
        """Train with full-batch gradient descent for ``epochs`` iterations.

        The loss is printed every 10 epochs.

        Returns:
            A list with the loss measured before each epoch's update.
        """
        history = []
        for epoch in range(epochs):
            yhat, s_list = self.feedforward(x_train)
            loss = self.compute_loss(yhat, y_train)
            history.append(loss)
            self.backward(x_train, y_train, yhat, s_list)
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')
        return history
|