Spaces:
No application file
No application file
# Inputs are batch-first: dim(input) = (batch_size, sequence_len, input_size)
| import numpy as np | |
class Layer:
    """Single-layer vanilla (Elman) RNN with a softmax read-out at every step.

    Recurrence, for each time step ``t``::

        s_t = tanh(w @ s_{t-1} + u @ x_t + b)
        y_t = softmax(V @ s_t + c)

    Inputs are batch-first: ``x`` has shape
    ``(batch_size, sequence_len, input_size)``. Internally the hidden state
    and logits are kept column-per-sample, i.e. ``(hidden_size, batch_size)``
    and ``(output_size, batch_size)``.

    Parameters
    ----------
    input_size : int
        Number of features in each input vector ``x_t``.
    output_size : int
        Number of classes produced by the softmax read-out.
    hidden_size : int
        Dimension of the recurrent hidden state.
    learning_rate : float
        Step size used by :meth:`backward` for the gradient-descent update.
    """

    def __init__(self, input_size, output_size, hidden_size, learning_rate):
        # Kept for interface compatibility; none of the methods below use them.
        self.input = None
        self.output = None
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.w = np.random.randn(hidden_size, hidden_size)   # hidden -> hidden
        self.u = np.random.randn(hidden_size, input_size)    # input  -> hidden
        self.b = np.zeros((hidden_size, 1))                  # hidden bias
        self.V = np.random.randn(output_size, hidden_size)   # hidden -> output
        self.c = np.zeros((output_size, 1))                  # output bias
        self.hidden_size = hidden_size

    @staticmethod
    def softmax(x):
        """Return the softmax of *x* taken along axis 0.

        For the ``(output_size, batch_size)`` logits used in
        :meth:`feedforward` this normalises every sample (column)
        independently. A 1-D input is normalised over all of its entries.
        """
        x = np.asarray(x, dtype=float)
        if x.ndim == 0:
            # A single logit always gets probability 1.
            return np.ones_like(x)
        # Subtracting the column maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large logits.
        exps = np.exp(x - np.max(x, axis=0, keepdims=True))
        return exps / np.sum(exps, axis=0, keepdims=True)

    @staticmethod
    def relu(x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def softmax_derivative(x):
        """Return ``x * (1 - x)`` element-wise.

        This is the diagonal of the softmax Jacobian when *x* is already a
        softmax output.
        """
        return x * (1 - x)

    @staticmethod
    def relu_derivative(x):
        """Return 1 where ``x > 0`` and 0 elsewhere."""
        return np.where(x > 0, 1, 0)

    def feedforward(self, x_train):
        """Run the network over a batch of sequences.

        Parameters
        ----------
        x_train : np.ndarray
            Array of shape ``(batch_size, seq_len, input_size)``.

        Returns
        -------
        tuple
            ``(y, s_list)`` where ``y`` has shape
            ``(batch_size, seq_len, output_size)`` and ``s_list`` holds the
            ``seq_len`` hidden states, each ``(hidden_size, batch_size)``.
        """
        batch_size, seq_len, _ = x_train.shape
        s = np.zeros((self.hidden_size, batch_size))
        y = []
        s_list = []
        for t in range(seq_len):
            x_t = x_train[:, t, :]  # (batch, input)
            s = np.tanh(np.dot(self.w, s) + np.dot(self.u, x_t.T) + self.b)
            z = np.dot(self.V, s) + self.c  # (output, batch)
            # Transpose so each entry is (batch_size, output_size).
            y.append(self.softmax(z).T)
            s_list.append(s)
        # Stacking on axis 1 yields (batch_size, seq_len, output_size).
        return np.stack(y, axis=1), s_list

    def backward(self, x_train, y_train, yhat, s_list):
        """Back-propagate through time and apply one gradient-descent step.

        The output error ``(yhat - y_train) / batch_size`` is the gradient of
        the softmax cross-entropy loss (summed over time, averaged over the
        batch) with respect to the logits.

        Parameters
        ----------
        x_train : np.ndarray
            Inputs, ``(batch_size, seq_len, input_size)``.
        y_train : np.ndarray
            Targets, ``(batch_size, seq_len, output_size)``.
        yhat : np.ndarray
            Predictions returned by :meth:`feedforward`.
        s_list : list
            Hidden states returned by :meth:`feedforward`.

        Returns
        -------
        dict
            The gradients that were applied, keyed ``"w"``, ``"u"``, ``"b"``,
            ``"V"`` and ``"c"``.
        """
        batch_size, seq_len, _ = y_train.shape

        # Gradient accumulators, one per parameter.
        dLdW, dLdU, dLdb = np.zeros_like(self.w), np.zeros_like(self.u), np.zeros_like(self.b)
        dLdV, dLdc = np.zeros_like(self.V), np.zeros_like(self.c)
        # Gradient flowing into s_t from step t + 1 (nothing after the end).
        dLds_next = np.zeros((self.hidden_size, batch_size))

        for t in reversed(range(seq_len)):
            x_t = x_train[:, t, :].T  # (input, batch)
            s_t = s_list[t]           # (hidden, batch)
            # The state before the first step is the all-zero initial state.
            s_prev = s_list[t - 1] if t > 0 else np.zeros_like(s_t)

            dz = (yhat[:, t, :].T - y_train[:, t, :].T) / batch_size  # (output, batch)
            dLda = np.dot(self.V.T, dz) + dLds_next                   # (hidden, batch)
            ds = (1 - s_t ** 2) * dLda                                # through tanh

            dLdV += np.dot(dz, s_t.T)                  # (output, hidden)
            dLdc += np.sum(dz, axis=1, keepdims=True)  # (output, 1)
            dLdU += np.dot(ds, x_t.T)                  # (hidden, input)
            dLdW += np.dot(ds, s_prev.T)               # (hidden, hidden)
            dLdb += np.sum(ds, axis=1, keepdims=True)  # (hidden, 1)

            # s_{t-1} reaches s_t only through the recurrent matrix w, so the
            # gradient handed to the previous step goes through w.T (not u.T).
            dLds_next = np.dot(self.w.T, ds)

        # Gradient-descent update.
        self.w -= self.learning_rate * dLdW
        self.u -= self.learning_rate * dLdU
        self.b -= self.learning_rate * dLdb
        self.V -= self.learning_rate * dLdV
        self.c -= self.learning_rate * dLdc

        return {"w": dLdW, "u": dLdU, "b": dLdb, "V": dLdV, "c": dLdc}

    @staticmethod
    def compute_loss(yhat, y_true):
        """Return the mean squared error between *yhat* and *y_true*.

        NOTE: this is a monitoring metric only; the gradient used in
        ``backward`` is that of the softmax cross-entropy loss.
        """
        loss = np.mean((yhat - y_true) ** 2)
        return loss

    def train(self, x_train, y_train, epochs=100):
        """Run full-batch gradient descent for *epochs* iterations.

        Prints the loss every 10th epoch and returns the list of per-epoch
        losses, each measured before that epoch's update.
        """
        losses = []
        for epoch in range(epochs):
            yhat, s_list = self.feedforward(x_train)
            loss = self.compute_loss(yhat, y_train)
            losses.append(loss)
            self.backward(x_train, y_train, yhat, s_list)
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')
        return losses