Spaces:
Sleeping
Sleeping
| """A multilayer perceptron built from the hand-written layers, plus an Adam optimizer.""" | |
| from __future__ import annotations | |
| import numpy as np | |
| from .layers import Linear, ReLU, softmax_cross_entropy | |
class MLP:
    """Multilayer perceptron classifier: Linear layers separated by ReLU.

    With the default ``sizes`` the stack is
    Linear -> ReLU -> Linear -> ReLU -> Linear. No activation follows the
    last Linear layer; its output is used as the logits.
    """

    def __init__(self, sizes=(784, 256, 128, 10), seed: int = 0):
        """Build the layer stack.

        Args:
            sizes: Layer widths, input width first and output width last.
                Each consecutive pair becomes one Linear layer, so at least
                two entries are required.
            seed: Seed for the NumPy generator handed to every Linear layer.

        Raises:
            ValueError: If ``sizes`` has fewer than two entries.
        """
        sizes = tuple(sizes)
        if len(sizes) < 2:
            raise ValueError(
                f"sizes needs at least an input and an output width, got {sizes!r}"
            )
        rng = np.random.default_rng(seed)
        self.layers = []
        for n_in, n_out in zip(sizes, sizes[1:]):
            # A ReLU goes between consecutive Linear layers, never after the last.
            if self.layers:
                self.layers.append(ReLU())
            self.layers.append(Linear(n_in, n_out, rng))

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Pass ``x`` through every layer in order and return the logits."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, dlogits: np.ndarray) -> None:
        """Propagate ``dlogits`` through the layers from last to first.

        Each layer's ``backward`` receives the gradient returned by the layer
        after it. Nothing is returned; presumably the layers keep their own
        parameter gradients, which ``params_and_grads`` then exposes.
        """
        grad = dlogits
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def loss_and_grad(self, x: np.ndarray, y: np.ndarray):
        """Run a forward pass, compute the loss, and back-propagate it.

        Args:
            x: Input batch fed to ``forward``.
            y: Targets, in whatever form ``softmax_cross_entropy`` expects.

        Returns:
            The loss value produced by ``softmax_cross_entropy``.
        """
        logits = self.forward(x)
        loss, dlogits = softmax_cross_entropy(logits, y)
        self.backward(dlogits)
        return loss

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Return, for each row of ``x``, the index of the largest logit."""
        return self.forward(x).argmax(axis=1)

    def probabilities(self, x: np.ndarray) -> np.ndarray:
        """Return the row-wise softmax of the logits for ``x``."""
        logits = self.forward(x)
        # Subtracting the row maximum keeps exp() from overflowing and does
        # not change the softmax result.
        shifted = logits - logits.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / exp.sum(axis=1, keepdims=True)

    def params_and_grads(self):
        """Yield every pair produced by each layer's ``params_and_grads``."""
        for layer in self.layers:
            yield from layer.params_and_grads()

    # --- save / load -------------------------------------------------------

    def state(self) -> dict:
        """Return a snapshot of all Linear weights and biases.

        Keys are ``W{i}`` and ``b{i}``, where ``i`` is the layer's position in
        ``self.layers`` (so positions held by ReLU layers are skipped). The
        arrays are copies: later in-place parameter updates do not alter a
        snapshot that was already taken.
        """
        out = {}
        for i, layer in enumerate(self.layers):
            if isinstance(layer, Linear):
                out[f"W{i}"] = np.array(layer.W, copy=True)
                out[f"b{i}"] = np.array(layer.b, copy=True)
        return out

    def load_state(self, state: dict) -> None:
        """Load Linear weights and biases from a mapping made by ``state``.

        The values are copied in, so the model never shares buffers with
        ``state`` or with another model loaded from the same mapping.

        Raises:
            KeyError: If an expected ``W{i}`` or ``b{i}`` entry is missing.
        """
        for i, layer in enumerate(self.layers):
            if isinstance(layer, Linear):
                layer.W = np.array(state[f"W{i}"], copy=True)
                layer.b = np.array(state[f"b{i}"], copy=True)
class Adam:
    """Adam optimizer over a model's (param, grad) pairs.

    Keeps one first-moment and one second-moment buffer per parameter, in the
    order the model's ``params_and_grads`` yields them, and applies
    bias-corrected updates to the parameter arrays in place.
    """

    def __init__(self, model: MLP, lr: float = 1e-3, betas=(0.9, 0.999), eps: float = 1e-8):
        """Set hyperparameters and allocate zeroed moment buffers.

        Args:
            model: Object whose ``params_and_grads()`` yields (param, grad)
                pairs.
            lr: Step size; must be non-negative.
            betas: Decay rates for the first and second moment estimates;
                each must lie in ``[0, 1)``.
            eps: Term added to the denominator of the update; must be
                non-negative.

        Raises:
            ValueError: If ``lr``, ``eps`` or either beta is out of range.
        """
        b1, b2 = betas[0], betas[1]
        if lr < 0:
            raise ValueError(f"lr must be non-negative, got {lr}")
        if eps < 0:
            raise ValueError(f"eps must be non-negative, got {eps}")
        # A beta of exactly 1 would zero the bias-correction denominator
        # (1 - beta ** t) and turn every update into inf/NaN.
        if not 0.0 <= b1 < 1.0:
            raise ValueError(f"betas[0] must be in [0, 1), got {b1}")
        if not 0.0 <= b2 < 1.0:
            raise ValueError(f"betas[1] must be in [0, 1), got {b2}")

        self.model = model
        self.lr, self.b1, self.b2, self.eps = lr, b1, b2, eps
        self.t = 0  # number of steps taken so far
        params = [p for p, _ in model.params_and_grads()]
        self._m = [np.zeros_like(p) for p in params]
        self._v = [np.zeros_like(p) for p in params]

    def step(self) -> None:
        """Apply one Adam update to every parameter, in place.

        Uses whatever gradients the model's ``params_and_grads()`` currently
        yields; the parameter arrays themselves are modified with ``-=``.
        """
        self.t += 1
        # The bias corrections depend only on the step count, so compute them
        # once per step rather than once per parameter.
        m_correction = 1 - self.b1 ** self.t
        v_correction = 1 - self.b2 ** self.t
        for i, (p, g) in enumerate(self.model.params_and_grads()):
            self._m[i] = self.b1 * self._m[i] + (1 - self.b1) * g
            self._v[i] = self.b2 * self._v[i] + (1 - self.b2) * (g * g)
            m_hat = self._m[i] / m_correction
            v_hat = self._v[i] / v_correction
            p -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)