# nn-from-scratch / nn /model.py
# LaelaZ's picture
# Upload folder using huggingface_hub
# 5041f39 verified
"""A multilayer perceptron built from the hand-written layers, plus an Adam optimizer."""
from __future__ import annotations
import numpy as np
from .layers import Linear, ReLU, softmax_cross_entropy
class MLP:
    """Feed-forward classifier: Linear layers with a ReLU between each pair.

    With the default ``sizes`` the stack is
    Linear -> ReLU -> Linear -> ReLU -> Linear. No activation follows the
    last Linear layer; its output is treated as logits by the methods below.
    """

    def __init__(self, sizes=(784, 256, 128, 10), seed: int = 0):
        """Build the layer stack.

        Args:
            sizes: Layer widths, input width first and output width last.
                Each consecutive pair becomes one Linear layer, so any
                length >= 2 is accepted.
            seed: Seed for the NumPy generator passed to every Linear layer.

        Raises:
            ValueError: If ``sizes`` has fewer than two entries.
        """
        sizes = tuple(sizes)
        if len(sizes) < 2:
            raise ValueError(
                f"sizes needs at least an input and an output width, got {sizes!r}"
            )
        rng = np.random.default_rng(seed)
        self.layers = []
        # Layers are created in the same order as before (Linear, ReLU,
        # Linear, ...) so the shared generator is consumed identically for
        # the default four-entry configuration.
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            if self.layers:
                self.layers.append(ReLU())
            self.layers.append(Linear(n_in, n_out, rng))

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, dlogits: np.ndarray) -> None:
        """Push the gradient ``dlogits`` through the layers in reverse order.

        Each layer's ``backward`` receives the gradient returned by the layer
        after it. Nothing is returned; presumably each layer keeps its own
        parameter gradients for ``params_and_grads`` (confirm in the layers
        module).
        """
        for layer in reversed(self.layers):
            dlogits = layer.backward(dlogits)

    def loss_and_grad(self, x: np.ndarray, y: np.ndarray):
        """Run forward, softmax cross-entropy and backward; return the loss.

        Args:
            x: Input batch passed to ``forward``.
            y: Targets handed unchanged to ``softmax_cross_entropy``.

        Returns:
            The loss value produced by ``softmax_cross_entropy``.
        """
        logits = self.forward(x)
        loss, dlogits = softmax_cross_entropy(logits, y)
        self.backward(dlogits)
        return loss

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Return the index of the largest logit along axis 1 for each row."""
        return self.forward(x).argmax(axis=1)

    def probabilities(self, x: np.ndarray) -> np.ndarray:
        """Return the softmax of the logits, normalized along axis 1."""
        logits = self.forward(x)
        # Subtracting the per-row maximum keeps np.exp from overflowing and
        # does not change the softmax result.
        shifted = logits - logits.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / exp.sum(axis=1, keepdims=True)

    def params_and_grads(self):
        """Yield whatever each layer's ``params_and_grads`` yields, in layer order."""
        for layer in self.layers:
            yield from layer.params_and_grads()

    # --- save / load -------------------------------------------------------
    def state(self) -> dict:
        """Return a snapshot of every Linear layer's weights and biases.

        Keys are ``"W{i}"`` and ``"b{i}"`` where ``i`` is the layer's index in
        ``self.layers`` (0, 2, 4 for the default stack). The arrays are
        copies: the optimizer in this module updates parameters in place, so
        returning the live arrays would let later training overwrite an
        earlier snapshot.
        """
        out = {}
        for i, layer in enumerate(self.layers):
            if isinstance(layer, Linear):
                out[f"W{i}"] = np.array(layer.W, copy=True)
                out[f"b{i}"] = np.array(layer.b, copy=True)
        return out

    def load_state(self, state: dict) -> None:
        """Load Linear weights and biases from a mapping produced by ``state``.

        The values are copied so that later in-place parameter updates do not
        modify the caller's arrays (or another model loaded from the same
        mapping).

        Raises:
            KeyError: If ``state`` lacks a ``"W{i}"`` or ``"b{i}"`` entry for
                one of the Linear layers.
        """
        for i, layer in enumerate(self.layers):
            if isinstance(layer, Linear):
                layer.W = np.array(state[f"W{i}"], copy=True)
                layer.b = np.array(state[f"b{i}"], copy=True)
class Adam:
    """Adam update rule applied to each (param, grad) pair a model exposes."""

    def __init__(self, model: MLP, lr: float = 1e-3, betas=(0.9, 0.999), eps: float = 1e-8):
        """Store the hyper-parameters and allocate zeroed moment buffers.

        Args:
            model: Object whose ``params_and_grads()`` yields (param, grad) pairs.
            lr: Step size.
            betas: Decay rates for the first and second moment estimates;
                only indices 0 and 1 are read.
            eps: Constant added to the denominator of the update.
        """
        self.model = model
        self.lr = lr
        self.b1 = betas[0]
        self.b2 = betas[1]
        self.eps = eps
        self.t = 0  # steps taken so far; used for bias correction in step()

        def zero_buffers():
            # One zero array per parameter, in the order the model yields them.
            return [np.zeros_like(param) for param, _grad in model.params_and_grads()]

        self._m = zero_buffers()
        self._v = zero_buffers()

    def step(self) -> None:
        """Advance the step counter and apply one Adam update to every parameter."""
        self.t += 1
        beta1, beta2, step_count = self.b1, self.b2, self.t
        for slot, (param, grad) in enumerate(self.model.params_and_grads()):
            # Exponential moving averages of the gradient and its square.
            first = beta1 * self._m[slot] + (1 - beta1) * grad
            second = beta2 * self._v[slot] + (1 - beta2) * (grad * grad)
            self._m[slot] = first
            self._v[slot] = second
            # Bias correction compensates for the zero-initialized buffers.
            first_hat = first / (1 - beta1 ** step_count)
            second_hat = second / (1 - beta2 ** step_count)
            # In-place subtraction so the array yielded by the model is
            # itself modified rather than a rebound local.
            param -= self.lr * first_hat / (np.sqrt(second_hat) + self.eps)