# nn-from-scratch/tests/test_nn.py
# LaelaZ's picture
# Upload folder using huggingface_hub
# 5041f39 verified
"""Tests for the from-scratch network.
The important one is `test_gradient_check`: it verifies the hand-written backward
passes against numerical finite-difference gradients. If the chain rule were wired
up wrong, this test would catch it. This is the "build it AND prove it" guarantee.
"""
from __future__ import annotations
import numpy as np
from nn.layers import softmax_cross_entropy
from nn.model import MLP, Adam
def _loss_only(model: MLP, x, y) -> float:
    """Return the softmax cross-entropy loss of ``model`` on ``(x, y)``.

    Only ``model.forward`` is run; the second value returned by
    ``softmax_cross_entropy`` is discarded, so nothing in this helper asks the
    model to compute or store gradients.
    """
    loss_value, _unused = softmax_cross_entropy(model.forward(x), y)
    return loss_value
def test_softmax_cross_entropy_uniform():
    """All-zero logits over C classes must yield a loss of ln(C)."""
    num_classes = 10
    zero_logits = np.zeros((4, num_classes))
    labels = np.array([0, 1, 2, 3])

    loss, dlogits = softmax_cross_entropy(zero_logits, labels)

    # Identical logits mean every class gets probability 1/C, hence -ln(1/C).
    expected_loss = np.log(num_classes)
    assert abs(loss - expected_loss) < 1e-6
    # The second return value must have one entry per logit.
    assert dlogits.shape == zero_logits.shape
def test_forward_shape():
    """Check the output shapes of forward/predict and that probabilities sum to 1."""
    batch, n_in, n_out = 8, 784, 10
    model = MLP(sizes=(n_in, 64, 32, n_out), seed=1)
    x = np.random.default_rng(0).standard_normal((batch, n_in)).astype(np.float32)

    # One row of class scores per input sample.
    logits = model.forward(x)
    assert logits.shape == (batch, n_out)

    # One prediction per input sample.
    predictions = model.predict(x)
    assert predictions.shape == (batch,)

    # Each row of probabilities must sum to one.
    row_totals = model.probabilities(x).sum(axis=1)
    assert np.allclose(row_totals, 1.0, atol=1e-6)
def test_gradient_check():
    """Analytic gradients must match finite differences to high precision.

    The gradients left behind by ``model.loss_and_grad`` are snapshotted, then
    a handful of randomly chosen coordinates of every parameter tensor are
    nudged by ``+eps`` and ``-eps`` to form a central-difference estimate of
    the loss derivative. Each estimate must agree with the snapshot to a
    relative error below 1e-4.
    """
    rng = np.random.default_rng(42)
    model = MLP(sizes=(6, 5, 4, 3), seed=2)
    x = rng.standard_normal((4, 6))
    y = rng.integers(0, 3, size=4)

    # Analytic gradients (snapshot them; FD will perturb params in place afterwards).
    model.loss_and_grad(x, y)
    analytic = [g.copy() for _, g in model.params_and_grads()]

    eps = 1e-5
    for idx, (p, _) in enumerate(model.params_and_grads()):
        ga = analytic[idx]
        # Check a handful of random coordinates per parameter tensor.
        coords = rng.choice(p.size, size=min(5, p.size), replace=False)
        for c in coords:
            # Write through ``p`` itself rather than through ``p.ravel()``:
            # ravel returns a *copy* for non-contiguous arrays, in which case
            # the perturbation would never reach the model and the numeric
            # gradient would silently come out as zero.
            pos = np.unravel_index(c, p.shape)
            orig = p[pos]

            p[pos] = orig + eps
            lp = _loss_only(model, x, y)
            p[pos] = orig - eps
            lm = _loss_only(model, x, y)
            p[pos] = orig  # restore before the next coordinate is probed

            numeric = (lp - lm) / (2 * eps)
            # Symmetric relative error; the floor avoids 0/0 when both
            # gradients vanish.
            denom = max(1e-8, abs(numeric) + abs(ga[pos]))
            rel_err = abs(numeric - ga[pos]) / denom
            assert rel_err < 1e-4, f"grad mismatch at param {idx} coord {c}: {rel_err:.2e}"
def test_overfit_tiny_batch():
    """A tiny batch should be driven to near-zero loss — proves the loop learns."""
    n_steps = 300
    rng = np.random.default_rng(0)
    model = MLP(sizes=(20, 32, 16, 4), seed=0)
    opt = Adam(model, lr=1e-2)
    x = rng.standard_normal((8, 20))
    y = rng.integers(0, 4, size=8)

    # Kept only for the failure message below (presumably the starting loss —
    # the return type of ``loss_and_grad`` is not visible from this file).
    first = model.loss_and_grad(x, y)

    step = 0
    while step < n_steps:
        # Recompute gradients on the same batch, then apply one optimizer update.
        model.loss_and_grad(x, y)
        opt.step()
        step += 1

    last = _loss_only(model, x, y)
    assert last < 0.05, f"expected near-zero loss, got {last:.4f} (started {first:.4f})"

    # Every sample of the memorised batch must now be classified correctly.
    matches = model.predict(x) == y
    assert matches.all()