Spaces:
Sleeping
Sleeping
| """Tests for the from-scratch network. | |
| The important one is `test_gradient_check`: it verifies the hand-written backward | |
| passes against numerical finite-difference gradients. If the chain rule were wired | |
| up wrong, this test would catch it. This is the "build it AND prove it" guarantee. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| from nn.layers import softmax_cross_entropy | |
| from nn.model import MLP, Adam | |
def _loss_only(model: MLP, x, y) -> float:
    """Return the loss of ``model`` on ``(x, y)`` from a forward pass alone.

    The logits come from ``model.forward(x)``; the second value returned by
    ``softmax_cross_entropy`` is discarded, so only the loss is handed back.
    """
    scores = model.forward(x)
    value, _unused = softmax_cross_entropy(scores, y)
    return value
def test_softmax_cross_entropy_uniform():
    """All-zero logits over C classes must yield a loss of ln(C)."""
    batch, n_classes = 4, 10
    flat_logits = np.zeros((batch, n_classes))
    targets = np.arange(batch)  # labels 0, 1, 2, 3

    loss_value, grad_logits = softmax_cross_entropy(flat_logits, targets)

    expected = np.log(n_classes)
    assert abs(loss_value - expected) < 1e-6
    # The returned gradient must have one entry per logit.
    assert grad_logits.shape == flat_logits.shape
def test_forward_shape():
    """Check the output shapes of forward/predict and that probabilities sum to 1."""
    n_samples, n_in, n_out = 8, 784, 10
    net = MLP(sizes=(n_in, 64, 32, n_out), seed=1)
    gen = np.random.default_rng(0)
    batch = gen.standard_normal((n_samples, n_in)).astype(np.float32)

    # One row of logits per sample, one column per class.
    assert net.forward(batch).shape == (n_samples, n_out)
    # One predicted value per sample.
    assert net.predict(batch).shape == (n_samples,)

    # Each row of probabilities has to sum to one (within tolerance).
    row_totals = net.probabilities(batch).sum(axis=1)
    assert np.allclose(row_totals, 1.0, atol=1e-6)
def test_gradient_check():
    """Analytic gradients must match finite differences to high precision.

    The analytic gradients are snapshotted after one ``loss_and_grad`` call.
    Then, for a handful of random coordinates in every parameter tensor, the
    central difference ``(L(w + eps) - L(w - eps)) / (2 * eps)`` is compared
    against the snapshot using a symmetric relative error.

    Parameters are perturbed by indexing the parameter array itself (via
    ``np.unravel_index``) rather than through ``p.ravel()``: ``ravel`` only
    returns a view for contiguous arrays, and a silent copy would mean the
    perturbation never reaches the model. The original value is restored in a
    ``finally`` block so a failing forward pass cannot leave a weight perturbed.
    """
    rng = np.random.default_rng(42)
    model = MLP(sizes=(6, 5, 4, 3), seed=2)
    x = rng.standard_normal((4, 6))
    y = rng.integers(0, 3, size=4)

    # Analytic gradients (snapshot them; FD will perturb params in place afterwards).
    model.loss_and_grad(x, y)
    analytic = [g.copy() for _, g in model.params_and_grads()]

    eps = 1e-5
    # Assumes params_and_grads() yields the live parameter arrays in a stable
    # order, so in-place writes affect the next forward pass.
    for idx, (p, _) in enumerate(model.params_and_grads()):
        # The snapshot is a fresh C-ordered copy, so its flat index c lines up
        # with np.unravel_index(c, p.shape) (also C order).
        ga = analytic[idx].ravel()
        # Check a handful of random coordinates per parameter tensor.
        coords = rng.choice(p.size, size=min(5, p.size), replace=False)
        for c in coords:
            where = np.unravel_index(c, p.shape)
            orig = p[where]
            try:
                p[where] = orig + eps
                lp = _loss_only(model, x, y)
                p[where] = orig - eps
                lm = _loss_only(model, x, y)
            finally:
                # Always put the original weight back, even if a forward pass raised.
                p[where] = orig
            numeric = (lp - lm) / (2 * eps)
            # The floor on the denominator avoids 0/0 when both gradients vanish.
            denom = max(1e-8, abs(numeric) + abs(ga[c]))
            rel_err = abs(numeric - ga[c]) / denom
            assert rel_err < 1e-4, f"grad mismatch at param {idx} coord {c}: {rel_err:.2e}"
def test_overfit_tiny_batch():
    """A tiny batch should be driven to near-zero loss — proves the loop learns."""
    n_steps = 300
    gen = np.random.default_rng(0)
    net = MLP(sizes=(20, 32, 16, 4), seed=0)
    optimizer = Adam(net, lr=1e-2)
    inputs = gen.standard_normal((8, 20))
    labels = gen.integers(0, 4, size=8)

    # Value from before any optimizer step; only used in the failure message below.
    first = net.loss_and_grad(inputs, labels)

    step = 0
    while step < n_steps:
        net.loss_and_grad(inputs, labels)
        optimizer.step()
        step += 1

    last = _loss_only(net, inputs, labels)
    assert last < 0.05, f"expected near-zero loss, got {last:.4f} (started {first:.4f})"
    # Every sample in the batch must also be classified correctly.
    assert np.all(net.predict(inputs) == labels)