"""STANNO-style proof of concept

This module implements a very simple Self-Training Artificial Neural Network Object (STANNO)
loosely inspired by Thaler's description: two neural networks, one of which trains the other,
optionally folded into a single object.

Design choices:
- TraineeNet: a small multilayer perceptron (MLP) that learns a supervised mapping.
- Trainer: training logic embedded inside STANNO using standard gradient descent.
  Conceptually this plays the role of the "trainer" network described in the literature,
  but here it is implemented as explicit code for simplicity.

Features included for experimentation:
- Supervised training on a toy dataset (e.g., y = sin(x)).
- "Dreaming": run the trained net on a fixed or random latent input with inputs partially
  or totally "blinded" (set to zero or constant) to observe internal dynamics.
- Noise injection: add Gaussian noise with adjustable standard deviation to all weights,
  to explore how output complexity changes with noise level (from "stupidity" to chaos).
- Lesioning: randomly zero out a fraction of weights to mimic progressive "death" of
  connections and observe degradation ("tunnel vision").

The goal is not to reproduce the original spreadsheet implementation, but to give a
simple, hackable playground in modern Python/NumPy that you can extend (including
replacing the hard-coded trainer by a learned meta-network if desired).
"""
|
|
| from __future__ import annotations |
| import numpy as np |
| from dataclasses import dataclass |
| from typing import Tuple, Callable |
|
|
|
|
@dataclass
class TraineeNet:
    """Small two-layer MLP (input -> tanh hidden layer -> linear output).

    This is the network whose weights get trained by the STANNO object.

    Attributes:
        input_dim: number of input features (must be positive).
        hidden_dim: number of tanh hidden units (must be positive).
        output_dim: number of linear output units (must be positive).
        seed: optional seed for the weight initialisation. ``None`` (the
            default) seeds from OS entropy, so every instance differs.

    The weight arrays ``W1``, ``b1``, ``W2`` and ``b2`` are created in
    ``__post_init__``; they are plain attributes, not dataclass fields, so the
    generated ``__eq__`` and ``__repr__`` do not look at them.
    """

    input_dim: int
    hidden_dim: int
    output_dim: int
    seed: int | None = None

    def __post_init__(self) -> None:
        """Validate the layer sizes and draw the initial weights.

        Raises:
            ValueError: if any layer size is not positive.
        """
        for name in ("input_dim", "hidden_dim", "output_dim"):
            value = getattr(self, name)
            if value <= 0:
                raise ValueError(f"{name} must be positive, got {value!r}")

        rng = np.random.default_rng(self.seed)

        # Scale each layer by 1/sqrt(fan_in) so pre-activations start near unit variance.
        self.W1 = rng.normal(0.0, 1.0 / np.sqrt(self.input_dim), (self.input_dim, self.hidden_dim))
        self.b1 = np.zeros((1, self.hidden_dim))
        self.W2 = rng.normal(0.0, 1.0 / np.sqrt(self.hidden_dim), (self.hidden_dim, self.output_dim))
        self.b2 = np.zeros((1, self.output_dim))

    def parameters(self) -> list[np.ndarray]:
        """Return the live parameter arrays in the order ``[W1, b1, W2, b2]``.

        The arrays are not copied, so in-place updates by the caller modify
        the network itself.
        """
        return [self.W1, self.b1, self.W2, self.b2]

    def forward(self, x: np.ndarray) -> Tuple[np.ndarray, dict]:
        """Run a forward pass.

        Args:
            x: input batch whose last axis has length ``input_dim``.

        Returns:
            ``(y, cache)`` where ``y`` is the linear output and ``cache`` holds
            the intermediates ``x``, ``z1``, ``a1`` and ``z2`` for backprop.
        """
        z1 = x @ self.W1 + self.b1
        a1 = np.tanh(z1)
        z2 = a1 @ self.W2 + self.b2
        y = z2  # linear output layer: no activation
        cache = {"x": x, "z1": z1, "a1": a1, "z2": z2}
        return y, cache

    def apply_parameter_noise(self, sigma: float, rng: np.random.Generator | None = None) -> None:
        """Add Gaussian noise with standard deviation ``sigma`` to all parameters in place.

        A non-positive ``sigma`` is a no-op. Weights and biases are both perturbed.
        """
        if sigma <= 0:
            return
        if rng is None:
            rng = np.random.default_rng()
        for p in self.parameters():
            p += rng.normal(0.0, sigma, p.shape)

    def lesion(self, fraction: float, rng: np.random.Generator | None = None) -> None:
        """Randomly zero out weights in place (simulated connection death).

        Args:
            fraction: per-weight probability of being zeroed; clipped to [0, 1].
                Only ``W1`` and ``W2`` are affected, biases are left alone.
            rng: optional generator used to draw the lesion mask.
        """
        fraction = float(np.clip(fraction, 0.0, 1.0))
        if fraction <= 0:
            return
        if rng is None:
            rng = np.random.default_rng()
        for W in (self.W1, self.W2):
            # Each weight is dropped independently, so the zeroed share is
            # `fraction` only in expectation.
            mask = rng.random(W.shape) < fraction
            W[mask] = 0.0
|
|
|
|
class STANNO:
    """Self-Training Neural Network Object (STANNO-style).

    Bundles:
    - a trainable network (``TraineeNet``), and
    - an internal training algorithm (mini-batch gradient descent) that acts
      as the "trainer" and updates the weights from examples.

    This follows the spirit of the STANNOs described by Thaler: a single
    object that holds both the network and its training mechanism, so it can
    keep learning online.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        learning_rate: float = 1e-2,
    ) -> None:
        """Create the trainee network and store the gradient-descent step size."""
        self.net = TraineeNet(input_dim, hidden_dim, output_dim)
        self.learning_rate = learning_rate

    def _loss_and_grads(self, x: np.ndarray, y_true: np.ndarray) -> Tuple[float, list]:
        """Compute the MSE loss and its gradients via backprop for one batch.

        Args:
            x: inputs of shape (batch, input_dim).
            y_true: targets, normally of shape (batch, output_dim).

        Returns:
            ``(loss, grads)`` where ``loss`` is the mean squared error over
            every element of the batch and ``grads`` is
            ``[dL_dW1, dL_db1, dL_dW2, dL_db2]``, matching the order of
            ``self.net.parameters()``.

        Raises:
            ValueError: if ``y_true`` broadcasts the residual to a shape other
                than the prediction's (e.g. an (N,) target against an (N, 1)
                prediction), which would silently corrupt the gradients.
        """
        y_pred, cache = self.net.forward(x)

        diff = y_pred - y_true
        if diff.shape != y_pred.shape:
            raise ValueError(
                f"y_true with shape {np.shape(y_true)} does not match "
                f"predictions with shape {y_pred.shape}"
            )
        loss = float(np.mean(diff ** 2))

        # The loss averages over batch * output_dim elements, so the gradient
        # must be normalised by the same count (not by the batch size alone).
        dL_dy = (2.0 / diff.size) * diff

        # Output layer (linear).
        a1 = cache["a1"]
        dL_dW2 = a1.T @ dL_dy
        dL_db2 = np.sum(dL_dy, axis=0, keepdims=True)

        # Hidden layer: d tanh(z)/dz = 1 - tanh(z)^2, and a1 already is tanh(z1).
        dL_da1 = dL_dy @ self.net.W2.T
        dL_dz1 = dL_da1 * (1.0 - a1 ** 2)

        dL_dW1 = cache["x"].T @ dL_dz1
        dL_db1 = np.sum(dL_dz1, axis=0, keepdims=True)

        grads = [dL_dW1, dL_db1, dL_dW2, dL_db2]
        return loss, grads

    def trainer_step(self, x: np.ndarray, y_true: np.ndarray) -> float:
        """Run one gradient-descent step of the internal trainer on a mini-batch.

        Conceptually this is the "trainer network" that adjusts the weights of
        the TraineeNet; here it is implemented as plain gradient descent.

        Returns:
            The batch loss measured before the update.
        """
        loss, grads = self._loss_and_grads(x, y_true)
        for param, grad in zip(self.net.parameters(), grads):
            # In-place update so the network's own arrays are modified.
            param -= self.learning_rate * grad
        return loss

    def fit(
        self,
        x: np.ndarray,
        y: np.ndarray,
        epochs: int = 1000,
        batch_size: int = 32,
        shuffle: bool = True,
        callback: Callable[[int, float], None] | None = None,
        rng: np.random.Generator | None = None,
    ) -> None:
        """Train on a dataset using the internal trainer.

        Args:
            x: shape (N, input_dim)
            y: shape (N, output_dim)
            epochs: number of passes over the dataset
            batch_size: mini-batch size (must be positive)
            shuffle: whether to reshuffle the samples each epoch
            callback: optional function(epoch, loss) for logging; ``loss`` is
                the unweighted mean of that epoch's mini-batch losses
            rng: optional generator used for shuffling; pass a seeded one for
                reproducible runs

        Raises:
            ValueError: if ``batch_size`` is not positive or ``x`` and ``y``
                have a different number of rows.

        An empty dataset is a no-op: nothing is trained and the callback is
        not invoked.
        """
        n_samples = x.shape[0]
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"x and y must have the same number of rows, got {n_samples} and {y.shape[0]}"
            )
        if n_samples == 0:
            return
        if rng is None:
            rng = np.random.default_rng()

        for epoch in range(epochs):
            if shuffle:
                idx = np.arange(n_samples)
                rng.shuffle(idx)
                x_epoch, y_epoch = x[idx], y[idx]
            else:
                # No reordering requested: slice the caller's arrays directly.
                x_epoch, y_epoch = x, y

            losses = []
            for start in range(0, n_samples, batch_size):
                end = start + batch_size
                losses.append(self.trainer_step(x_epoch[start:end], y_epoch[start:end]))

            if callback is not None:
                callback(epoch, float(np.mean(losses)))

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Return the network output for ``x`` without updating any weights."""
        y, _ = self.net.forward(x)
        return y

    def dream(
        self,
        num_steps: int = 128,
        input_seed: np.ndarray | None = None,
        noise_sigma: float = 0.0,
        blind_inputs: bool = False,
        rng: np.random.Generator | None = None,
    ) -> np.ndarray:
        """Generate a sequence of outputs by feeding the net its own output.

        The dream runs on a noisy copy of the network, so the trained weights
        are never modified.

        Args:
            num_steps: length of the sequence to generate.
            input_seed: initial input vector with ``input_dim`` elements; if
                None, uses zeros.
            noise_sigma: amount of noise to add to the copied weights *once*
                before dreaming.
            blind_inputs: if True, inputs are forced to zero every step, so
                every step sees the same input.
            rng: optional RNG for the weight noise.

        Returns:
            Array of generated outputs of shape (num_steps, output_dim); an
            empty (0, output_dim) array when ``num_steps`` is not positive.

        Raises:
            ValueError: if ``input_seed`` does not have ``input_dim`` elements.
        """
        in_dim = self.net.input_dim
        out_dim = self.net.output_dim

        if input_seed is None:
            x = np.zeros((1, in_dim))
        else:
            x = np.asarray(input_seed, dtype=float).reshape(1, -1)
            if x.shape[1] != in_dim:
                raise ValueError(
                    f"input_seed must have {in_dim} elements, got {x.shape[1]}"
                )

        if num_steps <= 0:
            # np.concatenate rejects an empty list, so handle this explicitly.
            return np.empty((0, out_dim))

        if rng is None:
            rng = np.random.default_rng()

        # Work on a perturbed copy so dreaming never touches the trained weights.
        shadow = TraineeNet(in_dim, self.net.hidden_dim, out_dim)
        shadow.W1 = self.net.W1.copy()
        shadow.b1 = self.net.b1.copy()
        shadow.W2 = self.net.W2.copy()
        shadow.b2 = self.net.b2.copy()
        shadow.apply_parameter_noise(noise_sigma, rng=rng)

        # Enough repeats of each output element to cover the input width.
        repeats = in_dim // out_dim + 1

        outputs = []
        for _ in range(num_steps):
            x_step = np.zeros_like(x) if blind_inputs else x
            y, _ = shadow.forward(x_step)
            outputs.append(y.copy())

            # Feed the output back as the next input, adapting the width when
            # the two dimensions differ.
            if out_dim == in_dim:
                x = y
            else:
                x = np.repeat(y, repeats, axis=1)[:, :in_dim]

        return np.concatenate(outputs, axis=0)
|
|
|
|
| |
|
|
def make_sin_dataset(
    n_samples: int = 256,
    rng: np.random.Generator | None = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Build a simple 1D regression dataset: y = sin(x) with x drawn from [0, 2π].

    Args:
        n_samples: number of (x, y) pairs to draw.
        rng: optional generator for the x samples; pass a seeded one for a
            reproducible dataset. Defaults to a fresh, unseeded generator.

    Returns:
        ``(x, y)``, both of shape (n_samples, 1), with ``y = sin(x)``.
    """
    if rng is None:
        rng = np.random.default_rng()
    x = rng.uniform(0.0, 2.0 * np.pi, size=(n_samples, 1))
    y = np.sin(x)
    return x, y
|
|
|
|
def demo_train_and_dream() -> None:
    """Train a STANNO on sin(x) and then explore noise/lesion effects.

    Run the module as a script (e.g. "python stanno_poc.py") to see the
    numeric output. Everything is printed to stdout; nothing is returned.
    """
    x, y = make_sin_dataset(512)
    stanno = STANNO(input_dim=1, hidden_dim=32, output_dim=1, learning_rate=5e-3)

    def log_progress(epoch: int, loss: float) -> None:
        # Report only every 100th epoch to keep the output short.
        if (epoch + 1) % 100 == 0:
            print(f"Epoch {epoch:4d} loss={loss:.5f}")

    print("Training STANNO on y = sin(x)...")
    stanno.fit(
        x,
        y,
        epochs=500,
        batch_size=64,
        callback=log_progress,
    )

    # Compare predictions with the true function on an evenly spaced grid.
    xs = np.linspace(0, 2 * np.pi, 16).reshape(-1, 1)
    targets = np.sin(xs)
    preds = stanno.predict(xs)
    print("\nSample predictions after training:")
    for xi, yi, yi_hat in zip(xs.flatten(), targets.flatten(), preds.flatten()):
        print(f"x={xi:5.2f} sin(x)={yi: .3f} pred={yi_hat: .3f}")

    # Dream at increasing weight-noise levels. With blind_inputs=True the input
    # is zero at every step, so the outputs within one run are all identical;
    # what varies between runs is the noise added to the weights.
    for sigma in [0.0, 0.05, 0.2, 0.5]:
        seq = stanno.dream(num_steps=32, noise_sigma=sigma, blind_inputs=True)
        print(f"\nDreaming with noise_sigma={sigma} (first 10 outputs):")
        print(np.round(seq[:10].flatten(), 3))

    print("\nLesioning 70% of weights and evaluating error on test points...")
    # lesion() works in place, so snapshot the parameters first.
    backup = [p.copy() for p in stanno.net.parameters()]
    try:
        stanno.net.lesion(fraction=0.7)
        preds_lesioned = stanno.predict(xs)
        mse_lesioned = float(np.mean((preds_lesioned - targets) ** 2))
        print(f"MSE after lesioning 70% of weights: {mse_lesioned:.4f}")
    finally:
        # Restore the trained weights even if evaluation fails.
        for param, saved in zip(stanno.net.parameters(), backup):
            param[...] = saved
|
|
|
|
# Run the demo only when the module is executed as a script, not on import.
if __name__ == "__main__":
    demo_train_and_dream()
|
|