stanno / stanno.py

Up-to-date with original repo

8f0d906 verified 12 days ago

11.3 kB

	"""STANNO-style proof of concept

	This module implements a very simple Self-Training Artificial Neural Network Object (STANNO)
	loosely inspired by Thaler's description: two neural networks, one of which trains the other,
	optionally folded into a single object.[cite:1][cite:3]

	Design choices:
	- TraineeNet: a small multilayer perceptron (MLP) that learns a supervised mapping.
	- Trainer: training logic embedded inside STANNO using standard gradient descent.
	Conceptually this plays the role of the "trainer" network described in the literature,
	but here it is implemented as explicit code for simplicity.

	Features included for experimentation:
	- Supervised training on a toy dataset (e.g., y = sin(x)).
	- "Dreaming": run the trained net in a feedback loop (each output becomes the next input),
	starting from a seed input (zeros by default) or with the inputs "blinded" (forced to
	zero at every step), to observe internal dynamics.
	- Noise injection: add Gaussian noise with adjustable standard deviation to all parameters
	(weights and biases), to explore how output complexity changes with noise level (from
	"stupidity" to chaos).
	- Lesioning: randomly zero out a fraction of weights to mimic progressive "death" of
	connections and observe degradation ("tunnel vision").[cite:2]

	The goal is not to reproduce the original spreadsheet implementation, but to give a
	simple, hackable playground in modern Python/NumPy that you can extend (including
	replacing the hard-coded trainer by a learned meta-network if desired).
	"""

	from __future__ import annotations
	import numpy as np
	from dataclasses import dataclass
	from typing import Tuple, Callable


	@dataclass
	class TraineeNet:
	    """Simple 2-layer MLP (input -> tanh hidden layer -> linear output).

	    This is the network that will be trained by the STANNO object.

	    Attributes:
	        input_dim: number of input features.
	        hidden_dim: number of tanh hidden units.
	        output_dim: number of linear output units.

	    After construction the instance also carries the parameter arrays
	    ``W1`` (input_dim x hidden_dim), ``b1`` (1 x hidden_dim),
	    ``W2`` (hidden_dim x output_dim) and ``b2`` (1 x output_dim). They are
	    created in ``__post_init__`` and are not dataclass fields, so the
	    generated ``__eq__``/``__repr__`` only look at the three sizes.
	    """

	    input_dim: int
	    hidden_dim: int
	    output_dim: int

	    def __post_init__(self) -> None:
	        """Validate the layer sizes and randomly initialise the parameters.

	        Raises:
	            ValueError: if any of the three sizes is smaller than 1.
	        """
	        for name in ("input_dim", "hidden_dim", "output_dim"):
	            value = getattr(self, name)
	            if value < 1:
	                raise ValueError(f"{name} must be a positive integer, got {value!r}")

	        rng = np.random.default_rng()
	        # Xavier-like initialization: std = 1 / sqrt(fan_in); biases start at zero.
	        self.W1 = rng.normal(
	            0.0, 1.0 / np.sqrt(self.input_dim), (self.input_dim, self.hidden_dim)
	        )
	        self.b1 = np.zeros((1, self.hidden_dim))
	        self.W2 = rng.normal(
	            0.0, 1.0 / np.sqrt(self.hidden_dim), (self.hidden_dim, self.output_dim)
	        )
	        self.b2 = np.zeros((1, self.output_dim))

	    def parameters(self) -> list[np.ndarray]:
	        """Return the live parameter arrays in the order [W1, b1, W2, b2].

	        The list holds references, not copies, so in-place updates applied to
	        its elements modify the network.
	        """
	        return [self.W1, self.b1, self.W2, self.b2]

	    def forward(self, x: np.ndarray) -> Tuple[np.ndarray, dict]:
	        """Run a forward pass.

	        Args:
	            x: input batch; it is multiplied by W1, so its last axis must have
	                length input_dim.

	        Returns:
	            A pair ``(y, cache)`` where ``y`` is the linear output and ``cache``
	            maps "x", "z1", "a1" and "z2" to the intermediate values needed
	            for backprop.
	        """
	        z1 = x @ self.W1 + self.b1
	        a1 = np.tanh(z1)
	        z2 = a1 @ self.W2 + self.b2
	        y = z2  # regression; for classification you could add softmax
	        cache = {"x": x, "z1": z1, "a1": a1, "z2": z2}
	        return y, cache

	    def apply_parameter_noise(
	        self, sigma: float, rng: np.random.Generator | None = None
	    ) -> None:
	        """Add Gaussian noise with std ``sigma`` to all parameters in-place.

	        Args:
	            sigma: standard deviation of the noise; values <= 0 are a no-op.
	            rng: optional generator; a fresh unseeded one is used when omitted.
	        """
	        if sigma <= 0:
	            return
	        if rng is None:
	            rng = np.random.default_rng()
	        for p in self.parameters():
	            # In-place add so the arrays held by the instance are the ones changed.
	            p += rng.normal(0.0, sigma, p.shape)

	    def lesion(self, fraction: float, rng: np.random.Generator | None = None) -> None:
	        """Randomly zero out weights in-place (simulated connection death).

	        Each entry of W1 and W2 is zeroed independently with probability
	        ``fraction``, so the realised share of dead weights is only
	        approximately ``fraction``. Biases are left untouched.

	        Args:
	            fraction: kill probability; clipped to [0, 1]. Values <= 0 are a no-op.
	            rng: optional generator; a fresh unseeded one is used when omitted.
	        """
	        fraction = float(np.clip(fraction, 0.0, 1.0))
	        if fraction <= 0:
	            return
	        if rng is None:
	            rng = np.random.default_rng()
	        for W in (self.W1, self.W2):
	            mask = rng.random(W.shape) < fraction
	            W[mask] = 0.0


	class STANNO:
	    """Self-Training Artificial Neural Network Object (STANNO-style).

	    Bundles:
	    - a trainable network (``self.net``, a TraineeNet), and
	    - an internal training algorithm (mini-batch gradient descent on MSE) that
	      acts as the "trainer" and updates the weights from examples.

	    This follows the spirit of the STANNOs described by Thaler: an object that
	    contains both the network and its training mechanism, and that can keep
	    learning online.[cite:1][cite:3]
	    """

	    def __init__(
	        self,
	        input_dim: int,
	        hidden_dim: int,
	        output_dim: int,
	        learning_rate: float = 1e-2,
	    ) -> None:
	        """Create the trainee network and store the gradient-descent step size."""
	        self.net = TraineeNet(input_dim, hidden_dim, output_dim)
	        self.learning_rate = learning_rate

	    # ---------------------- Core training logic ----------------------

	    def _loss_and_grads(self, x: np.ndarray, y_true: np.ndarray) -> Tuple[float, list]:
	        """Compute the MSE loss and its gradients via backprop for one batch.

	        Args:
	            x: input batch fed to the network.
	            y_true: targets; must have exactly the shape of the network output.

	        Returns:
	            ``(loss, grads)`` with ``grads`` ordered like ``net.parameters()``:
	            [dL/dW1, dL/db1, dL/dW2, dL/db2].

	        Raises:
	            ValueError: if the target shape differs from the prediction shape
	                (this would otherwise broadcast silently, e.g. (N,) vs (N, 1)).
	        """
	        y_pred, cache = self.net.forward(x)
	        y_true = np.asarray(y_true)
	        if y_pred.shape != y_true.shape:
	            raise ValueError(
	                f"target shape {y_true.shape} does not match prediction shape {y_pred.shape}"
	            )

	        # Mean squared error over every element (batch x outputs).
	        diff = y_pred - y_true
	        loss = float(np.mean(diff ** 2))

	        # Gradient of that mean: the divisor is the element count, not just the
	        # batch size, otherwise the step is output_dim times too large.
	        dL_dy = (2.0 / diff.size) * diff

	        # Layer 2 (linear output)
	        a1 = cache["a1"]
	        dL_dW2 = a1.T @ dL_dy
	        dL_db2 = np.sum(dL_dy, axis=0, keepdims=True)

	        # Back through tanh: d tanh(z)/dz = 1 - tanh(z)^2 = 1 - a1^2
	        dL_da1 = dL_dy @ self.net.W2.T
	        dL_dz1 = dL_da1 * (1.0 - a1 ** 2)

	        # Layer 1
	        dL_dW1 = cache["x"].T @ dL_dz1
	        dL_db1 = np.sum(dL_dz1, axis=0, keepdims=True)

	        return loss, [dL_dW1, dL_db1, dL_dW2, dL_db2]

	    def trainer_step(self, x: np.ndarray, y_true: np.ndarray) -> float:
	        """Run one training step of the internal trainer over a mini-batch.

	        Conceptually this is the "trainer network" that adjusts the weights of
	        the TraineeNet. Here it is implemented as plain gradient descent.

	        Returns:
	            The batch loss measured before the parameters are updated.
	        """
	        loss, grads = self._loss_and_grads(x, y_true)
	        for param, grad in zip(self.net.parameters(), grads):
	            # In-place so the arrays owned by the net are the ones updated.
	            param -= self.learning_rate * grad
	        return loss

	    def fit(
	        self,
	        x: np.ndarray,
	        y: np.ndarray,
	        epochs: int = 1000,
	        batch_size: int = 32,
	        shuffle: bool = True,
	        callback: Callable[[int, float], None] | None = None,
	        rng: np.random.Generator | None = None,
	    ) -> None:
	        """Train on a dataset using the internal trainer.

	        Args:
	            x: shape (N, input_dim)
	            y: shape (N, output_dim)
	            epochs: number of passes over the dataset
	            batch_size: mini-batch size (the last batch may be smaller)
	            shuffle: whether to shuffle each epoch
	            callback: optional function(epoch, mean_loss) for logging
	            rng: optional generator used for shuffling; pass a seeded one for
	                reproducible runs. A fresh unseeded one is used when omitted.

	        Raises:
	            ValueError: if x and y disagree on N, or batch_size < 1.

	        An empty dataset (N == 0) is a no-op: nothing is trained and the
	        callback is not invoked.
	        """
	        n_samples = x.shape[0]
	        if y.shape[0] != n_samples:
	            raise ValueError(
	                f"x and y must have the same number of samples, got {n_samples} and {y.shape[0]}"
	            )
	        if batch_size < 1:
	            raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
	        if n_samples == 0:
	            return
	        if rng is None:
	            rng = np.random.default_rng()

	        for epoch in range(epochs):
	            if shuffle:
	                order = rng.permutation(n_samples)
	                x_epoch, y_epoch = x[order], y[order]
	            else:
	                x_epoch, y_epoch = x, y

	            losses = [
	                self.trainer_step(
	                    x_epoch[start:start + batch_size],
	                    y_epoch[start:start + batch_size],
	                )
	                for start in range(0, n_samples, batch_size)
	            ]

	            if callback is not None:
	                callback(epoch, float(np.mean(losses)))

	    # ---------------------- Inference & "dreaming" ----------------------

	    def predict(self, x: np.ndarray) -> np.ndarray:
	        """Return the network output for ``x`` (forward pass, cache discarded)."""
	        y, _ = self.net.forward(x)
	        return y

	    def dream(
	        self,
	        num_steps: int = 128,
	        input_seed: np.ndarray | None = None,
	        noise_sigma: float = 0.0,
	        blind_inputs: bool = False,
	        rng: np.random.Generator | None = None,
	    ) -> np.ndarray:
	        """Generate a sequence of outputs by driving the net with a simple or blind input.

	        The trained network itself is never modified: noise is applied to a
	        private copy.

	        Args:
	            num_steps: length of the sequence to generate.
	            input_seed: initial input vector (input_dim values); if None, uses zeros.
	            noise_sigma: amount of noise to add to the copied parameters once
	                before dreaming.
	            blind_inputs: if True, inputs are forced to zero every step. Since
	                the parameters are fixed during the loop, every step then
	                yields the same output.
	            rng: optional RNG used for the parameter noise.

	        Returns:
	            Array of generated outputs of shape (num_steps, output_dim); an
	            empty (0, output_dim) array when num_steps <= 0.

	        Raises:
	            ValueError: if input_seed does not contain input_dim values.
	        """
	        import copy  # local import: only this method needs it

	        if rng is None:
	            rng = np.random.default_rng()

	        in_dim = self.net.input_dim
	        out_dim = self.net.output_dim

	        if input_seed is None:
	            x = np.zeros((1, in_dim))
	        else:
	            x = np.asarray(input_seed).reshape(1, -1)
	            if x.shape[1] != in_dim:
	                raise ValueError(
	                    f"input_seed must contain {in_dim} values, got {x.shape[1]}"
	                )

	        if num_steps <= 0:
	            return np.empty((0, out_dim))

	        # Work on a copy so as not to permanently corrupt the trained net.
	        shadow = copy.deepcopy(self.net)
	        shadow.apply_parameter_noise(noise_sigma, rng=rng)

	        # Number of times each output value is repeated when output_dim != input_dim.
	        repeats = in_dim // out_dim + 1

	        outputs = []
	        for _ in range(num_steps):
	            x_step = np.zeros_like(x) if blind_inputs else x
	            y, _cache = shadow.forward(x_step)
	            outputs.append(y.copy())
	            # Simple feedback: feed the output (or part of it) back as the next
	            # input. This makes the sequence sensitive to internal weights.
	            if out_dim == in_dim:
	                x = y
	            else:
	                # Repeat each output value consecutively, then truncate to
	                # input_dim columns (this also truncates when output_dim > input_dim).
	                x = np.repeat(y, repeats, axis=1)[:, :in_dim]

	        return np.concatenate(outputs, axis=0)


	# ---------------------- Demo utilities ----------------------

	def make_sin_dataset(
	    n_samples: int = 256,
	    rng: np.random.Generator | None = None,
	) -> Tuple[np.ndarray, np.ndarray]:
	    """Build a simple 1D regression dataset: y = sin(x) with x drawn from [0, 2π).

	    Args:
	        n_samples: number of (x, y) pairs to draw.
	        rng: optional generator; pass a seeded one for a reproducible dataset.
	            A fresh unseeded one is used when omitted.

	    Returns:
	        ``(x, y)``, both of shape (n_samples, 1), with ``y = np.sin(x)``.
	    """
	    if rng is None:
	        rng = np.random.default_rng()
	    x = rng.uniform(0.0, 2.0 * np.pi, size=(n_samples, 1))
	    return x, np.sin(x)


	def demo_train_and_dream() -> None:
	    """Train a STANNO on sin(x) and then explore noise/lesion effects.

	    Everything is reported on stdout; run this module as a script to see the
	    numeric output.
	    """
	    x, y = make_sin_dataset(512)
	    stanno = STANNO(input_dim=1, hidden_dim=32, output_dim=1, learning_rate=5e-3)

	    def log_progress(epoch: int, loss: float) -> None:
	        # Report only every 100th epoch to keep the output short.
	        if (epoch + 1) % 100 == 0:
	            print(f"Epoch {epoch:4d} loss={loss:.5f}")

	    print("Training STANNO on y = sin(x)...")
	    stanno.fit(x, y, epochs=500, batch_size=64, callback=log_progress)

	    # Evaluate basic fit on an evenly spaced grid.
	    xs = np.linspace(0, 2 * np.pi, 16).reshape(-1, 1)
	    targets = np.sin(xs)
	    preds = stanno.predict(xs)
	    print("\nSample predictions after training:")
	    for xi, yi, yi_hat in zip(xs.flatten(), targets.flatten(), preds.flatten()):
	        print(f"x={xi:5.2f} sin(x)={yi: .3f} pred={yi_hat: .3f}")

	    # Dreaming with different noise levels.
	    # NOTE: with blind_inputs=True the net sees the same all-zero input at every
	    # step, so values repeat within one sequence; compare across sigma levels.
	    for sigma in [0.0, 0.05, 0.2, 0.5]:
	        seq = stanno.dream(num_steps=32, noise_sigma=sigma, blind_inputs=True)
	        print(f"\nDreaming with noise_sigma={sigma} (first 10 outputs):")
	        print(np.round(seq[:10].flatten(), 3))

	    # Lesion experiment
	    print("\nLesioning 70% of weights and evaluating error on test points...")
	    # Backup parameters so the trained net can be restored afterwards.
	    backup = [p.copy() for p in stanno.net.parameters()]
	    try:
	        stanno.net.lesion(fraction=0.7)
	        preds_lesioned = stanno.predict(xs)
	        mse_lesioned = float(np.mean((preds_lesioned - targets) ** 2))
	        print(f"MSE after lesioning 70% of weights: {mse_lesioned:.4f}")
	    finally:
	        # Restore in-place even if evaluation fails, so the net is never left lesioned.
	        for param, saved in zip(stanno.net.parameters(), backup):
	            param[...] = saved


	if __name__ == "__main__":
	    # Run the demo only when executed as a script, not when imported.
	    demo_train_and_dream()