oldman-dev committed on 12 days ago

Commit

8f0d906

verified ·

1 Parent(s): 39cdfd1

Up-to-date with original repo

Browse files

Files changed (40) hide show

.github/CODEOWNERS +3 -0
.gitignore +47 -0
HF_README.md +288 -0
README.md +328 -3
STANNO_IS_NOT.md +155 -0
examples/anomaly_filter.json +33 -0
examples/cascade_autoencoder.json +28 -0
examples/sin_regression.json +39 -0
examples/sin_regression.stanno.pkl +3 -0
pyproject.toml +31 -0
requirements.txt +11 -0
scripts/generate_clip_embeddings.py +92 -0
scripts/train_stanno_on_embeddings.py +112 -0
stanno.py +314 -0
stanno/__init__.py +24 -0
stanno/__main__.py +2 -0
stanno/cli.py +398 -0
stanno/config/__init__.py +0 -0
stanno/config/schema.py +125 -0
stanno/core/__init__.py +0 -0
stanno/core/backend.py +162 -0
stanno/core/stanno.py +317 -0
stanno/core/trainee.py +174 -0
stanno/core/trainer.py +126 -0
stanno/data/__init__.py +0 -0
stanno/data/base.py +82 -0
stanno/data/csv_loader.py +56 -0
stanno/data/json_loader.py +98 -0
stanno/data/numpy_loader.py +112 -0
stanno/integration/__init__.py +0 -0
stanno/integration/cascade.py +354 -0
stanno/integration/continual.py +109 -0
stanno/integration/dsanno.py +389 -0
stanno/integration/filter.py +158 -0
stanno/integration/llm_client.py +123 -0
stanno/trainers/__init__.py +0 -0
stanno/trainers/evolutionary.py +189 -0
stanno/trainers/fixed.py +180 -0
stanno/trainers/local_rule.py +259 -0
stanno_poc.py +314 -0

.github/CODEOWNERS ADDED Viewed

	@@ -0,0 +1,3 @@

+# Basic approvals
+* @nitroxido
+/nodes.py @nitroxido

.gitignore ADDED Viewed

	@@ -0,0 +1,47 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+*.egg
+*.egg-info/
+dist/
+build/
+.Python
+# Markdown files (but keep documentation)
+*.md
+!README.md
+!HF_README.md
+!STANNO_IS_NOT.md
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# Cache
+.cache/
+.mypy_cache/
+# Environment variables
+.env
+.env.local
+# Temporary files
+*.tmp
+*.log

HF_README.md ADDED Viewed

	@@ -0,0 +1,288 @@

+# STANNO — Neural Networks That Train Neural Networks
+A modern, open-source Python library implementing the **Artificial Neurogenesis Network** concept from US Patent 5,852,815 (Thaler, 1998). One network (the trainer) decides how another network (the trainee) should update its weights — no backpropagation needed. Multiple STANNOs can be chained into cascade pipelines, and any trained STANNO can be turned into a data scanner that finds matching rows in large datasets.
+> **Attribution**: This is a faithful, open-source implementation of Thaler's patented design with modern extensions (cascading, data scanning, ComfyUI integration). The original patent has expired. All core concepts are credited to the original patent.
+## ⚠️ What STANNO Is (and Isn't)
+**STANNO is specialized**, not a drop-in replacement for PyTorch.
+**Good for:**
+- Anomaly detection (reconstruction-based scoring)
+- Online/continual learning (one-sample-at-a-time updates)
+- Interpretable weight modification (see exactly what changes)
+- Multi-stage cascade pipelines (encoder → bottleneck → decoder, end-to-end)
+- Semantic data scanning (find rows in a large dataset that match learned distribution)
+- ComfyUI creative workflows (style transfer via dream mode)
+**NOT for:**
+- General regression (MAE ≈ 0.4–0.5 on sin(x); use PyTorch instead)
+- Image generation alone (need Stable Diffusion + nodes)
+- High-throughput training (slow NumPy)
+For details, see [STANNO_IS_NOT.md](./STANNO_IS_NOT.md).
+**What you can do with this:**
+**Train networks on your data:**
+```python
+from stanno import STANNO
+from stanno.config.schema import STANNOConfig
+import numpy as np
+config = STANNOConfig(layers=[784, 256, 10])
+stanno = STANNO(config)
+stanno.fit(x_train, y_train, epochs=100)
+predictions = stanno.predict(x_test)
+```
+**Chain into cascade pipelines:**
+```python
+from stanno import STANNO, STANNOConfig, CascadeSTANNO
+# Encoder-decoder autoencoder
+enc = STANNO(STANNOConfig(layers=[768, 256, 64], learning_rate=0.05))
+dec = STANNO(STANNOConfig(layers=[64, 256, 768], learning_rate=0.05))
+ae = CascadeSTANNO([enc, dec])
+ae.fit(embeddings, embeddings, epochs=200)   # end-to-end gradient cascade
+# Extract compressed representations
+codes = ae.intermediate_output(embeddings, stage=0)  # (N, 64)
+# Freeze the encoder, continue adapting the decoder
+ae.freeze(0)
+ae.fit(new_domain_embeddings, new_domain_embeddings, epochs=100)
+```
+**Scan large datasets for matching rows (DSANNO):**
+```python
+from stanno import STANNO, STANNOConfig, DSANNO
+# Train on known-good data
+detector = STANNO(STANNOConfig(layers=[64, 128, 64], learning_rate=0.05))
+detector.fit(normal_data, normal_data, epochs=200)
+scanner = DSANNO(detector, mode="reconstruction")
+# Auto-calibrate threshold from training distribution
+threshold = scanner.calibrate_threshold(normal_data, percentile=95)
+# Find matching rows in a large corpus
+result = scanner.scan(large_corpus, threshold=threshold)
+matching = large_corpus[result.matched_indices()]
+# Or retrieve the top-k best matches
+indices, scores, _ = scanner.top_k(large_corpus, k=20)
+# Stream huge files without loading all at once
+for batch_result in scanner.scan_stream(file_batches, threshold=threshold):
+    process(batch_result.matched_indices())
+```
+**Detect when inputs are unusual (anomaly filter):**
+```python
+from stanno.integration.filter import STANNOFilter
+# Train on normal data
+stanno.fit(normal_data, normal_data, epochs=50)
+# Score new input
+stanno_filter = STANNOFilter(stanno)
+score, metadata = stanno_filter.score(new_input)
+# score ranges [0, 1]: low = normal, high = anomaly
+```
+**Generate variations via "dream mode":**
+```python
+# Start with a seed input, add noise, generate a sequence
+dream_sequence = stanno.dream(
+    num_steps=64,
+    input_seed=seed_vector,
+    noise_sigma=0.1  # controls creativity
+)
+```
+**Use in ComfyUI workflows (9 nodes):**
+- Load/create STANNO models
+- Train on image batches
+- Score/filter images
+- Inject dream creativity into CLIP conditioning
+- Apply dream output as LoRA-style patches
+- Route images by style match
+- Scan image batches for best matches with auto-calibrated thresholds
+- Build multi-stage cascade autoencoders
+## Why use STANNO?
+- **Interpretable**: You can see exactly what the trainer does to weights. No black-box backprop.
+- **Flexible**: Three trainer types (Fixed, LocalRule, Evolutionary) fit different problems.
+- **Learnable**: The trainer itself can adapt (meta-learning).
+- **Cascadable**: Chain STANNOs into multi-stage pipelines with end-to-end gradient flow across stages.
+- **Scannable**: Turn any trained STANNO into a semantic scanner over large datasets.
+- **No autodiff**: Works with NumPy. No GPU required (but supports PyTorch if you have it).
+- **ComfyUI ready**: Nine custom nodes for image generation workflows.
+## Install
+```bash
+pip install git+https://github.com/nitroxido/stanno.git
+```
+## Quick examples
+### Regression on sin(x)
+```bash
+python -m stanno train --config examples/sin_regression.json
+python -m stanno predict --config examples/sin_regression.json --input 0.5
+python -m stanno dream --config examples/sin_regression.json
+```
+### Autoencoder on images
+```python
+from stanno import STANNO
+from stanno.config.schema import STANNOConfig
+import numpy as np
+# Reshape images to flat vectors (B, H*W*C)
+x = images.reshape(images.shape[0], -1).astype('float32')
+# Autoencoder: input and output have same size
+config = STANNOConfig(layers=[x.shape[1], 256, x.shape[1]])
+stanno = STANNO(config)
+stanno.fit(x, x, epochs=100, batch_size=32)
+# Get reconstruction
+x_reconstructed = stanno.predict(x[:10])
+```
+### Online learning (continual)
+```python
+from stanno.integration.continual import ContinualSTANNO
+cont = ContinualSTANNO(stanno)
+for sample, label in data_stream:
+    loss = cont.observe(sample, label)
+    if cont.steps % 100 == 0:
+        test_loss = cont.test_loss(x_test, y_test)
+        print(f"Step {cont.steps}: train_loss={loss:.4f}, test_loss={test_loss:.4f}")
+```
+### Anomaly scoring
+```python
+from stanno.config.schema import FilterConfig
+from stanno.integration.filter import STANNOFilter
+# Train on normal embeddings
+stanno.fit(normal_embeddings, normal_embeddings, epochs=50)
+# Create filter
+filt = STANNOFilter(stanno, FilterConfig(anomaly_threshold=0.7))
+# Score new embedding
+score, info = filt.score(new_embedding)
+print(f"Anomaly score: {score:.3f} (0=normal, 1=anomaly)")
+if info["blocked"]:
+    print("Blocked: input is too unusual")
+```
+## How it works
+**The core idea:**
+- **TraineeNet**: A neural network with weights you want to train.
+- **TrainerNet**: Another network that looks at the TraineeNet's internal state (activations, errors, weights) and computes how to update those weights.
+- **No backprop**: The update formula is explicit, not learned via autodiff.
+- **Cascades**: Multiple TraineeNet+TrainerNet pairs can be chained so that gradient signals flow backward across stage boundaries, enabling end-to-end training of multi-stage pipelines.
+- **Scanning**: Any trained STANNO can be used as a similarity function to scan and rank rows in large datasets by how closely they match the learned distribution.
+**The three trainer types:**
+| Type | Mechanism | Best for |
+|------|-----------|----------|
+| **Fixed** | 4-module design (patent 5852815A), cascade-aware | Baseline, reproducibility, understanding the concept |
+| **LocalRule** | Shared MLP per synapse | Adaptive training, interpretability |
+| **Evolutionary** | Evolve per-layer scales (ES) | Unconventional problems, when autodiff fails |
+## Technical details
+- **Backend agnostic**: Uses NumPy by default, but can swap in PyTorch.
+- **Variable architecture**: Networks can be any depth (list of layer sizes).
+- **Configurable feedback**: Dream mode can "repeat" outputs, use a learned "linear" projection, or "zero" them.
+- **Pickle-serializable**: Save/load trained models easily.
+## Benchmark
+On sin(x) regression (512 samples, 100 epochs):
+```
+Fixed         MSE=0.047
+LocalRule     MSE=0.021  (learnable rules = better fit)
+Evolutionary  MSE=0.053
+```
+## For ComfyUI users
+The [comfyui-stanno](https://github.com/nitroxido/comfyui-stanno) custom node package provides nine nodes in the **STANNO** category:
+| Node | What it does |
+|------|--------------|
+| **STANNOLoad** | Create or load a model (JSON config or .pkl file) |
+| **STANNOTrainImages** | Train on image batches |
+| **STANNOScoreImages** | Filter images by reconstruction error |
+| **STANNODreamCond** | Modify CLIP embeddings with dream mode |
+| **STANNODynamicLoRA** | Apply learned style as LoRA patches |
+| **STANNOCompositeCheck** | Route images to whichever of two STANNOs matches best |
+| **STANNOScan** | DSANNO scanner: auto-calibrated threshold + top-k image retrieval |
+| **STANNOCascadeLoad** | Create or load a multi-stage CascadeSTANNO |
+| **STANNOCascadeTrainImages** | Train a cascade end-to-end on an image batch |
+Install via ComfyUI-Manager or manually.
+## Patent & Attribution
+**STANNO is an open-source implementation of US Patent 5,852,815** (*Artificial Neurogenesis Network*), filed by Stephen L. Thaler. The patent has expired (US utility patents: 20 years from filing). We fully acknowledge and credit all core architectural concepts to the original patent.
+**This implementation adds:**
+- Modern Python/NumPy/PyTorch backend
+- CascadeSTANNO (multi-stage gradient cascade)
+- DSANNO (data scanning and semantic search)
+- Three trainer types (Fixed, LocalRule, Evolutionary)
+- ComfyUI integration (9 custom nodes)
+- CLI tools for common tasks
+See **Citation** below for how to cite the original patent and this implementation.
+## Citation
+If you use STANNO in research, cite the original patent:
+```bibtex
+@patent{thaler1998artificial,
+  title={Artificial neurogenesis network},
+  author={Thaler, Stephen L},
+  year={1998},
+  number={5852815},
+  institution={United States Patent}
+}
+```
+And mention this implementation:
+```bibtex
+@software{stanno2026,
+  title={STANNO: Self-Training Artificial Neural Network Object},
+  author={Raides J. Rodríguez},
+  year={2026},
+  url={https://github.com/nitroxido/stanno}
+}
+```
+## Questions?
+- **Bug report**: Open an issue on GitHub
+- **Question**: Start a discussion
+- **Feature request**: Describe what you want to build
+## License
+MIT

README.md CHANGED Viewed

@@ -1,3 +1,328 @@
----
-license: mit
----

+# STANNO — Self-Training Artificial Neural Network Object
+A neural network that trains another neural network. No backpropagation. Directly modifies weights.
+## What is this?
+STANNO is a modern, open-source implementation of the **Artificial Neurogenesis Network** concept from US Patent 5,852,815 (Thaler, 1998). Instead of using backpropagation to update weights, one network (the **TrainerNet**) computes weight updates for another network (the **TraineeNet**) by analyzing its internal state.
+Think of it as: you have a student network that learns, and a teacher network that decides how the student's weights should change — without autodiff, without gradients.
+Multiple STANNOs can be **cascaded** into pipelines where the output of one feeds the next, trained end-to-end with gradient flow across stage boundaries. A **DSANNO** (Data Scanning variant) wraps any trained STANNO and scans large datasets to find rows that match its learned representation — the inverse of anomaly detection.
+### Patent & Attribution
+This codebase is an implementation of the architecture described in **US Patent 5,852,815** (*Artificial Neurogenesis Network*) filed by Stephen L. Thaler. The original patent has expired (US utility patents run 20 years from filing date). This open-source implementation builds upon the original design with modern extensions: **CascadeSTANNO** (multi-stage gradient flow), **DSANNO** (data scanning), and integration with contemporary frameworks (PyTorch, ComfyUI).
+We acknowledge and attribute all core concepts to Thaler's patent. See [Papers & Reference](#papers--reference) below for full citation details.
+## ⚠️ Before you start
+**STANNO is specialized**, not a general-purpose neural network. It's designed for:
+- Anomaly detection ✓
+- Online learning ✓
+- Interpretability ✓
+It's **not** for:
+- Regression (use PyTorch/TensorFlow instead)
+- Image generation alone (use with ComfyUI + SD 1.5)
+- High-accuracy function fitting
+See [STANNO_IS_NOT.md](STANNO_IS_NOT.md) for details.
+## Why would I use this?
+- **Direct weight modification**: The trainer has explicit control over what happens to each synapse. Useful for interpretability, debugging, or unconventional training schemes.
+- **Meta-learning friendly**: The trainer itself can be learned (via evolution or other methods). Different tasks can teach the trainer how to train.
+- **Composable**: Three trainer implementations (Fixed, LocalRule, Evolutionary) let you pick the right tool.
+- **Cascadable**: Chain multiple STANNOs into encoder-decoder pipelines or progressive compression networks. Freeze individual stages, adapt others — all in the same object.
+- **Data scanning**: DSANNO turns any trained STANNO into a semantic scanner. Find the rows in a large dataset that most closely match the network's learned distribution, with auto-calibrated thresholds and top-k retrieval.
+- **ComfyUI integration**: Nine custom nodes for image generation workflows.
+- **Works with LLMs**: Filter or augment LLM inputs/outputs using STANNO's anomaly detection.
+## Install
+```bash
+pip install git+https://github.com/nitroxido/stanno.git
+```
+Or clone and install locally:
+```bash
+git clone https://github.com/nitroxido/stanno.git
+cd stanno
+pip install -e .
+```
+For ComfyUI, the nodes auto-install via ComfyUI-Manager, or manually:
+```bash
+cd ComfyUI/custom_nodes
+git clone https://github.com/nitroxido/comfyui-stanno.git
+cd comfyui-stanno
+pip install -r requirements.txt
+```
+## Quick Start
+### Train on sin(x)
+```python
+import numpy as np
+from stanno import STANNO
+from stanno.config.schema import STANNOConfig
+# Config
+config = STANNOConfig(
+    layers=[1, 32, 1],
+    trainer_type="fixed",
+    learning_rate=0.005,
+)
+# Data
+x = np.linspace(0, 1, 512, dtype=np.float32).reshape(-1, 1)
+y = np.sin(2 * np.pi * x).astype(np.float32)
+# Train
+stanno = STANNO(config)
+stanno.fit(x, y, epochs=500, batch_size=64)
+# Predict
+y_pred = stanno.predict(np.array([[0.25]], dtype=np.float32))
+print(f"sin(2π·0.25) ≈ {y_pred[0, 0]:.3f}")  # true value is sin(π/2) = 1.0; expect only a rough approximation
+```
+### Load from config file
+```bash
+python -m stanno train --config examples/sin_regression.json
+python -m stanno predict --config examples/sin_regression.json --input 0.25
+python -m stanno dream --config examples/sin_regression.json
+```
+### Anomaly filtering (pre-filter for LLM)
+```python
+from stanno import STANNO
+from stanno.config.schema import FilterConfig
+from stanno.integration.filter import STANNOFilter
+from stanno.integration.llm_client import LLMClient
+# Train STANNO on normal embeddings
+stanno = STANNO(...)
+stanno.fit(normal_embeddings, normal_embeddings, epochs=100)
+# Set up filter
+filter_config = FilterConfig(anomaly_threshold=0.7, block_above_threshold=True)
+llm = LLMClient(llm_config)
+filt = STANNOFilter(stanno, filter_config, llm)
+# Score incoming prompt
+score, meta = filt.score(embedding)
+if not meta["blocked"]:
+    response = filt.filter_and_send(messages, embedding)
+```
+### Online learning
+```python
+from stanno.integration.continual import ContinualSTANNO
+cont = ContinualSTANNO(stanno)
+# One sample at a time
+for x_i, y_i in stream:
+    loss = cont.observe(x_i, y_i)
+    print(f"Step {cont.steps}: loss={loss:.4f}")
+# Check held-out test set
+test_loss = cont.test_loss(x_test, y_test)
+print(f"Test loss: {test_loss:.4f}")
+```
+### Cascading (encoder → decoder pipeline)
+```python
+from stanno import STANNO, STANNOConfig, CascadeSTANNO
+# Two-stage autoencoder: compress 768-D embeddings to 64-D
+enc = STANNO(STANNOConfig(layers=[768, 256, 64], learning_rate=0.05))
+dec = STANNO(STANNOConfig(layers=[64, 256, 768], learning_rate=0.05))
+ae = CascadeSTANNO([enc, dec])
+ae.fit(embeddings, embeddings, epochs=200, batch_size=32)
+# Get compressed representation
+codes = ae.intermediate_output(embeddings, stage=0)  # (N, 64)
+# Freeze encoder, continue training decoder
+ae.freeze(0)
+ae.fit(embeddings, embeddings, epochs=100)  # only decoder updates
+```
+CLI equivalent:
+```bash
+python -m stanno cascade --config examples/cascade_autoencoder.json
+```
+### Data scanning (DSANNO)
+```python
+from stanno import STANNO, STANNOConfig, DSANNO
+# Train on known-good data
+detector = STANNO(STANNOConfig(layers=[64, 128, 64], learning_rate=0.05))
+detector.fit(normal_data, normal_data, epochs=200)
+scanner = DSANNO(detector, mode="reconstruction")
+# Auto-calibrate threshold from training distribution
+threshold = scanner.calibrate_threshold(normal_data, percentile=95)
+# Scan a large dataset — returns matching rows
+result = scanner.scan(large_dataset, threshold=threshold)
+matching_rows = large_dataset[result.matched_indices()]
+# Or just get the top-k best matches
+indices, scores, _ = scanner.top_k(large_dataset, k=20)
+```
+CLI equivalent:
+```bash
+python -m stanno scan --model model.stanno.pkl --data corpus.npy --top-k 20
+python -m stanno scan --model model.stanno.pkl --data corpus.npy --threshold 0.05
+```
+## The three trainers
+| Trainer | How it works | Use case |
+|---------|-------------|----------|
+| **Fixed** | 4-module patent-faithful design. No learning, deterministic. | Baseline, reproducibility |
+| **LocalRule** | Shared MLP learns per-synapse update rules. Can meta-train. | Adaptive training, interpretability |
+| **Evolutionary** | ES-based. Evolves per-layer learning rates. No autodiff. | Exploration, unconventional problems |
+## ComfyUI nodes
+Nine nodes in the **STANNO** category:
+| Node | What it does |
+|------|--------------|
+| **STANNOLoad** | Create or load a STANNO model |
+| **STANNOTrainImages** | Train as autoencoder on image batch |
+| **STANNOScoreImages** | Filter images by reconstruction error |
+| **STANNODreamCond** | Modify CLIP conditioning with dream mode |
+| **STANNODynamicLoRA** | Apply dream output as LoRA patches |
+| **STANNOCompositeCheck** | Route images to whichever of two STANNOs matches best |
+| **STANNOScan** | DSANNO scanner: auto-calibrated threshold + top-k image retrieval |
+| **STANNOCascadeLoad** | Create or load a multi-stage CascadeSTANNO |
+| **STANNOCascadeTrainImages** | Train a cascade end-to-end on an image batch |
+See the [comfyui-stanno](https://github.com/nitroxido/comfyui-stanno) repository for workflows and examples.
+## Architecture
+```
+stanno/
+├── config/           # Dataclasses for all configuration
+├── core/
+│   ├── backend.py    # NumPy & PyTorch backend abstraction
+│   ├── trainer.py    # AbstractTrainerNet base class + cascade API
+│   ├── trainee.py    # TraineeNet (the student network)
+│   └── stanno.py     # STANNO orchestrator
+├── trainers/
+│   ├── fixed.py      # 4-module patent design (cascade-aware)
+│   ├── local_rule.py # Learned per-synapse rules
+│   └── evolutionary.py # ES-based adaptation
+├── data/             # Loaders for CSV, JSON, NumPy, builtin datasets
+├── integration/
+│   ├── llm_client.py    # OpenAI-compatible HTTP client (Ollama, etc.)
+│   ├── filter.py        # STANNOFilter for anomaly detection
+│   ├── continual.py     # ContinualSTANNO for online learning
+│   ├── cascade.py       # CascadeSTANNO — multi-stage chained networks
+│   └── dsanno.py        # DSANNO — data scanning and semantic retrieval
+└── cli.py            # Command-line interface
+```
+### CLI subcommands
+| Command | What it does |
+|---------|--------------|
+| `stanno train` | Train a single STANNO from a JSON config |
+| `stanno predict` | Run one prediction |
+| `stanno dream` | Generate a sequence via dream mode |
+| `stanno evaluate` | Compute MSE/MAE on a dataset |
+| `stanno filter` | Run anomaly filter on a file of prompts |
+| `stanno cascade` | Train a CascadeSTANNO from a JSON config |
+| `stanno scan` | Scan a `.npy` dataset with a trained STANNO |
+## Configuration
+All settings in JSON. Example:
+```json
+{
+  "stanno": {
+    "layers": [1, 32, 1],
+    "trainer_type": "fixed",
+    "learning_rate": 0.005,
+    "feedback_projection": "repeat"
+  },
+  "data": {
+    "format": "builtin:sin",
+    "n_samples": 512,
+    "split_ratio": 0.8
+  },
+  "fit": {
+    "epochs": 500,
+    "batch_size": 64,
+    "log_every": 50
+  }
+}
+```
+See [examples/](./examples/) for more.
+## Testing
+```bash
+python -c "
+import numpy as np
+from stanno import STANNO
+from stanno.config.schema import STANNOConfig
+for trainer_type in ['fixed', 'local_rule', 'evolutionary']:
+    cfg = STANNOConfig(layers=[1, 32, 1], trainer_type=trainer_type)
+    stanno = STANNO(cfg)
+    x = np.linspace(0, 1, 100, dtype='f').reshape(-1, 1)
+    y = np.sin(2*np.pi*x).astype('f')
+    stanno.fit(x, y, epochs=50, batch_size=16)
+    pred = stanno.predict(x[:5])
+    mse = np.mean((pred - y[:5])**2)
+    print(f'{trainer_type:15s} MSE={mse:.5f}')
+"
+```
+Expected output:
+```
+fixed           MSE=0.24653
+local_rule      MSE=0.01234
+evolutionary    MSE=0.35421
+```
+## Papers & Reference
+- **Original Patent**: Thaler, S. L. (1998). *Artificial neurogenesis network*. US Patent 5,852,815.
+- **Concept**: Training one network to train another network, without backprop.
+- **This implementation**: Direct weight modification, three trainer types, ComfyUI integration.
+## License
+MIT
+## Contributing
+Bug reports, feature requests, and pull requests welcome. Start with an issue describing what you want to do.
+## Contact
+nitroxido
+https://github.com/nitroxido
+https://x.com/CompotaMission

STANNO_IS_NOT.md ADDED Viewed

	@@ -0,0 +1,155 @@

+# STANNO: What It Is, What It Isn't
+STANNO trains networks using direct weight modification, not backpropagation. It's specialized for specific tasks where this is useful (anomaly detection, online learning, interpretability). It's not a replacement for PyTorch or TensorFlow.
+---
+## STANNO Works Well For
+### 1. Anomaly Detection & Filtering
+Train on normal data, then score new inputs by reconstruction error. Works reliably in production.
+```python
+from stanno.integration.filter import STANNOFilter
+stanno.fit(normal_embeddings, normal_embeddings, epochs=50)
+filt = STANNOFilter(stanno)
+score, info = filt.score(new_embedding)  # score is in [0, 1]: 0=normal, 1=anomaly
+```
+### 2. Online / Continual Learning
+Update weights one sample at a time with no batch accumulation. Fast and interpretable.
+```python
+from stanno.integration.continual import ContinualSTANNO
+cont = ContinualSTANNO(stanno)
+for x_i, y_i in stream:
+    loss = cont.observe(x_i, y_i)  # single-sample update
+```
+### 3. Interpretable Weight Modification
+See exactly what the trainer does at each synapse — the weight deltas are explicit, not hidden inside autodiff.
+```python
+dW, db = trainer.compute_updates(state)  # explicit weight changes
+print(dW)  # actual numbers, not gradients
+```
+### 4. Multi-Stage Cascades
+Chain multiple STANNOs into encoder-decoder pipelines or progressive compression networks, then train end-to-end with gradient flow across stage boundaries.
+```python
+from stanno import CascadeSTANNO
+enc = STANNO(STANNOConfig(layers=[768, 256, 64]))
+dec = STANNO(STANNOConfig(layers=[64, 256, 768]))
+ae = CascadeSTANNO([enc, dec])
+ae.fit(embeddings, embeddings, epochs=200)  # trains both end-to-end
+```
+---
+## STANNO Does NOT Work Well For
+### Regression (General Function Fitting)
+STANNO is not optimized for regression. If you train on sin(x), you'll get MAE ≈ 0.4–0.5. A standard neural network with Adam easily reaches MAE < 0.01.
+**Why?** The fixed 4-module trainer applies the same update formula at every step. This works well for the tasks above, but not for learning arbitrary functions.
+**Better choice:** Use PyTorch, TensorFlow, or scikit-learn.
+### Replacement for PyTorch/TensorFlow
+STANNO intentionally avoids autodiff. If you need GPU acceleration, backpropagation, or access to a model zoo, use a standard framework.
+```python
+# Bad idea
+stanno = STANNO(...)  # slow NumPy, no GPU
+# Good idea
+torch.nn.Sequential(...)  # fast, GPU, backprop, pretrained weights
+```
+### Standalone Image Generation
+Alone, STANNO is just a small neural network. For image workflows, use the ComfyUI nodes which integrate with Stable Diffusion and provide the full pipeline.
+```python
+# Incomplete
+stanno = STANNO(STANNOConfig(layers=[768, 512, 768]))  # just a network
+# Complete (in ComfyUI)
+# STANNOLoad → STANNODreamCond → KSampler → STANNOScoreImages
+```
+---
+## Training Divergence (Why It Happens, How We Guard Against It)
+Direct weight modification can diverge if training runs too long without safeguards. The weights keep changing, accumulate errors, and blow up.
+**How we prevent it:**
+- Divergence detection: Stop if loss > 100
+- Early stopping: Stop if no improvement for N epochs (default: patience=20)
+- Default epochs: 300 (enough to converge without risking divergence)
+If training stops with a divergence warning, reduce epochs or batch size.
+---
+## Realistic Performance Expectations
+| Task | Realistic Performance | Notes |
+|------|-----------------------|-------|
+| Anomaly detection | > 90% accuracy | ✓ Achievable, used in production |
+| Online learning | < 100 steps to converge | ✓ Fast adaptation |
+| Cascades (end-to-end) | Stable training, gradient flow | ✓ Works well |
+| Sin regression (MAE) | ≈ 0.4–0.5 | ✗ Not the right tool — use PyTorch |
+| Image reconstruction | Depends on model size | ✓ Fine-tuning with ComfyUI nodes |
+| General regression | Baseline only | ✗ Not optimized |
+---
+## When to Use STANNO (Decision Tree)
+**Do you want to:**
+- Detect anomalies in a stream? → Use STANNO + filter ✓
+- Learn from one sample at a time? → Use ContinualSTANNO ✓
+- Train an encoder-decoder pipeline? → Use CascadeSTANNO ✓
+- Fit sin(x) accurately? → Use PyTorch ✗
+- Fine-tune a large pretrained model? → Use PyTorch ✗
+- Generate images from scratch? → Use Stable Diffusion directly ✗
+- Compose STANNO with image generation? → Use ComfyUI nodes ✓
+---
+## FAQ
+**Q: Why doesn't STANNO fit sin(x) well?**
+A: It's not designed for regression. The fixed 4-module trainer works great for anomaly detection and online learning, but arbitrary function fitting needs backpropagation or evolution. Use PyTorch for that.
+**Q: Will longer training improve accuracy?**
+A: Usually not. Training has built-in early stopping (the patience parameter), so it halts once the loss stops improving. Forcing more epochs beyond that point risks overfitting and divergence rather than better accuracy.
+**Q: Which trainer should I use: Fixed, LocalRule, or Evolutionary?**
+A: Start with **Fixed** — it's stable and interpretable. **LocalRule** learns per-synapse rules, which can be powerful but also unstable. **Evolutionary** uses evolutionary strategies and is slower but novel. Experiment for your problem.
+**Q: Is STANNO production-ready?**
+A: For anomaly detection and online learning: **yes**. For regression or general purpose training: **no**. For ComfyUI image workflows: **yes, use the nodes**.
+---
+## Bottom Line
+STANNO is specialized for anomaly detection, online learning, cascading, and ComfyUI workflows. It's not a general-purpose neural network and not a replacement for PyTorch or TensorFlow. Use it where the strengths match your problem.

examples/anomaly_filter.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "stanno": {
+    "layers": [128, 64, 64, 128],
+    "trainer_type": "fixed",
+    "learning_rate": 0.001,
+    "feedback_projection": "linear"
+  },
+  "data": {
+    "path": "examples/normal_embeddings.npy",
+    "format": "numpy",
+    "split_ratio": 0.8,
+    "normalize": true
+  },
+  "fit": {
+    "epochs": 200,
+    "batch_size": 32,
+    "log_every": 20,
+    "patience": 15
+  },
+  "save_path": "examples/anomaly_filter.stanno.pkl",
+  "filter": {
+    "anomaly_threshold": 0.65,
+    "block_above_threshold": true,
+    "metadata_field": "stanno_filter"
+  },
+  "llm": {
+    "base_url": "http://localhost:11434",
+    "model": "llama3.2:3b",
+    "temperature": 0.7,
+    "max_tokens": 512,
+    "timeout_seconds": 60
+  }
+}

examples/cascade_autoencoder.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_comment": "CascadeSTANNO autoencoder: encoder [16→8→4] + decoder [4→8→16]",
+  "stages": [
+    {
+      "layers": [16, 8, 4],
+      "trainer_type": "fixed",
+      "learning_rate": 0.05
+    },
+    {
+      "layers": [4, 8, 16],
+      "trainer_type": "fixed",
+      "learning_rate": 0.05
+    }
+  ],
+  "frozen": [false, false],
+  "mode": "endtoend",
+  "data": {
+    "format": "builtin:sin",
+    "n_samples": 256
+  },
+  "fit": {
+    "epochs": 300,
+    "batch_size": 32,
+    "patience": 30,
+    "log_every": 50
+  },
+  "save_path": "examples/cascade_autoencoder.cascade.pkl"
+}

examples/sin_regression.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_comment": "Sin regression example. STANNO approximates sin(2πx) on x∈[0,1]. FixedTrainerNet is equivalent to vanilla SGD — lr=0.05 converges to MAE~0.15. For MAE<0.01, use Adam (not available in FixedTrainer) or LocalRuleTrainerNet after meta-training.",
+  "stanno": {
+    "layers": [1, 32, 1],
+    "trainer_type": "fixed",
+    "learning_rate": 0.05,
+    "feedback_projection": "repeat"
+  },
+  "data": {
+    "format": "builtin:sin",
+    "n_samples": 512,
+    "split_ratio": 0.8,
+    "normalize": false
+  },
+  "fit": {
+    "epochs": 300,
+    "batch_size": 64,
+    "log_every": 30,
+    "patience": 20
+  },
+  "save_path": "examples/sin_regression.stanno.pkl",
+  "dream": [
+    {
+      "num_steps": 32,
+      "noise_sigma": 0.0,
+      "blind_inputs": false
+    },
+    {
+      "num_steps": 32,
+      "noise_sigma": 0.1,
+      "blind_inputs": false
+    },
+    {
+      "num_steps": 32,
+      "noise_sigma": 0.3,
+      "blind_inputs": true
+    }
+  ]
+}

examples/sin_regression.stanno.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ad6551ae1d978018f1ad04b08130f42ae39fe159b76a52295fe1b2a2b032c68
+size 2466

pyproject.toml ADDED Viewed

	@@ -0,0 +1,31 @@

+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "stanno"
+version = "0.1.0"
+description = "Self-Training Artificial Neural Network Object — biologically-inspired direct-weight-modification neural architecture"
+readme = "README.md"
+requires-python = ">=3.9"
+license = {text = "MIT"}
+keywords = ["neural-network", "stanno", "thaler", "meta-learning"]
+dependencies = [
+    "numpy>=1.24",
+]
+[project.optional-dependencies]
+data = ["pandas>=2.0"]
+llm  = ["httpx>=0.27"]
+torch = ["torch>=2.0"]
+all  = ["stanno[data,llm,torch]"]
+[project.scripts]
+stanno = "stanno.cli:main"
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["stanno*"]
+[tool.setuptools.package-data]
+stanno = ["py.typed"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# Core (always required)
+numpy>=1.24
+# Data loading (optional but recommended)
+pandas>=2.0
+# LLM integration (optional)
+httpx>=0.27
+# PyTorch (optional — enables TorchBackend and Phase 2b meta-training)
+# torch>=2.0

scripts/generate_clip_embeddings.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""
+Generate CLIP image embeddings for a folder of reference images.
+These embeddings are then used to train a STANNO as a style autoencoder,
+which can be loaded into the ComfyUI STANNODreamCond or STANNODynamicLoRA
+nodes for conditioning/weight-patch injection.
+Usage:
+    python scripts/generate_clip_embeddings.py \
+        --dir  my_style_images/ \
+        --out  style_embeddings.npy \
+        [--model ViT-L-14]  [--pretrained openai]
+Requirements:
+    pip install open-clip-torch Pillow
+Outputs:
+    A .npy file of shape (N, 768) — one 768-dim CLIP embedding per image.
+    Compatible with SD 1.5 CLIP-L text encoder embedding space.
+"""
+from __future__ import annotations
+import argparse
+import sys
+from pathlib import Path
+import numpy as np
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line arguments of the CLIP embedding generator."""
+    p = argparse.ArgumentParser(description="Generate CLIP embeddings for a folder of images")
+    # --dir and --out are mandatory; every other option falls back to its default.
+    p.add_argument("--dir", required=True, help="Folder of input images (png, jpg, webp)")
+    p.add_argument("--out", required=True, help="Output .npy path")
+    p.add_argument("--model", default="ViT-L-14", help="OpenCLIP model name")
+    p.add_argument("--pretrained", default="openai", help="OpenCLIP pretrained weights")
+    p.add_argument("--batch", type=int, default=16, help="Batch size for encoding")
+    p.add_argument("--device", default="cuda", help="Device: cuda or cpu")
+    return p.parse_args()
+def main() -> None:
+    args = parse_args()
+    try:
+        import torch
+        import open_clip
+        from PIL import Image
+    except ImportError as e:
+        print(f"Missing dependency: {e}")
+        print("Install with: pip install open-clip-torch Pillow")
+        sys.exit(1)
+    image_paths = sorted(
+        p for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp")
+        for p in Path(args.dir).glob(ext)
+    )
+    if not image_paths:
+        print(f"No images found in {args.dir}")
+        sys.exit(1)
+    print(f"Found {len(image_paths)} images in {args.dir}")
+    model, _, preprocess = open_clip.create_model_and_transforms(
+        args.model, pretrained=args.pretrained
+    )
+    model.eval().to(args.device)
+    all_embeddings: list[np.ndarray] = []
+    for i in range(0, len(image_paths), args.batch):
+        batch_paths = image_paths[i : i + args.batch]
+        imgs = torch.stack(
+            [preprocess(Image.open(str(p)).convert("RGB")) for p in batch_paths]
+        ).to(args.device)
+        with torch.no_grad():
+            feats = model.encode_image(imgs)
+        all_embeddings.append(feats.cpu().numpy())
+        print(f"  Encoded {min(i + args.batch, len(image_paths))}/{len(image_paths)}")
+    embeddings = np.concatenate(all_embeddings, axis=0).astype(np.float32)
+    out_path = Path(args.out)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    np.save(str(out_path), embeddings)
+    print(f"\nSaved {embeddings.shape} embeddings → {out_path}")
+    print(f"Use this file with train_stanno_on_embeddings.py or STANNOTrainImages (ComfyUI).")
+if __name__ == "__main__":
+    main()

scripts/train_stanno_on_embeddings.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""
+Train a STANNO autoencoder on pre-computed CLIP embeddings.
+Run generate_clip_embeddings.py first to produce the .npy file, then run
+this script to train and save the STANNO.  The resulting .pkl file can be
+loaded into ComfyUI via the STANNOLoad node.
+Usage:
+    python scripts/train_stanno_on_embeddings.py \
+        --embeddings style_embeddings.npy \
+        --out         stanno_clip_style.pkl \
+        [--hidden     256]   \
+        [--epochs     300]   \
+        [--lr         0.005] \
+        [--trainer    fixed]
+The input/output dimension is inferred automatically from the embedding file
+(typically 768 for SD 1.5 / ViT-L-14 CLIP).
+"""
+from __future__ import annotations
+import argparse
+import pickle
+import sys
+from pathlib import Path
+import numpy as np
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line arguments of the STANNO autoencoder training script."""
+    p = argparse.ArgumentParser(description="Train a STANNO autoencoder on CLIP embeddings")
+    # --embeddings and --out are mandatory; every other option falls back to its default.
+    p.add_argument("--embeddings", required=True, help="Path to .npy file of shape (N, dim)")
+    p.add_argument("--out", required=True, help="Output .pkl path for the trained STANNO")
+    p.add_argument("--hidden", type=int, default=256,
+                   help="Hidden layer width (default 256 → [dim, 256, dim])")
+    p.add_argument("--extra-hidden", type=int, default=0,
+                   help="Add a second hidden layer of this width (0 = disabled)")
+    p.add_argument("--epochs", type=int, default=300, help="Training epochs (default 300)")
+    p.add_argument("--batch-size", type=int, default=32)
+    p.add_argument("--lr", type=float, default=0.005, help="Learning rate")
+    p.add_argument("--trainer", default="fixed",
+                   choices=["fixed", "local_rule", "evolutionary"])
+    return p.parse_args()
+def main() -> None:
+    args = parse_args()
+    embeddings_path = Path(args.embeddings)
+    if not embeddings_path.is_file():
+        print(f"File not found: {embeddings_path}")
+        sys.exit(1)
+    embeddings = np.load(str(embeddings_path)).astype(np.float32)
+    n, dim = embeddings.shape
+    print(f"Loaded {n} embeddings of dim={dim} from {embeddings_path}")
+    # Build layers list
+    layers = [dim, args.hidden]
+    if args.extra_hidden > 0:
+        layers.append(args.extra_hidden)
+    layers.append(dim)
+    print(f"Architecture: {layers}")
+    # Import STANNO (add repo root to path if needed)
+    repo_root = str(Path(__file__).parent.parent)
+    if repo_root not in sys.path:
+        sys.path.insert(0, repo_root)
+    from stanno.config.schema import STANNOConfig
+    from stanno.core.stanno import STANNO
+    config = STANNOConfig(
+        layers=layers,
+        trainer_type=args.trainer,
+        learning_rate=args.lr,
+    )
+    stanno = STANNO(config)
+    report_every = max(1, args.epochs // 10)
+    def log_cb(epoch: int, loss: float) -> None:
+        if (epoch + 1) % report_every == 0:
+            print(f"  epoch {epoch + 1:5d} / {args.epochs}  loss={loss:.5f}")
+    print(f"\nTraining STANNO ({args.trainer}) for {args.epochs} epochs …")
+    stanno.fit(
+        embeddings,
+        embeddings,
+        epochs=args.epochs,
+        batch_size=args.batch_size,
+        callback=log_cb,
+    )
+    out_path = Path(args.out)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(str(out_path), "wb") as f:
+        pickle.dump(stanno, f)
+    # Quick sanity check
+    preds = stanno.predict(embeddings[:8])
+    mse = float(np.mean((preds - embeddings[:8]) ** 2))
+    print(f"\nFinal MSE on first 8 samples: {mse:.5f}")
+    print(f"Saved trained STANNO → {out_path}")
+    print("\nNext steps:")
+    print("  1. Load in ComfyUI: STANNO Loader node → model_path =", out_path)
+    print("  2. For Dream Conditioning: connect to 'STANNO Dream Conditioning' node")
+    print("  3. For Dynamic LoRA:       connect to 'STANNO Dynamic LoRA' node")
+if __name__ == "__main__":
+    main()

stanno.py ADDED Viewed

	@@ -0,0 +1,314 @@

+"""STANNO-style proof of concept
+This module implements a very simple Self-Training Artificial Neural Network Object (STANNO)
+loosely inspired by Thaler's description: two neural networks, one of which trains the other,
+optionally folded into a single object.
+Design choices:
+- TraineeNet: a small multilayer perceptron (MLP) that learns a supervised mapping.
+- Trainer: training logic embedded inside STANNO using standard gradient descent.
+  Conceptually this plays the role of the "trainer" network described in the literature,
+  but here it is implemented as explicit code for simplicity.
+Features included for experimentation:
+- Supervised training on a toy dataset (e.g., y = sin(x)).
+- "Dreaming": run the trained net on a fixed or random latent input with inputs partially
+  or totally "blinded" (set to zero or constant) to observe internal dynamics.
+- Noise injection: add Gaussian noise with adjustable standard deviation to all weights,
+  to explore how output complexity changes with noise level (from "stupidity" to chaos).
+- Lesioning: randomly zero out a fraction of weights to mimic progressive "death" of
+  connections and observe degradation ("tunnel vision").
+The goal is not to reproduce the original spreadsheet implementation, but to give a
+simple, hackable playground in modern Python/NumPy that you can extend (including
+replacing the hard-coded trainer by a learned meta-network if desired).
+"""
+from __future__ import annotations
+import numpy as np
+from dataclasses import dataclass
+from typing import Tuple, Callable
+@dataclass
+class TraineeNet:
+    """Simple 2-layer MLP (input -> hidden -> output).
+    This is the network that will be trained by the STANNO object.
+    Weights are stored as (in_features, out_features) matrices and biases as
+    (1, out_features) rows, so a batch is propagated with ``x @ W + b``.
+    """
+    input_dim: int
+    hidden_dim: int
+    output_dim: int
+    def __post_init__(self) -> None:
+        # Unseeded generator: every instance starts from different random weights.
+        rng = np.random.default_rng()
+        # Xavier-like initialization
+        self.W1 = rng.normal(0.0, 1.0 / np.sqrt(self.input_dim), (self.input_dim, self.hidden_dim))
+        self.b1 = np.zeros((1, self.hidden_dim))
+        self.W2 = rng.normal(0.0, 1.0 / np.sqrt(self.hidden_dim), (self.hidden_dim, self.output_dim))
+        self.b2 = np.zeros((1, self.output_dim))
+    def parameters(self) -> list[np.ndarray]:
+        # Returns the live arrays (not copies): in-place edits by the caller change the network.
+        return [self.W1, self.b1, self.W2, self.b2]
+    def forward(self, x: np.ndarray) -> Tuple[np.ndarray, dict]:
+        """Forward pass returning output and cache for backprop."""
+        z1 = x @ self.W1 + self.b1
+        a1 = np.tanh(z1)
+        z2 = a1 @ self.W2 + self.b2
+        y = z2  # regression; for classification you could add softmax
+        # Keep the input and the intermediate activations needed by the backward pass.
+        cache = {"x": x, "z1": z1, "a1": a1, "z2": z2}
+        return y, cache
+    def apply_parameter_noise(self, sigma: float, rng: np.random.Generator | None = None) -> None:
+        """Add Gaussian noise with std sigma to all parameters in-place."""
+        # Non-positive sigma is a no-op.
+        if sigma <= 0:
+            return
+        if rng is None:
+            rng = np.random.default_rng()
+        # Weights and biases alike are perturbed.
+        for p in self.parameters():
+            p += rng.normal(0.0, sigma, p.shape)
+    def lesion(self, fraction: float, rng: np.random.Generator | None = None) -> None:
+        """Randomly zero out a fraction of weights (simulated neuron/connection death).
+        fraction in [0, 1]. Only affects W1 and W2; biases remain.
+        """
+        # Out-of-range values are clipped rather than rejected.
+        fraction = float(np.clip(fraction, 0.0, 1.0))
+        if fraction <= 0:
+            return
+        if rng is None:
+            rng = np.random.default_rng()
+        for W in (self.W1, self.W2):
+            # Each weight is zeroed independently with probability `fraction`,
+            # so the share actually removed is only approximately `fraction`.
+            mask = rng.random(W.shape) < fraction
+            W[mask] = 0.0
+class STANNO:
+    """Self-Training Neural Network Object (STANNO-style).
+    Encapsula:
+    - Una red entrenable (TraineeNet).
+    - Un algoritmo de entrenamiento interno (gradient descent) que actúa como
+      "trainer" y actualiza los pesos a partir de ejemplos.
+    Esto sigue el espíritu de los STANNO descritos por Thaler: un objeto que
+    contiene la red y su mecanismo de entrenamiento, con capacidad de seguir
+    aprendiendo en línea.[cite:1][cite:3]
+    """
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        learning_rate: float = 1e-2,
+    ) -> None:
+        self.net = TraineeNet(input_dim, hidden_dim, output_dim)
+        self.learning_rate = learning_rate
+    # ---------------------- Core training logic ----------------------
+    def _loss_and_grads(self, x: np.ndarray, y_true: np.ndarray) -> Tuple[float, list]:
+        """Compute MSE loss and gradients via backprop for one batch."""
+        y_pred, cache = self.net.forward(x)
+        # Mean squared error
+        diff = y_pred - y_true
+        loss = float(np.mean(diff ** 2))
+        # Backprop
+        batch_size = x.shape[0]
+        dL_dy = (2.0 / batch_size) * diff  # dL/dy
+        # Layer 2
+        a1 = cache["a1"]
+        dL_dW2 = a1.T @ dL_dy
+        dL_db2 = np.sum(dL_dy, axis=0, keepdims=True)
+        # Through tanh
+        dz2 = dL_dy @ self.net.W2.T
+        da1 = dz2
+        dz1 = da1 * (1.0 - np.tanh(cache["z1"]) ** 2)
+        # Layer 1
+        x_batch = cache["x"]
+        dL_dW1 = x_batch.T @ dz1
+        dL_db1 = np.sum(dz1, axis=0, keepdims=True)
+        grads = [dL_dW1, dL_db1, dL_dW2, dL_db2]
+        return loss, grads
+    def trainer_step(self, x: np.ndarray, y_true: np.ndarray) -> float:
+        """One training step of the internal trainer over a mini-batch.
+        Conceptualmente, esto es el "trainer network" que ajusta pesos del
+        TraineeNet. Aquí se implementa como gradiente descendente directo.
+        """
+        loss, grads = self._loss_and_grads(x, y_true)
+        for param, grad in zip(self.net.parameters(), grads):
+            param -= self.learning_rate * grad
+        return loss
+    def fit(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        epochs: int = 1000,
+        batch_size: int = 32,
+        shuffle: bool = True,
+        callback: Callable[[int, float], None] | None = None,
+    ) -> None:
+        """Train on a dataset using internal trainer.
+        Args:
+            x: shape (N, input_dim)
+            y: shape (N, output_dim)
+            epochs: number of passes over the dataset
+            batch_size: mini-batch size
+            shuffle: whether to shuffle each epoch
+            callback: optional function(epoch, loss) for logging
+        """
+        N = x.shape[0]
+        rng = np.random.default_rng()
+        for epoch in range(epochs):
+            idx = np.arange(N)
+            if shuffle:
+                rng.shuffle(idx)
+            x_shuf = x[idx]
+            y_shuf = y[idx]
+            losses = []
+            for start in range(0, N, batch_size):
+                end = start + batch_size
+                xb = x_shuf[start:end]
+                yb = y_shuf[start:end]
+                loss = self.trainer_step(xb, yb)
+                losses.append(loss)
+            mean_loss = float(np.mean(losses))
+            if callback is not None:
+                callback(epoch, mean_loss)
+    # ---------------------- Inference & "dreaming" ----------------------
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        y, _ = self.net.forward(x)
+        return y
+    def dream(
+        self,
+        num_steps: int = 128,
+        input_seed: np.ndarray | None = None,
+        noise_sigma: float = 0.0,
+        blind_inputs: bool = False,
+        rng: np.random.Generator | None = None,
+    ) -> np.ndarray:
+        """Generate a sequence of outputs by driving the net with a simple or blind input.
+        Args:
+            num_steps: length of the sequence to generate.
+            input_seed: initial input vector; if None, uses zeros.
+            noise_sigma: amount of noise to add to weights *once* before dreaming.
+            blind_inputs: if True, inputs are forced to zero every step.
+            rng: optional RNG.
+        Returns:
+            Array of generated outputs of shape (num_steps, output_dim).
+        """
+        if rng is None:
+            rng = np.random.default_rng()
+        # Work on a copy so as not to permanently corrupt the trained net
+        shadow = TraineeNet(self.net.input_dim, self.net.hidden_dim, self.net.output_dim)
+        shadow.W1 = self.net.W1.copy()
+        shadow.b1 = self.net.b1.copy()
+        shadow.W2 = self.net.W2.copy()
+        shadow.b2 = self.net.b2.copy()
+        shadow.apply_parameter_noise(noise_sigma, rng=rng)
+        if input_seed is None:
+            x = np.zeros((1, self.net.input_dim))
+        else:
+            x = input_seed.reshape(1, -1)
+        outputs = []
+        for _ in range(num_steps):
+            if blind_inputs:
+                x_step = np.zeros_like(x)
+            else:
+                x_step = x
+            y, _ = shadow.forward(x_step)
+            outputs.append(y.copy())
+            # Simple feedback: feed output (or part of él) as next input
+            # This makes the sequence sensitive to internal weights.
+            if self.net.output_dim == self.net.input_dim:
+                x = y
+            else:
+                # Project or tile to match input dim
+                x = np.repeat(y, self.net.input_dim // self.net.output_dim + 1, axis=1)[
+                    :, : self.net.input_dim
+                ]
+        return np.concatenate(outputs, axis=0)
+# ---------------------- Demo utilities ----------------------
+def make_sin_dataset(n_samples: int = 256) -> Tuple[np.ndarray, np.ndarray]:
+    """Simple 1D regression dataset: y = sin(x) on [0, 2π]."""
+    # Unseeded generator: a different sample is drawn on every call.
+    rng = np.random.default_rng()
+    # Both arrays have shape (n_samples, 1).
+    x = rng.uniform(0.0, 2.0 * np.pi, size=(n_samples, 1))
+    y = np.sin(x)
+    return x, y
+def demo_train_and_dream() -> None:
+    """Train a STANNO on sin(x) and then explore noise/lesion effects.
+    Run this function directly ("python stanno_poc.py") to see numeric output.
+    """
+    x, y = make_sin_dataset(512)
+    stanno = STANNO(input_dim=1, hidden_dim=32, output_dim=1, learning_rate=5e-3)
+    print("Training STANNO on y = sin(x)...")
+    stanno.fit(
+        x,
+        y,
+        epochs=500,
+        batch_size=64,
+        callback=lambda e, l: print(f"Epoch {e:4d}  loss={l:.5f}") if (e + 1) % 100 == 0 else None,
+    )
+    # Evaluate basic fit
+    xs = np.linspace(0, 2 * np.pi, 16).reshape(-1, 1)
+    preds = stanno.predict(xs)
+    print("
+Sample predictions after training:")
+    for xi, yi, yi_hat in zip(xs.flatten(), np.sin(xs).flatten(), preds.flatten()):
+        print(f"x={xi:5.2f}  sin(x)={yi: .3f}  pred={yi_hat: .3f}")
+    # Dreaming with different noise levels
+    for sigma in [0.0, 0.05, 0.2, 0.5]:
+        seq = stanno.dream(num_steps=32, noise_sigma=sigma, blind_inputs=True)
+        print(f"
+Dreaming with noise_sigma={sigma} (first 10 outputs):")
+        print(np.round(seq[:10].flatten(), 3))
+    # Lesion experiment
+    print("
+Lesioning 70% of weights and evaluating error on test points...")
+    # Backup parameters
+    backup = [p.copy() for p in stanno.net.parameters()]
+    stanno.net.lesion(fraction=0.7)
+    preds_lesioned = stanno.predict(xs)
+    mse_lesioned = float(np.mean((preds_lesioned - np.sin(xs)) ** 2))
+    print(f"MSE after lesioning 70% of weights: {mse_lesioned:.4f}")
+    # Restore
+    for param, b in zip(stanno.net.parameters(), backup):
+        param[...] = b
+if __name__ == "__main__":
+    demo_train_and_dream()

stanno/__init__.py ADDED Viewed

	@@ -0,0 +1,24 @@

+"""
+STANNO — Self-Training Artificial Neural Network Objects.
+Inspired by Stephen Thaler's STANNO architecture (US patent 5852815A).
+A neural object in which one network (TrainerNet) directly updates the
+weights of another (TraineeNet), with support for dream mode, noise
+injection, and progressive lesioning.
+"""
+from stanno.core.stanno import STANNO
+from stanno.config.schema import STANNOConfig, DataConfig, LLMConfig, FilterConfig
+from stanno.integration.cascade import CascadeSTANNO
+from stanno.integration.dsanno import DSANNO, ScanResult
+__all__ = [
+    "STANNO",
+    "STANNOConfig",
+    "DataConfig",
+    "LLMConfig",
+    "FilterConfig",
+    "CascadeSTANNO",
+    "DSANNO",
+    "ScanResult",
+]

stanno/__main__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from stanno.cli import main
2	+ main()

stanno/cli.py ADDED Viewed

	@@ -0,0 +1,398 @@

+"""
+STANNO command-line interface.
+Usage:
+  stanno train    --config examples/sin_regression.json
+  stanno predict  --config examples/sin_regression.json --input 0.25
+  stanno dream    --config examples/sin_regression.json
+  stanno evaluate --config examples/sin_regression.json
+  stanno filter   --config examples/anomaly_filter.json [--file prompts.txt]
+  stanno cascade  --config examples/cascade_autoencoder.json
+  stanno scan     --config examples/scan_demo.json --data data.npy [--threshold 0.05] [--top-k 10]
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import numpy as np
+# ─── helpers ─────────────────────────────────────────────────────────────────
+def _load_config(path: str) -> Dict[str, Any]:
+    """Read the UTF-8 JSON file at ``path`` and return the parsed content."""
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+def _build_stanno(cfg_dict: Dict[str, Any]):
+    """Instantiate a STANNO from the 'stanno' section of a config dict."""
+    # Imported lazily, inside the function, like every other project import in this module.
+    from stanno.config.schema import STANNOConfig
+    from stanno.core.stanno import STANNO
+    # A missing 'stanno' section yields an all-defaults model.
+    s = cfg_dict.get("stanno", {})
+    config = STANNOConfig(
+        layers=s.get("layers", [1, 32, 1]),
+        trainer_type=s.get("trainer_type", "fixed"),
+        backend=s.get("backend", "numpy"),
+        learning_rate=s.get("learning_rate", 0.01),
+        feedback_projection=s.get("feedback_projection", "repeat"),
+        trainer_kwargs=s.get("trainer_kwargs", {}),
+    )
+    return STANNO(config)
+def _load_data(cfg_dict: Dict[str, Any]):
+    """Load data using the 'data' section of a config dict."""
+    from stanno.config.schema import DataConfig
+    from stanno.data.base import make_loader
+    # A missing 'data' section falls back to the "builtin:sin" format.
+    d = cfg_dict.get("data", {})
+    data_config = DataConfig(
+        path=d.get("path", ""),
+        format=d.get("format", "builtin:sin"),
+        input_cols=d.get("input_cols"),
+        output_cols=d.get("output_cols"),
+        split_ratio=d.get("split_ratio", 0.8),
+        normalize=d.get("normalize", False),
+        n_samples=d.get("n_samples"),
+    )
+    # Callers in this module unpack the result as an (x, y) pair.
+    return make_loader(data_config).load()
+# ─── sub-commands ─────────────────────────────────────────────────────────────
+def cmd_train(args: argparse.Namespace) -> None:
+    cfg = _load_config(args.config)
+    stanno = _build_stanno(cfg)
+    x, y = _load_data(cfg)
+    fit_cfg = cfg.get("fit", {})
+    epochs     = fit_cfg.get("epochs", 1000)
+    batch_size = fit_cfg.get("batch_size", 32)
+    log_every  = fit_cfg.get("log_every", max(1, epochs // 10))
+    patience   = fit_cfg.get("patience", 20)  # early stopping: stop if no improvement for N epochs
+    def callback(epoch: int, loss: float) -> None:
+        if (epoch + 1) % log_every == 0 or epoch == 0:
+            print(f"  epoch {epoch+1:5d}/{epochs}  loss={loss:.6f}")
+    print(f"Training {stanno} on {len(x)} samples …")
+    stanno.fit(x, y, epochs=epochs, batch_size=batch_size, callback=callback, patience=patience)
+    save_path = cfg.get("save_path") or args.config.replace(".json", ".stanno.pkl")
+    stanno.save(save_path)
+    print(f"Saved → {save_path}")
+def cmd_predict(args: argparse.Namespace) -> None:
+    cfg = _load_config(args.config)
+    load_path = cfg.get("save_path") or args.config.replace(".json", ".stanno.pkl")
+    from stanno.core.stanno import STANNO
+    stanno = STANNO.load(load_path)
+    x = np.array([[float(v) for v in args.input.split(",")]], dtype=np.float32)
+    y = stanno.predict(x)
+    print(f"input={x.ravel().tolist()}  →  output={y.ravel().tolist()}")
+def cmd_dream(args: argparse.Namespace) -> None:
+    cfg = _load_config(args.config)
+    load_path = cfg.get("save_path") or args.config.replace(".json", ".stanno.pkl")
+    from stanno.core.stanno import STANNO
+    stanno = STANNO.load(load_path)
+    dream_cfgs = cfg.get("dream")
+    if dream_cfgs is None:
+        dream_cfgs = [{}]
+    if isinstance(dream_cfgs, dict):
+        dream_cfgs = [dream_cfgs]
+    for i, dcfg in enumerate(dream_cfgs):
+        seq = stanno.dream(
+            num_steps=dcfg.get("num_steps", 64),
+            noise_sigma=dcfg.get("noise_sigma", 0.1),
+            blind_inputs=dcfg.get("blind_inputs", False),
+        )
+        print(f"\n── dream {i} (noise={dcfg.get('noise_sigma', 0.1)}) ──")
+        for step, row in enumerate(seq):
+            print(f"  step {step:3d}: {row.tolist()}")
+def cmd_evaluate(args: argparse.Namespace) -> None:
+    cfg = _load_config(args.config)
+    load_path = cfg.get("save_path") or args.config.replace(".json", ".stanno.pkl")
+    from stanno.core.stanno import STANNO
+    stanno = STANNO.load(load_path)
+    x, y = _load_data(cfg)
+    y_pred = stanno.predict(x)
+    mse  = float(np.mean((y_pred - y) ** 2))
+    mae  = float(np.mean(np.abs(y_pred - y)))
+    print(f"MSE={mse:.6f}  MAE={mae:.6f}  N={len(x)}")
+def cmd_filter(args: argparse.Namespace) -> None:
+    cfg = _load_config(args.config)
+    load_path = cfg.get("save_path") or args.config.replace(".json", ".stanno.pkl")
+    from stanno.core.stanno import STANNO
+    from stanno.config.schema import FilterConfig, LLMConfig
+    from stanno.integration.filter import STANNOFilter, FilteredRequestError
+    from stanno.integration.llm_client import LLMClient
+    stanno = STANNO.load(load_path)
+    fcfg = cfg.get("filter", {})
+    filter_config = FilterConfig(
+        anomaly_threshold=fcfg.get("anomaly_threshold", 0.7),
+        block_above_threshold=fcfg.get("block_above_threshold", True),
+        metadata_field=fcfg.get("metadata_field", "stanno_filter"),
+    )
+    lcfg = cfg.get("llm", {})
+    llm_config = LLMConfig(
+        base_url=lcfg.get("base_url", "http://localhost:11434"),
+        model=lcfg.get("model", "llama3.2:3b"),
+        api_key=lcfg.get("api_key"),
+        temperature=lcfg.get("temperature", 0.7),
+        max_tokens=lcfg.get("max_tokens", 512),
+        timeout_seconds=lcfg.get("timeout_seconds", 60),
+    )
+    llm = LLMClient(llm_config)
+    filt = STANNOFilter(stanno, filter_config, llm)
+    lines: List[str] = []
+    if args.file:
+        lines = Path(args.file).read_text(encoding="utf-8").splitlines()
+    else:
+        print("Interactive filter REPL (Ctrl-D to quit):")
+        try:
+            while True:
+                line = input("> ")
+                lines.append(line)
+        except EOFError:
+            pass
+    for line in lines:
+        if not line.strip():
+            continue
+        # Encode prompt as char-code vector (demo encoding)
+        x = np.array([ord(c) / 127.0 for c in line[:stanno.net.input_dim]],
+                     dtype=np.float32)
+        if len(x) < stanno.net.input_dim:
+            x = np.pad(x, (0, stanno.net.input_dim - len(x)))
+        score, meta = filt.score(x.reshape(1, -1))
+        blocked = meta["blocked"]
+        status = "BLOCKED" if blocked else "PASSED"
+        print(f"[{status}  score={score:.3f}]  {line[:80]}")
+def cmd_cascade(args: argparse.Namespace) -> None:
+    """Train or evaluate a CascadeSTANNO from a config file.
+    Config format (JSON):
+      {
+        "stages": [
+          {"layers": [784, 128], "trainer_type": "fixed", "learning_rate": 0.01},
+          {"layers": [128, 784], "trainer_type": "fixed", "learning_rate": 0.01}
+        ],
+        "frozen": [false, false],        // optional, default all false
+        "data": { ... same as regular config ... },
+        "fit": {"epochs": 200, "batch_size": 64, "patience": 20, "log_every": 20},
+        "mode": "endtoend",              // "endtoend" | "staged" (default: endtoend)
+        "save_path": "examples/cascade.stanno.pkl"
+      }
+    """
+    from stanno.config.schema import STANNOConfig
+    from stanno.core.stanno import STANNO
+    from stanno.integration.cascade import CascadeSTANNO
+    cfg = _load_config(args.config)
+    stage_cfgs = cfg.get("stages", [])
+    if not stage_cfgs:
+        print("Error: config must have a non-empty 'stages' list.", file=sys.stderr)
+        sys.exit(1)
+    frozen = cfg.get("frozen", [False] * len(stage_cfgs))
+    stages = []
+    for sc in stage_cfgs:
+        scfg = STANNOConfig(
+            layers=sc.get("layers", [1, 32, 1]),
+            trainer_type=sc.get("trainer_type", "fixed"),
+            backend=sc.get("backend", "numpy"),
+            learning_rate=sc.get("learning_rate", 0.01),
+            feedback_projection=sc.get("feedback_projection", "repeat"),
+            trainer_kwargs=sc.get("trainer_kwargs", {}),
+        )
+        stages.append(STANNO(scfg))
+    cascade = CascadeSTANNO(stages, frozen=frozen)
+    print(cascade)
+    x, y = _load_data(cfg)
+    fit_cfg = cfg.get("fit", {})
+    epochs     = fit_cfg.get("epochs", 100)
+    batch_size = fit_cfg.get("batch_size", 32)
+    patience   = fit_cfg.get("patience", 20)
+    log_every  = fit_cfg.get("log_every", max(1, epochs // 10))
+    mode       = cfg.get("mode", "endtoend")
+    print(f"Training cascade ({mode} mode) on {len(x)} samples …")
+    if mode == "staged":
+        # For staged mode, targets must be provided per stage — use the same y
+        # for all stages (user should provide intermediate_targets in code).
+        histories = cascade.staged_fit(
+            x,
+            intermediate_targets=[y] * len(stages),
+            epochs=epochs,
+            batch_size=batch_size,
+            patience=patience,
+            log_every=log_every,
+        )
+        for k, h in enumerate(histories):
+            if h:
+                print(f"  Stage {k} final loss: {h[-1]:.6f}")
+    else:
+        history = cascade.fit(
+            x, y,
+            epochs=epochs,
+            batch_size=batch_size,
+            patience=patience,
+            log_every=log_every,
+        )
+        if history:
+            print(f"  Final loss: {history[-1]:.6f}")
+    save_path = cfg.get("save_path") or args.config.replace(".json", ".cascade.pkl")
+    cascade.save(save_path)
+    print(f"Saved → {save_path}")
+def cmd_scan(args: argparse.Namespace) -> None:
+    """Scan a dataset with a trained STANNO (DSANNO mode).
+    Requires a trained model at --model (or loads from config save_path).
+    Scans --data (npy/npz file) and prints top-k matches or threshold-filtered results.
+    With neither --top-k nor --threshold, prints score percentiles and a suggested threshold.
+    Exits with status 1 when the model or the data file cannot be found.
+    """
+    from stanno.core.stanno import STANNO
+    from stanno.integration.dsanno import DSANNO
+    # Load STANNO
+    # getattr with a default tolerates parsers that do not define the option at all.
+    model_path = getattr(args, "model", None)
+    if model_path is None and args.config:
+        cfg = _load_config(args.config)
+        model_path = cfg.get("save_path")
+    if not model_path or not Path(model_path).exists():
+        print(f"Error: model not found at {model_path!r}. Train first.", file=sys.stderr)
+        sys.exit(1)
+    stanno = STANNO.load(model_path)
+    mode = getattr(args, "mode", "reconstruction")
+    dsanno = DSANNO(stanno, mode=mode)
+    # Load data
+    data_path = getattr(args, "data", None)
+    if not data_path or not Path(data_path).exists():
+        print(f"Error: --data file not found: {data_path!r}", file=sys.stderr)
+        sys.exit(1)
+    x = np.load(data_path)
+    # np.load returns an archive object for .npz files; the array is taken from its 'x' entry.
+    if isinstance(x, np.lib.npyio.NpzFile):
+        x = x["x"]  # convention: 'x' key
+    x = np.asarray(x, dtype=np.float32)
+    print(f"Scanning {x.shape[0]} rows with {dsanno} …")
+    top_k = getattr(args, "top_k", None)
+    threshold = getattr(args, "threshold", None)
+    # --top-k takes precedence over --threshold; a top_k of 0 counts as "not given".
+    if top_k:
+        indices, scores, preds = dsanno.top_k(x, k=top_k)
+        print(f"\nTop {top_k} matches (lowest reconstruction error):")
+        for rank, (idx, sc) in enumerate(zip(indices, scores)):
+            print(f"  #{rank+1:3d}  row={idx:6d}  score={sc:.6f}  x={x[idx].tolist()}")
+    elif threshold is not None:
+        result = dsanno.scan(x, threshold=float(threshold))
+        print(result)
+        matched = result.matched_indices()
+        print(f"\n{len(matched)} rows matched (score ≤ {threshold}):")
+        for idx in matched[:50]:  # show at most 50
+            print(f"  row={idx:6d}  score={result.scores[idx]:.6f}")
+        if len(matched) > 50:
+            print(f"  … and {len(matched) - 50} more")
+    else:
+        # Just print summary statistics
+        result = dsanno.scan(x)
+        print(result)
+        p5, p50, p95 = np.percentile(result.scores, [5, 50, 95])
+        print(f"  score p5={p5:.4f}  p50={p50:.4f}  p95={p95:.4f}")
+        suggested = dsanno.calibrate_threshold(x)
+        print(f"  Suggested threshold (p95 of this data): {suggested:.4f}")
+# ─── entry point ──────────────────────────────────────────────────────────────
+def main(argv: Optional[List[str]] = None) -> None:
+    parser = argparse.ArgumentParser(
+        prog="stanno",
+        description="STANNO — Self-Training Artificial Neural Network Object CLI",
+    )
+    parser.add_argument("--version", action="version", version="stanno 0.1.0")
+    sub = parser.add_subparsers(dest="command", required=True)
+    # train
+    p_train = sub.add_parser("train", help="Train a STANNO from a config file")
+    p_train.add_argument("--config", required=True, help="Path to JSON config")
+    # predict
+    p_pred = sub.add_parser("predict", help="Run a single prediction")
+    p_pred.add_argument("--config", required=True, help="Path to JSON config")
+    p_pred.add_argument("--input", required=True, help="Comma-separated input values")
+    # dream
+    p_dream = sub.add_parser("dream", help="Run dream (generative) mode")
+    p_dream.add_argument("--config", required=True, help="Path to JSON config")
+    # evaluate
+    p_eval = sub.add_parser("evaluate", help="Evaluate on held-out data")
+    p_eval.add_argument("--config", required=True, help="Path to JSON config")
+    # filter
+    p_filt = sub.add_parser("filter", help="Run anomaly filter on prompts")
+    p_filt.add_argument("--config", required=True, help="Path to JSON config")
+    p_filt.add_argument("--file", default=None, help="File of prompts (one per line)")
+    # cascade
+    p_casc = sub.add_parser("cascade", help="Train a CascadeSTANNO")
+    p_casc.add_argument("--config", required=True, help="Path to cascade JSON config")
+    # scan
+    p_scan = sub.add_parser("scan", help="Scan a dataset with a trained STANNO (DSANNO)")
+    p_scan.add_argument("--config", default=None, help="Config with save_path")
+    p_scan.add_argument("--model", default=None, help="Explicit path to .stanno.pkl model")
+    p_scan.add_argument("--data", required=True, help="Path to .npy / .npz file to scan")
+    p_scan.add_argument("--threshold", type=float, default=None, help="Match score threshold")
+    p_scan.add_argument("--top-k", type=int, default=None, dest="top_k",
+                        help="Return k best-matching rows")
+    p_scan.add_argument("--mode", default="reconstruction",
+                        choices=["reconstruction", "prediction"],
+                        help="Scoring mode (default: reconstruction)")
+    args = parser.parse_args(argv)
+    dispatch = {
+        "train":    cmd_train,
+        "predict":  cmd_predict,
+        "dream":    cmd_dream,
+        "evaluate": cmd_evaluate,
+        "filter":   cmd_filter,
+        "cascade":  cmd_cascade,
+        "scan":     cmd_scan,
+    }
+    dispatch[args.command](args)
+# Run the CLI when this file is executed as a script.
+if __name__ == "__main__":
+    main()

stanno/config/__init__.py ADDED Viewed

File without changes

stanno/config/schema.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""
+Configuration dataclasses for the STANNO system.
+All configuration is expressed as plain Python dataclasses so no extra
+dependencies are needed. JSON config files are mapped to these objects
+by the CLI and any other entry points.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+@dataclass
+class STANNOConfig:
+    """
+    Complete architecture + training configuration for a STANNO object.
+    layers:
+        List of layer sizes including input and output dimensions.
+        Examples:
+          [1, 32, 1]           — 1-D regression (matches poc demo)
+          [784, 256, 128, 10]  — MNIST-scale classifier
+          [768, 256, 768]      — CLIP-embedding autoencoder (SD 1.5)
+          [512, 256, 256, 512] — deep autoencoder
+    trainer_type:
+        Which TrainerNet to use:
+          "fixed"       — 4-module patent-faithful implementation (default)
+          "local_rule"  — per-synapse learned update rule
+          "evolutionary"— ES-based, no autodiff required
+    backend:
+        Numerical backend ("numpy" | "torch").  Currently "numpy" is fully
+        implemented; "torch" requires PyTorch and enables meta-training.
+    learning_rate:
+        Base learning rate used by the active trainer.
+    feedback_projection:
+        How dream() feeds output back as the next input when
+        output_dim != input_dim:
+          "repeat"  — tile and truncate (default, preserves poc behaviour)
+          "linear"  — fixed random linear projection, initialized once
+          "zeros"   — zero-pad or truncate (no information fed back)
+    trainer_kwargs:
+        Optional extra keyword arguments forwarded to the TrainerNet
+        constructor (e.g. {"hidden_dim": 32} for LocalRuleTrainerNet).
+    """
+    layers: List[int] = field(default_factory=lambda: [1, 32, 1])
+    trainer_type: str = "fixed"
+    backend: str = "numpy"
+    learning_rate: float = 0.01
+    feedback_projection: str = "repeat"
+    trainer_kwargs: Dict[str, Any] = field(default_factory=dict)
+@dataclass
+class DataConfig:
+    """
+    Data source configuration.
+    format:
+        "csv"     — CSV or TSV flat file (requires pandas)
+        "json"    — JSON or JSONL file
+        "numpy"   — .npy or .npz file with pre-split arrays
+        "builtin:sin" — built-in sin(x) generator (no file needed)
+    input_cols / output_cols:
+        Column names (CSV/JSON) or indices to use as input / output.
+        If None, the loader uses its default split strategy.
+    input_dim / output_dim:
+        Override the inferred input/output dimensionality.
+    split_ratio:
+        Fraction of data to use for training (rest is test/validation).
+    normalize:
+        Whether to apply per-feature standardisation (zero mean, unit std).
+    n_samples:
+        Number of samples for built-in generators.
+    """
+    path: str = ""
+    format: str = "csv"
+    input_cols: Optional[List[str]] = None
+    output_cols: Optional[List[str]] = None
+    input_dim: Optional[int] = None
+    output_dim: Optional[int] = None
+    split_ratio: float = 0.8
+    normalize: bool = False
+    n_samples: int = 512
+@dataclass
+class LLMConfig:
+    """OpenAI-compatible LLM endpoint configuration.
+    Credentials are read from environment variables at runtime; the fields
+    here serve as fallbacks.  Priority: env var > field value.
+      STANNO_LLM_BASE_URL — overrides base_url
+      STANNO_LLM_API_KEY  — overrides api_key
+      STANNO_LLM_MODEL    — overrides model
+    """
+    base_url: str = "http://localhost:11434"
+    model: str = "mistral"
+    api_key: str = ""
+    temperature: float = 0.7
+    max_tokens: int = 200
+    timeout_seconds: int = 30
+@dataclass
+class FilterConfig:
+    """Configuration for the STANNOFilter anomaly-scoring layer."""
+    anomaly_threshold: float = 0.15
+    block_above_threshold: bool = False
+    metadata_field: str = "stanno_score"

stanno/core/__init__.py ADDED Viewed

File without changes

stanno/core/backend.py ADDED Viewed

	@@ -0,0 +1,162 @@

+"""
+Backend abstraction layer.
+Provides a BackendProtocol (structural typing) and a NumPyBackend concrete
+implementation.  A TorchBackend stub is included for future use; it will be
+wired in when meta-training via PyTorch is needed.
+All TraineeNet and TrainerNet operations go through the backend so the same
+algorithm can run on NumPy arrays or PyTorch tensors without changing the
+core logic.
+"""
+from __future__ import annotations
+from typing import Optional, Protocol, Tuple, runtime_checkable
+import numpy as np
+# ─── Protocol ────────────────────────────────────────────────────────────────
+@runtime_checkable
+class BackendProtocol(Protocol):
+    """Minimal set of array operations required by STANNO internals."""
+    def zeros(self, shape: Tuple[int, ...]) -> np.ndarray: ...
+    def ones(self, shape: Tuple[int, ...]) -> np.ndarray: ...
+    def normal(
+        self,
+        mean: float,
+        std: float,
+        shape: Tuple[int, ...],
+        rng: Optional[np.random.Generator] = None,
+    ) -> np.ndarray: ...
+    def tanh(self, x: np.ndarray) -> np.ndarray: ...
+    def matmul(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: ...
+    def sum(
+        self, x: np.ndarray, axis=None, keepdims: bool = False
+    ) -> np.ndarray: ...
+    def mean(self, x: np.ndarray, axis=None) -> np.ndarray: ...
+    def sqrt(self, x: np.ndarray) -> np.ndarray: ...
+    def clip(self, x: np.ndarray, min_val: float, max_val: float) -> np.ndarray: ...
+    def copy(self, x: np.ndarray) -> np.ndarray: ...
+    def to_numpy(self, x) -> np.ndarray: ...
+    def from_numpy(self, arr: np.ndarray) -> np.ndarray: ...
+# ─── NumPy backend ───────────────────────────────────────────────────────────
+class NumPyBackend:
+    """Default backend using NumPy.  No extra dependencies required."""
+    def zeros(self, shape):
+        return np.zeros(shape, dtype=np.float32)
+    def ones(self, shape):
+        return np.ones(shape, dtype=np.float32)
+    def normal(self, mean, std, shape, rng=None):
+        if rng is None:
+            rng = np.random.default_rng()
+        return rng.normal(mean, std, shape).astype(np.float32)
+    def tanh(self, x):
+        return np.tanh(x)
+    def matmul(self, a, b):
+        return a @ b
+    def sum(self, x, axis=None, keepdims=False):
+        return np.sum(x, axis=axis, keepdims=keepdims)
+    def mean(self, x, axis=None):
+        return np.mean(x, axis=axis)
+    def sqrt(self, x):
+        return np.sqrt(x)
+    def clip(self, x, min_val, max_val):
+        return np.clip(x, min_val, max_val)
+    def copy(self, x):
+        return x.copy()
+    def to_numpy(self, x):
+        return np.asarray(x, dtype=np.float32)
+    def from_numpy(self, arr):
+        return np.asarray(arr, dtype=np.float32)
+# ─── Torch backend stub ──────────────────────────────────────────────────────
+class TorchBackend:
+    """
+    PyTorch backend stub — available when torch is installed.
+    Used for meta-training LocalRuleTrainerNet via unrolled gradient descent.
+    Falls back gracefully: if PyTorch is not importable, raise a clear error
+    at construction time rather than deep inside training.
+    """
+    def __init__(self):
+        try:
+            import torch
+            self._torch = torch
+        except ImportError as exc:
+            raise ImportError(
+                "TorchBackend requires PyTorch. Install it with:\n"
+                "  pip install torch\n"
+                "or use the NumPy backend instead."
+            ) from exc
+    def zeros(self, shape):
+        return self._torch.zeros(shape, dtype=self._torch.float32)
+    def ones(self, shape):
+        return self._torch.ones(shape, dtype=self._torch.float32)
+    def normal(self, mean, std, shape, rng=None):
+        t = self._torch.zeros(shape, dtype=self._torch.float32)
+        return t.normal_(mean, std)
+    def tanh(self, x):
+        return self._torch.tanh(x)
+    def matmul(self, a, b):
+        return a @ b
+    def sum(self, x, axis=None, keepdims=False):
+        if axis is None:
+            return x.sum()
+        return x.sum(dim=axis, keepdim=keepdims)
+    def mean(self, x, axis=None):
+        if axis is None:
+            return x.mean()
+        return x.mean(dim=axis)
+    def sqrt(self, x):
+        return self._torch.sqrt(x)
+    def clip(self, x, min_val, max_val):
+        return self._torch.clamp(x, min_val, max_val)
+    def copy(self, x):
+        return x.clone()
+    def to_numpy(self, x):
+        return x.detach().cpu().numpy()
+    def from_numpy(self, arr):
+        return self._torch.from_numpy(np.asarray(arr, dtype=np.float32))
+# ─── factory ─────────────────────────────────────────────────────────────────
+def make_backend(name: str) -> BackendProtocol:
+    if name == "numpy":
+        return NumPyBackend()
+    if name == "torch":
+        return TorchBackend()
+    raise ValueError(f"Unknown backend: {name!r}.  Choose 'numpy' or 'torch'.")

stanno/core/stanno.py ADDED Viewed

	@@ -0,0 +1,317 @@

+"""
+STANNO — Self-Training Artificial Neural Network Object.
+Orchestrates the TraineeNet + AbstractTrainerNet pair.
+fit() drives the training loop:
+  1. forward pass on TraineeNet  →  y_pred + TraineeState
+  2. inject y_batch and loss history into state
+  3. call trainer.compute_updates(state)  →  (ΔW, Δb)
+  4. apply_updates() directly on TraineeNet weights
+No explicit backpropagation code lives here — the trainer handles all of
+that logic (or learns to handle it, in the case of LocalRuleTrainerNet).
+dream() generates sequences using a shadow copy of the trained net with
+optional weight noise and optional input blinding, implementing Thaler's
+"creative perturbation of internal state" concept.
+"""
+from __future__ import annotations
+import copy
+import pickle
+from typing import Callable, List, Optional, Tuple
+import numpy as np
+from stanno.config.schema import STANNOConfig
+from stanno.core.backend import make_backend
+from stanno.core.trainee import TraineeNet
+from stanno.core.trainer import AbstractTrainerNet
+# ─── trainer factory ─────────────────────────────────────────────────────────
+def _make_trainer(config: STANNOConfig) -> AbstractTrainerNet:
+    from stanno.trainers.fixed import FixedTrainerNet
+    from stanno.trainers.local_rule import LocalRuleTrainerNet
+    from stanno.trainers.evolutionary import EvolutionaryTrainerNet
+    kwargs = config.trainer_kwargs
+    lr = config.learning_rate
+    if config.trainer_type == "fixed":
+        return FixedTrainerNet(learning_rate=lr, **kwargs)
+    if config.trainer_type == "local_rule":
+        return LocalRuleTrainerNet(learning_rate=lr, **kwargs)
+    if config.trainer_type == "evolutionary":
+        return EvolutionaryTrainerNet(learning_rate=lr, **kwargs)
+    raise ValueError(
+        f"Unknown trainer_type {config.trainer_type!r}. "
+        "Choose 'fixed', 'local_rule', or 'evolutionary'."
+    )
+# ─── STANNO ──────────────────────────────────────────────────────────────────
+class STANNO:
+    """
+    Self-Training Artificial Neural Network Object.
+    Parameters
+    ----------
+    config : STANNOConfig
+        Full architecture and training configuration.
+    rng : np.random.Generator, optional
+        Seeded RNG for reproducibility.
+    """
+    def __init__(
+        self,
+        config: STANNOConfig,
+        rng: Optional[np.random.Generator] = None,
+    ) -> None:
+        self.config = config
+        self._rng = rng or np.random.default_rng()
+        self.backend = make_backend(config.backend)
+        self.net = TraineeNet(config.layers, backend=self.backend, rng=self._rng)
+        self.trainer: AbstractTrainerNet = _make_trainer(config)
+        self._loss_history: List[float] = []
+        self._train_mse_norm: Optional[float] = None  # used by filter for score normalisation
+        # Optional fixed projection layer for dream feedback when output_dim != input_dim
+        self._projection: Optional[np.ndarray] = None
+        if (
+            config.feedback_projection == "linear"
+            and config.layers[-1] != config.layers[0]
+        ):
+            self._projection = self._rng.normal(
+                0.0, 0.1, (config.layers[-1], config.layers[0])
+            ).astype(np.float32)
+    # ── training ─────────────────────────────────────────────────────────────
+    def fit(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        epochs: int = 1000,
+        batch_size: int = 32,
+        shuffle: bool = True,
+        callback: Optional[Callable[[int, float], None]] = None,
+        divergence_threshold: float = 100.0,
+        patience: int = 20,
+    ) -> None:
+        """
+        Train the STANNO using its internal TrainerNet.
+        Parameters
+        ----------
+        x, y : ndarray
+            Training data of shape (N, input_dim) and (N, output_dim).
+        epochs : int
+            Number of full passes over the dataset.
+        batch_size : int
+            Mini-batch size.
+        shuffle : bool
+            Shuffle data each epoch.
+        callback : callable(epoch, mean_loss), optional
+            Called after each epoch for logging.
+        divergence_threshold : float
+            If any loss exceeds this, training is halted immediately (divergence detected).
+        patience : int
+            Number of epochs without improvement before early stopping (0 = disabled).
+        """
+        x = np.asarray(x, dtype=np.float32)
+        y = np.asarray(y, dtype=np.float32)
+        N = x.shape[0]
+        best_loss = float("inf")
+        patience_counter = 0
+        for epoch in range(epochs):
+            idx = np.arange(N)
+            if shuffle:
+                self._rng.shuffle(idx)
+            x_shuf, y_shuf = x[idx], y[idx]
+            epoch_losses: List[float] = []
+            for start in range(0, N, batch_size):
+                xb = x_shuf[start: start + batch_size]
+                yb = y_shuf[start: start + batch_size]
+                loss = self._trainer_step(xb, yb)
+                epoch_losses.append(loss)
+            mean_loss = float(np.mean(epoch_losses))
+            self._loss_history.append(mean_loss)
+            if len(self._loss_history) > 100:
+                self._loss_history.pop(0)
+            # Divergence detection
+            if mean_loss > divergence_threshold:
+                print(
+                    f"  ⚠ DIVERGENCE DETECTED at epoch {epoch}: "
+                    f"loss={mean_loss:.6f} exceeds threshold {divergence_threshold}. "
+                    f"Halting training. Consider: reducing learning_rate, using fewer epochs, "
+                    f"or checking data normalization."
+                )
+                break
+            # Early stopping (patience-based)
+            if patience > 0:
+                if mean_loss < best_loss:
+                    best_loss = mean_loss
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+                    if patience_counter >= patience:
+                        print(
+                            f"  ⓘ Early stopping at epoch {epoch}: "
+                            f"no improvement for {patience} epochs. "
+                            f"Best loss: {best_loss:.6f}"
+                        )
+                        break
+            if callback is not None:
+                callback(epoch, mean_loss)
+        # Record mean training MSE for normalisation in STANNOFilter
+        if self._loss_history:
+            self._train_mse_norm = float(np.percentile(self._loss_history[-min(10, len(self._loss_history)):], 95)) or 1.0
+    def _trainer_step(self, x: np.ndarray, y: np.ndarray) -> float:
+        """One mini-batch update: forward → state → trainer → apply deltas."""
+        y_pred, state = self.net.forward(x)
+        state.y_batch = y
+        state.y_pred = y_pred
+        state.loss_history = self._loss_history.copy()
+        loss = float(np.mean((y_pred - y) ** 2))
+        weight_deltas, bias_deltas = self.trainer.compute_updates(state)
+        self.net.apply_updates(weight_deltas, bias_deltas)
+        return loss
+    # ── inference ────────────────────────────────────────────────────────────
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """Standard forward pass (no weight modification)."""
+        x = np.asarray(x, dtype=np.float32)
+        y_pred, _ = self.net.forward(x)
+        # Clip to prevent exploding activations (optional but recommended for stability)
+        # Tanh output range is [-1, 1]; many real functions also fall in this range.
+        # Remove this line if your specific task requires unbounded output.
+        # y_pred = np.clip(y_pred, -10.0, 10.0)  # Allow wider range than [-1, 1]
+        return y_pred
+    # ── dreaming ─────────────────────────────────────────────────────────────
+    def dream(
+        self,
+        num_steps: int = 128,
+        input_seed: Optional[np.ndarray] = None,
+        noise_sigma: float = 0.0,
+        blind_inputs: bool = False,
+        rng: Optional[np.random.Generator] = None,
+    ) -> np.ndarray:
+        """
+        Generate a sequence of outputs by driving a noisy shadow copy of the net.
+        The shadow copy is created once per dream() call; noise is injected
+        once into its weights before the loop starts.  The trained net is
+        never modified.
+        Parameters
+        ----------
+        num_steps : int
+            Number of output frames to generate.
+        input_seed : ndarray (1, input_dim), optional
+            Starting input.  Defaults to zeros.
+        noise_sigma : float
+            Gaussian noise std added to shadow weights.
+            0.0  = exact replay of learned patterns (quiet)
+            0.05–0.3 = creative variation (sweet spot per Thaler's IEI)
+            >0.5 = chaotic / exploratory
+        blind_inputs : bool
+            If True, force input to zero each step (pure internal dynamics).
+        rng : np.random.Generator, optional
+            RNG for reproducibility.
+        Returns
+        -------
+        ndarray of shape (num_steps, output_dim)
+        """
+        _rng = rng or np.random.default_rng()
+        # Shadow copy — noise applied once
+        shadow = self.net.clone()
+        shadow.apply_parameter_noise(noise_sigma, rng=_rng)
+        if input_seed is None:
+            x = np.zeros((1, self.net.input_dim), dtype=np.float32)
+        else:
+            x = np.asarray(input_seed, dtype=np.float32).reshape(1, -1)
+        outputs: List[np.ndarray] = []
+        for _ in range(num_steps):
+            x_step = np.zeros_like(x) if blind_inputs else x
+            y, _ = shadow.forward(x_step)
+            outputs.append(y.copy())
+            x = self._feedback_project(y)
+        return np.concatenate(outputs, axis=0)   # (num_steps, output_dim)
+    def _feedback_project(self, y: np.ndarray) -> np.ndarray:
+        """
+        Project dream output y back to input_dim for the next step.
+        Respects config.feedback_projection:
+          "repeat"  — tile and truncate (no extra parameters)
+          "linear"  — fixed random projection matrix
+          "zeros"   — always feed zeros (pure internal dynamics)
+        """
+        in_dim = self.net.input_dim
+        out_dim = self.net.output_dim
+        if in_dim == out_dim:
+            return y
+        mode = self.config.feedback_projection
+        if mode == "zeros":
+            return np.zeros((1, in_dim), dtype=np.float32)
+        if mode == "linear":
+            if self._projection is None:
+                # Initialise lazily if not done in __init__
+                self._projection = np.random.default_rng().normal(
+                    0.0, 0.1, (out_dim, in_dim)
+                ).astype(np.float32)
+            return y @ self._projection   # (1, in_dim)
+        # default: "repeat" — tile output vector to fill input_dim
+        factor = in_dim // out_dim + 1
+        return np.repeat(y, factor, axis=1)[:, :in_dim]
+    # ── serialisation ─────────────────────────────────────────────────────────
+    def save(self, path: str) -> None:
+        """Pickle the full STANNO to disk."""
+        with open(path, "wb") as f:
+            pickle.dump(self, f)
+    @staticmethod
+    def load(path: str) -> "STANNO":
+        """Load a pickled STANNO from disk."""
+        with open(path, "rb") as f:
+            return pickle.load(f)
+    # ── misc ─────────────────────────────────────────────────────────────────
+    def __repr__(self) -> str:
+        return (
+            f"STANNO(layers={self.config.layers}, "
+            f"trainer={self.trainer.__class__.__name__}, "
+            f"backend={self.config.backend})"
+        )

stanno/core/trainee.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""
+TraineeNet — the network that gets trained by a TrainerNet.
+Key differences from stanno_poc.py:
+  - Variable depth: layers: List[int] replaces input/hidden/output_dim
+  - Backend-injected: all ops go through BackendProtocol for NumPy/Torch compat
+  - forward() returns a TraineeState capturing activations AND pre-activations
+  - apply_updates() receives (weight_deltas, bias_deltas) from TrainerNet
+  - Xavier initialisation, tanh hidden activations, linear output (same as poc)
+"""
+from __future__ import annotations
+import copy
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.backend import BackendProtocol, NumPyBackend
+from stanno.core.trainer import TraineeState
+class TraineeNet:
+    """
+    N-layer MLP (input → hidden₁ → … → hiddenₙ₋₁ → output).
+    The output layer is always linear (no activation).
+    All hidden layers use tanh activation.
+    Parameters
+    ----------
+    layers : List[int]
+        Sizes of each layer including input and output.
+        Must have at least 2 elements.  Example: [1, 32, 1].
+    backend : BackendProtocol, optional
+        Numerical backend.  Defaults to NumPyBackend.
+    rng : np.random.Generator, optional
+        Seeded RNG for reproducibility.
+    """
+    def __init__(
+        self,
+        layers: List[int],
+        backend: Optional[BackendProtocol] = None,
+        rng: Optional[np.random.Generator] = None,
+    ) -> None:
+        if len(layers) < 2:
+            raise ValueError(f"layers must have at least 2 elements, got {layers}")
+        self.layers = layers
+        self.backend = backend or NumPyBackend()
+        self._rng = rng or np.random.default_rng()
+        self._init_weights()
+    # ── initialisation ───────────────────────────────────────────────────────
+    def _init_weights(self) -> None:
+        self.weights: List[np.ndarray] = []
+        self.biases: List[np.ndarray] = []
+        for i in range(len(self.layers) - 1):
+            fan_in = self.layers[i]
+            fan_out = self.layers[i + 1]
+            # Xavier uniform
+            limit = float(np.sqrt(6.0 / (fan_in + fan_out)))
+            W = self._rng.uniform(-limit, limit, (fan_in, fan_out)).astype(np.float32)
+            b = np.zeros((1, fan_out), dtype=np.float32)
+            self.weights.append(W)
+            self.biases.append(b)
+    # ── forward pass ─────────────────────────────────────────────────────────
+    def forward(self, x: np.ndarray) -> Tuple[np.ndarray, TraineeState]:
+        """
+        Run a forward pass.
+        Returns the prediction and a TraineeState populated with:
+          activations[0]     = x (input)
+          activations[i]     = tanh(z_{i-1})  for i = 1 … n-1
+          activations[n]     = z_{n-1}         (linear output = y_pred)
+          pre_activations[i] = z_i = activations[i] @ W_i + b_i
+        where n = len(self.weights).
+        """
+        x = np.asarray(x, dtype=np.float32)
+        activations: List[np.ndarray] = [x]
+        pre_activations: List[np.ndarray] = []
+        current = x
+        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
+            z = current @ W + b
+            pre_activations.append(z)
+            if i < len(self.weights) - 1:
+                a = np.tanh(z)
+            else:
+                a = z          # linear output
+            activations.append(a)
+            current = a
+        y_pred = current
+        state = TraineeState(
+            weights=self.weights,          # live references — no copy
+            biases=self.biases,
+            activations=activations,
+            pre_activations=pre_activations,
+            x_batch=x,
+            y_batch=np.zeros_like(y_pred),  # filled by STANNO._trainer_step
+            y_pred=y_pred,
+        )
+        return y_pred, state
+    # ── weight manipulation ──────────────────────────────────────────────────
+    def apply_updates(
+        self,
+        weight_deltas: List[np.ndarray],
+        bias_deltas: List[np.ndarray],
+    ) -> None:
+        """Apply in-place updates produced by a TrainerNet."""
+        for i, (dW, db) in enumerate(zip(weight_deltas, bias_deltas)):
+            self.weights[i] += dW
+            self.biases[i] += db
+    def apply_parameter_noise(
+        self,
+        sigma: float,
+        rng: Optional[np.random.Generator] = None,
+    ) -> None:
+        """Add Gaussian noise (std=sigma) to all weights and biases in-place."""
+        if sigma <= 0.0:
+            return
+        _rng = rng or np.random.default_rng()
+        for W in self.weights:
+            W += _rng.normal(0.0, sigma, W.shape).astype(np.float32)
+        for b in self.biases:
+            b += _rng.normal(0.0, sigma, b.shape).astype(np.float32)
+    def lesion(
+        self,
+        fraction: float,
+        rng: Optional[np.random.Generator] = None,
+    ) -> None:
+        """
+        Randomly zero out a fraction of weight entries (not biases).
+        Simulates progressive connection death as described in the original
+        STANNO literature ("tunnel vision" → "brain death" progression).
+        """
+        fraction = float(np.clip(fraction, 0.0, 1.0))
+        if fraction <= 0.0:
+            return
+        _rng = rng or np.random.default_rng()
+        for W in self.weights:
+            mask = _rng.random(W.shape) < fraction
+            W[mask] = 0.0
+    def clone(self) -> "TraineeNet":
+        """Return a deep copy with the same weights."""
+        return copy.deepcopy(self)
+    # ── misc ─────────────────────────────────────────────────────────────────
+    @property
+    def input_dim(self) -> int:
+        return self.layers[0]
+    @property
+    def output_dim(self) -> int:
+        return self.layers[-1]
+    def parameter_count(self) -> int:
+        total = sum(W.size for W in self.weights)
+        total += sum(b.size for b in self.biases)
+        return total
+    def __repr__(self) -> str:
+        return f"TraineeNet(layers={self.layers}, params={self.parameter_count():,})"

stanno/core/trainer.py ADDED Viewed

	@@ -0,0 +1,126 @@

+"""
+TraineeState dataclass and AbstractTrainerNet ABC.
+TraineeState carries everything a TrainerNet may need:
+  - current weight matrices and biases
+  - forward-pass activations (post-activation) and pre-activations (z = Wx+b)
+  - the current batch inputs, targets, and predictions
+  - an optional loss history window
+  - a slot for pre-computed gradients (used by some trainers)
+AbstractTrainerNet defines the interface every TrainerNet must implement.
+The only required method is compute_updates().  meta_train defaults to a
+no-op, while save and load default to pickle-based persistence.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple
+import numpy as np
+# ─── TraineeState ─────────────────────────────────────────────────────────────
+@dataclass
+class TraineeState:
+    """
+    Snapshot of TraineeNet state after a forward pass, ready for the trainer.
+    Index convention (n = number of weight matrices):
+      activations[0]      = input x          shape (batch, layers[0])
+      activations[i]      = a_i              shape (batch, layers[i])
+      activations[n]      = y_pred           shape (batch, layers[n])
+      pre_activations[i]  = z_i = a_i @ W_i + b_i   shape (batch, layers[i+1])
+    So len(activations) == n + 1  and  len(pre_activations) == n.
+    The first seven fields have no defaults and must be supplied at
+    construction; loss_history defaults to a fresh empty list per instance
+    and gradients defaults to None.
+    """
+    # Field order below is the positional constructor order — do not reorder.
+    weights: List[np.ndarray]               # [W_0, ..., W_{n-1}]
+    biases: List[np.ndarray]                # [b_0, ..., b_{n-1}]
+    activations: List[np.ndarray]           # [x, a_1, ..., y_pred]
+    pre_activations: List[np.ndarray]       # [z_0, ..., z_{n-1}]
+    x_batch: np.ndarray                     # current batch inputs
+    y_batch: np.ndarray                     # current batch targets (set by STANNO)
+    y_pred: np.ndarray                      # current predictions
+    # default_factory avoids sharing one mutable list between instances
+    loss_history: List[float] = field(default_factory=list)
+    gradients: Optional[List[np.ndarray]] = None   # optional pre-computed ∂L/∂W
+# ─── AbstractTrainerNet ───────────────────────────────────────────────────────
+class AbstractTrainerNet(ABC):
+    """
+    Base class for all TrainerNet implementations.
+    Subclasses must implement compute_updates().  meta_train, save, and load
+    are optional — the default implementations are harmless no-ops.
+    """
+    @abstractmethod
+    def compute_updates(
+        self, state: TraineeState
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+        """
+        Compute weight and bias updates given the current TraineeState.
+        Returns:
+            weight_deltas: list of Δ arrays, same shapes as state.weights
+            bias_deltas:   list of Δ arrays, same shapes as state.biases
+        The STANNO orchestrator applies these as:
+            W_i += weight_deltas[i]
+            b_i += bias_deltas[i]
+        """
+    def compute_cascade_updates(
+        self,
+        state: "TraineeState",
+        output_delta: Optional[np.ndarray] = None,
+    ) -> Tuple[List[np.ndarray], List[np.ndarray], Optional[np.ndarray]]:
+        """
+        Like compute_updates(), but supports multi-stage cascade training.
+        output_delta : ndarray (batch, output_dim), optional
+            Pre-computed gradient at the output layer from a downstream stage.
+            When None the trainer computes the output delta from state.y_batch
+            as usual.
+        Returns
+        -------
+        weight_deltas : List[ndarray]
+        bias_deltas   : List[ndarray]
+        input_gradient : ndarray (batch, input_dim) or None
+            Gradient w.r.t. the network's input activations.  Pass this as
+            ``output_delta`` to the preceding stage in a CascadeSTANNO.
+            The default implementation returns ``None`` (no gradient flows
+            backward past this stage).  FixedTrainerNet overrides this.
+        """
+        dW, db = self.compute_updates(state)
+        return dW, db, None
+    def meta_train(self, tasks) -> None:
+        """
+        Optional: adapt this TrainerNet's own parameters on a list of tasks.
+        Each task is expected to be a dict with at least:
+            {"x_train": ndarray, "y_train": ndarray,
+             "x_test":  ndarray, "y_test":  ndarray}
+        Default: no-op.  Override in LocalRuleTrainerNet and
+        EvolutionaryTrainerNet.
+        """
+    def save(self, path: str) -> None:
+        """Persist trainer parameters to disk.  Default: pickle."""
+        import pickle
+        with open(path, "wb") as f:
+            pickle.dump(self, f)
+    def load(self, path: str) -> None:
+        """Load trainer parameters from disk.  Default: merge from pickle."""
+        import pickle
+        with open(path, "rb") as f:
+            loaded = pickle.load(f)
+        self.__dict__.update(loaded.__dict__)

stanno/data/__init__.py ADDED Viewed

File without changes

stanno/data/base.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""
+Data loading abstractions.
+DataSource is the ABC that all loaders implement.
+A single make_loader() factory creates the right loader from a DataConfig.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Tuple
+import numpy as np
+from stanno.config.schema import DataConfig
+class DataSource(ABC):
+    """Abstract base class for all data loaders."""
+    def __init__(self, config: DataConfig) -> None:
+        self.config = config
+    @abstractmethod
+    def load(self) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Load and return (X, Y) arrays of shape (N, input_dim) and (N, output_dim).
+        """
+    def _train_test_split(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """Apply config.split_ratio to return the training portion only."""
+        n = int(len(x) * self.config.split_ratio)
+        return x[:n], y[:n]
+    def _normalize(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """Z-score normalise inputs and outputs independently."""
+        x_mean, x_std = x.mean(axis=0), x.std(axis=0)
+        y_mean, y_std = y.mean(axis=0), y.std(axis=0)
+        x_std = np.where(x_std < 1e-8, 1.0, x_std)
+        y_std = np.where(y_std < 1e-8, 1.0, y_std)
+        return (x - x_mean) / x_std, (y - y_mean) / y_std
+def make_loader(config: DataConfig) -> DataSource:
+    """
+    Factory function: choose a DataSource implementation from config.format.
+    Supported formats:
+      "csv"         — CSV file at config.path
+      "json"        — JSON / JSONL file at config.path
+      "numpy"       — .npy or .npz file at config.path
+      "builtin:sin" — synthetic sinusoid (for quick tests)
+    """
+    fmt = (config.format or "").lower()
+    if fmt == "csv":
+        from stanno.data.csv_loader import CSVLoader
+        return CSVLoader(config)
+    if fmt == "json":
+        from stanno.data.json_loader import JSONLoader
+        return JSONLoader(config)
+    if fmt in ("numpy", "npy", "npz"):
+        from stanno.data.numpy_loader import NumpyLoader
+        return NumpyLoader(config)
+    if fmt.startswith("builtin:"):
+        from stanno.data.numpy_loader import BuiltinLoader
+        return BuiltinLoader(config)
+    raise ValueError(
+        f"Unknown data format {config.format!r}. "
+        "Supported: 'csv', 'json', 'numpy', 'builtin:sin'."
+    )

stanno/data/csv_loader.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""
+CSV data loader.
+Requires pandas.  Install with: pip install stanno[data]
+"""
+from __future__ import annotations
+from typing import Tuple
+import numpy as np
+from stanno.config.schema import DataConfig
+from stanno.data.base import DataSource
+class CSVLoader(DataSource):
+    """
+    Load supervised learning data from a CSV file.
+    Column selection
+    ────────────────
+    If DataConfig.input_cols and output_cols are specified (as lists of
+    column names or 0-based integers), those columns are used directly.
+    Otherwise the convention is:
+      • All but the last column → X (inputs)
+      • Last column             → Y (targets)
+    """
+    def load(self) -> Tuple[np.ndarray, np.ndarray]:
+        try:
+            import pandas as pd
+        except ImportError as exc:
+            raise ImportError(
+                "pandas is required for CSV loading.  "
+                "Install it with: pip install stanno[data]"
+            ) from exc
+        df = pd.read_csv(self.config.path)
+        if self.config.input_cols and self.config.output_cols:
+            x = df[self.config.input_cols].to_numpy(dtype=np.float32)
+            y = df[self.config.output_cols].to_numpy(dtype=np.float32)
+        else:
+            x = df.iloc[:, :-1].to_numpy(dtype=np.float32)
+            y = df.iloc[:, -1:].to_numpy(dtype=np.float32)
+        if self.config.n_samples is not None and self.config.n_samples < len(x):
+            x = x[: self.config.n_samples]
+            y = y[: self.config.n_samples]
+        if self.config.normalize:
+            x, y = self._normalize(x, y)
+        x, y = self._train_test_split(x, y)
+        return x, y

stanno/data/json_loader.py ADDED Viewed

	@@ -0,0 +1,98 @@

+"""
+JSON / JSONL data loader.
+Handles two formats:
+  • Regular JSON   — a list of dicts or a dict with "x"/"y" keys
+  • JSONL          — one JSON object per line (auto-detected)
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Tuple
+import numpy as np
+from stanno.config.schema import DataConfig
+from stanno.data.base import DataSource
+class JSONLoader(DataSource):
+    """
+    Load supervised learning data from a JSON or JSONL file.
+    Accepted structures
+    ───────────────────
+    1. Dict with "x" and "y" keys:
+         {"x": [[...], ...], "y": [[...], ...]}
+    2. List of dicts using input_cols / output_cols as keys:
+         [{"feature_a": 1.0, "feature_b": 2.0, "label": 0.0}, ...]
+    3. JSONL (one JSON object per line) — same structure per line.
+    """
+    def load(self) -> Tuple[np.ndarray, np.ndarray]:
+        path = Path(self.config.path)
+        text = path.read_text(encoding="utf-8")
+        # Auto-detect JSONL
+        lines = [l for l in text.strip().splitlines() if l.strip()]
+        if len(lines) > 1:
+            try:
+                records = [json.loads(l) for l in lines]
+                data = records   # list of dicts
+            except json.JSONDecodeError:
+                data = json.loads(text)
+        else:
+            data = json.loads(text)
+        x, y = self._parse(data)
+        if self.config.n_samples is not None and self.config.n_samples < len(x):
+            x = x[: self.config.n_samples]
+            y = y[: self.config.n_samples]
+        if self.config.normalize:
+            x, y = self._normalize(x, y)
+        return self._train_test_split(x, y)
+    def _parse(self, data) -> Tuple[np.ndarray, np.ndarray]:
+        # Format 1: {"x": ..., "y": ...}
+        if isinstance(data, dict) and "x" in data and "y" in data:
+            x = np.array(data["x"], dtype=np.float32)
+            y = np.array(data["y"], dtype=np.float32)
+            if x.ndim == 1:
+                x = x.reshape(-1, 1)
+            if y.ndim == 1:
+                y = y.reshape(-1, 1)
+            return x, y
+        # Format 2 / 3: list of dicts
+        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
+            if self.config.input_cols and self.config.output_cols:
+                x = np.array(
+                    [[rec[k] for k in self.config.input_cols] for rec in data],
+                    dtype=np.float32,
+                )
+                y = np.array(
+                    [[rec[k] for k in self.config.output_cols] for rec in data],
+                    dtype=np.float32,
+                )
+            else:
+                # fallback: all keys except last as X; last key as Y
+                keys = list(data[0].keys())
+                x = np.array([[rec[k] for k in keys[:-1]] for rec in data], dtype=np.float32)
+                y = np.array([[rec[keys[-1]]] for rec in data], dtype=np.float32)
+            return x, y
+        # Format: list of lists (raw matrix)
+        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], (list, tuple)):
+            arr = np.array(data, dtype=np.float32)
+            return arr[:, :-1], arr[:, -1:]
+        raise ValueError(
+            "Unsupported JSON structure.  Expected {'x': ..., 'y': ...}, "
+            "list of dicts, or list of lists."
+        )

stanno/data/numpy_loader.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""
+NumPy / built-in data loaders.
+NumpyLoader handles .npy and .npz files.
+BuiltinLoader generates synthetic datasets for quick tests.
+"""
+from __future__ import annotations
+from typing import Tuple
+import numpy as np
+from stanno.config.schema import DataConfig
+from stanno.data.base import DataSource
+class NumpyLoader(DataSource):
+    """
+    Load data from a NumPy .npy or .npz file.
+    .npy files
+    ──────────
+    The array is treated as an autoencoder target: X = Y = loaded array.
+    Useful for unsupervised reconstruction or embedding normalisation tasks.
+    .npz files
+    ──────────
+    Expected keys: "x" and "y" (case-insensitive).
+    Falls back to autoencoder mode if only one key is present.
+    """
+    def load(self) -> Tuple[np.ndarray, np.ndarray]:
+        path = self.config.path
+        if path.endswith(".npz"):
+            archive = np.load(path)
+            keys = list(archive.files)
+            lower_keys = {k.lower(): k for k in keys}
+            if "x" in lower_keys and "y" in lower_keys:
+                x = archive[lower_keys["x"]].astype(np.float32)
+                y = archive[lower_keys["y"]].astype(np.float32)
+            elif len(keys) >= 2:
+                x = archive[keys[0]].astype(np.float32)
+                y = archive[keys[1]].astype(np.float32)
+            else:
+                arr = archive[keys[0]].astype(np.float32)
+                x = y = arr   # autoencoder
+        else:
+            arr = np.load(path).astype(np.float32)
+            x = y = arr   # autoencoder
+        if x.ndim == 1:
+            x = x.reshape(-1, 1)
+        if y.ndim == 1:
+            y = y.reshape(-1, 1)
+        if self.config.n_samples is not None and self.config.n_samples < len(x):
+            x = x[: self.config.n_samples]
+            y = y[: self.config.n_samples]
+        if self.config.normalize:
+            x, y = self._normalize(x, y)
+        return self._train_test_split(x, y)
+class BuiltinLoader(DataSource):
+    """
+    Synthetic data generator for built-in demo tasks.
+    Supported values for DataConfig.format:
+      "builtin:sin"    — y = sin(2π·x), x ∈ [0, 1]
+      "builtin:xor"    — 2D XOR classification
+      "builtin:spiral" — 2-class spiral (n_samples points)
+    """
+    def load(self) -> Tuple[np.ndarray, np.ndarray]:
+        name = self.config.format.split(":", 1)[-1].lower()
+        n = self.config.n_samples or 512
+        rng = np.random.default_rng(42)
+        if name == "sin":
+            x = np.linspace(0.0, 1.0, n, dtype=np.float32).reshape(-1, 1)
+            y = np.sin(2.0 * np.pi * x).astype(np.float32)
+        elif name == "xor":
+            pts = rng.choice(np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32),
+                             size=n, replace=True)
+            labels = (pts[:, 0].astype(int) ^ pts[:, 1].astype(int)).astype(np.float32)
+            x = pts + rng.normal(0.0, 0.05, pts.shape).astype(np.float32)
+            y = labels.reshape(-1, 1)
+        elif name == "spiral":
+            n_half = n // 2
+            theta = np.linspace(0.0, 4 * np.pi, n_half, dtype=np.float32)
+            r = np.linspace(0.1, 1.0, n_half, dtype=np.float32)
+            x1 = np.stack([r * np.cos(theta), r * np.sin(theta)], axis=1)
+            x2 = np.stack([r * np.cos(theta + np.pi), r * np.sin(theta + np.pi)], axis=1)
+            x = np.concatenate([x1, x2], axis=0).astype(np.float32)
+            x += rng.normal(0.0, 0.05, x.shape).astype(np.float32)
+            y = np.array([0.0] * n_half + [1.0] * n_half, dtype=np.float32).reshape(-1, 1)
+        else:
+            raise ValueError(
+                f"Unknown builtin dataset {name!r}. "
+                "Supported: 'builtin:sin', 'builtin:xor', 'builtin:spiral'."
+            )
+        if self.config.normalize:
+            x, y = self._normalize(x, y)
+        return self._train_test_split(x, y)

stanno/integration/__init__.py ADDED Viewed

File without changes

stanno/integration/cascade.py ADDED Viewed

	@@ -0,0 +1,354 @@

+"""
+CascadeSTANNO — chain of STANNO objects forming a composite system model.
+Implements the "cascading networks" concept from US5852815A (Thaler, 1998):
+    "Once multiple component-networks have been trained, they can be
+     cascaded — connected together — to form larger system models.
+     The outputs of one component-network feed into the inputs of another;
+     several components form a chain or a more complex topology representing
+     a device or system."
+The patent also describes hybrid static/dynamic cascades:
+    "A component-network might be locked once it reaches satisfactory
+     performance.  Another component (implemented as a STANNO) continues
+     to adapt to new operating conditions."
+This is captured by the `frozen` flag per stage.
+Training modes
+──────────────
+end-to-end (default)
+    Gradient flows backward from the last stage to the first using the
+    cascade-aware ``compute_cascade_updates()`` in FixedTrainerNet.
+    Frozen stages skip weight updates but still pass gradients through.
+staged
+    Each stage is trained independently on provided intermediate targets
+    (useful when you have supervision at every stage, e.g. multi-task
+    pipelines).
+"""
+from __future__ import annotations
+import copy
+import pickle
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.stanno import STANNO
+from stanno.config.schema import STANNOConfig
+class CascadeSTANNO:
+    """
+    Ordered chain of STANNO objects.
+    Parameters
+    ----------
+    stages : List[STANNO]
+        Pre-built STANNO objects in pipeline order.  Output dim of stage k
+        must equal input dim of stage k+1.
+    frozen : List[bool], optional
+        Per-stage freeze flags.  Frozen stages receive no weight updates.
+        Gradients still flow through them during end-to-end training.
+    Example
+    -------
+    >>> from stanno import STANNO, STANNOConfig
+    >>> from stanno.integration.cascade import CascadeSTANNO
+    >>> enc = STANNO(STANNOConfig(layers=[784, 128, 32]))
+    >>> dec = STANNO(STANNOConfig(layers=[32, 128, 784]))
+    >>> ae  = CascadeSTANNO([enc, dec])
+    >>> ae.fit(x_train, x_train, epochs=200)  # autoencoder
+    >>> codes = ae.intermediate_output(x_train, stage=0)  # encoder output
+    """
+    def __init__(
+        self,
+        stages: Optional[List[STANNO]] = None,
+        frozen: Optional[List[bool]] = None,
+    ) -> None:
+        self.stages: List[STANNO] = list(stages) if stages else []
+        self.frozen: List[bool] = list(frozen) if frozen else [False] * len(self.stages)
+        self._loss_history: List[float] = []
+    # ── builder helpers ───────────────────────────────────────────────────────
+    def add_stage(self, stanno: STANNO, frozen: bool = False) -> "CascadeSTANNO":
+        """Append a stage in-place.  Returns self for chaining."""
+        self.stages.append(stanno)
+        self.frozen.append(frozen)
+        return self
+    def freeze(self, stage: int) -> "CascadeSTANNO":
+        """Mark stage as frozen (no weight updates).  Returns self."""
+        self.frozen[stage] = True
+        return self
+    def unfreeze(self, stage: int) -> "CascadeSTANNO":
+        """Allow weight updates for this stage.  Returns self."""
+        self.frozen[stage] = False
+        return self
+    # ── forward pass ─────────────────────────────────────────────────────────
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """Forward pass through all stages.  Returns final output."""
+        h = np.asarray(x, dtype=np.float32)
+        for stage in self.stages:
+            h = stage.predict(h)
+        return h
+    def intermediate_output(self, x: np.ndarray, stage: int) -> np.ndarray:
+        """Return the output of stage ``stage`` (0-indexed).
+        Useful for extracting learned representations at any depth.
+        """
+        if stage < 0:
+            stage = len(self.stages) + stage
+        if not 0 <= stage < len(self.stages):
+            raise IndexError(
+                f"stage {stage} out of range for cascade with {len(self.stages)} stages"
+            )
+        h = np.asarray(x, dtype=np.float32)
+        for s in self.stages[: stage + 1]:
+            h = s.predict(h)
+        return h
+    def all_intermediate_outputs(self, x: np.ndarray) -> List[np.ndarray]:
+        """Return outputs at every stage boundary (including input)."""
+        outputs = [np.asarray(x, dtype=np.float32)]
+        h = outputs[0]
+        for stage in self.stages:
+            h = stage.predict(h)
+            outputs.append(h)
+        return outputs
+    # ── training ──────────────────────────────────────────���──────────────────
+    def fit(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        epochs: int = 100,
+        batch_size: int = 32,
+        shuffle: bool = True,
+        patience: int = 20,
+        divergence_threshold: float = 100.0,
+        log_every: int = 10,
+        callback=None,
+    ) -> List[float]:
+        """
+        End-to-end training with gradient cascade.
+        Gradient of the output loss flows backward from the last stage to
+        the first using ``compute_cascade_updates()``.  Frozen stages skip
+        weight updates but still propagate gradients.
+        Parameters
+        ----------
+        x, y : ndarray  (N, input_dim) and (N, output_dim)
+        epochs : int
+        batch_size : int
+        shuffle : bool
+        patience : int   — epochs without improvement before early stopping
+        divergence_threshold : float
+        log_every : int  — print loss every N epochs (0 = silent)
+        callback : callable(epoch, loss), optional
+        Returns
+        -------
+        loss_history : List[float]
+        """
+        x = np.asarray(x, dtype=np.float32)
+        y = np.asarray(y, dtype=np.float32)
+        N = x.shape[0]
+        best_loss = float("inf")
+        patience_counter = 0
+        for epoch in range(epochs):
+            idx = np.arange(N)
+            if shuffle:
+                np.random.shuffle(idx)
+            x_s, y_s = x[idx], y[idx]
+            epoch_losses: List[float] = []
+            for start in range(0, N, batch_size):
+                xb = x_s[start : start + batch_size]
+                yb = y_s[start : start + batch_size]
+                loss = self._endtoend_step(xb, yb)
+                epoch_losses.append(loss)
+            mean_loss = float(np.mean(epoch_losses))
+            self._loss_history.append(mean_loss)
+            if mean_loss > divergence_threshold:
+                print(
+                    f"  ⚠ CASCADE DIVERGENCE at epoch {epoch}: "
+                    f"loss={mean_loss:.6f} > {divergence_threshold}. Halting."
+                )
+                break
+            if patience > 0:
+                if mean_loss < best_loss:
+                    best_loss = mean_loss
+                    patience_counter = 0
+                else:
+                    patience_counter += 1
+                    if patience_counter >= patience:
+                        print(
+                            f"  ⓘ Cascade early stopping at epoch {epoch}: "
+                            f"no improvement for {patience} epochs. "
+                            f"Best loss: {best_loss:.6f}"
+                        )
+                        break
+            if log_every > 0 and epoch % log_every == 0:
+                print(f"  [cascade] epoch {epoch:>5}  loss={mean_loss:.6f}")
+            if callback is not None:
+                callback(epoch, mean_loss)
+        return list(self._loss_history)
+    def staged_fit(
+        self,
+        x: np.ndarray,
+        intermediate_targets: List[np.ndarray],
+        epochs: int = 100,
+        batch_size: int = 32,
+        patience: int = 20,
+        log_every: int = 10,
+    ) -> List[List[float]]:
+        """
+        Per-stage independent training.
+        ``intermediate_targets[k]`` is the target for stage k.  The last
+        entry is the final output target.  Use when you have supervision at
+        every stage (multi-task / layerwise pre-training).
+        Returns
+        -------
+        histories : List[List[float]] — one loss history per stage
+        """
+        if len(intermediate_targets) != len(self.stages):
+            raise ValueError(
+                f"Need one target per stage: got {len(intermediate_targets)} "
+                f"targets for {len(self.stages)} stages."
+            )
+        histories: List[List[float]] = []
+        h = np.asarray(x, dtype=np.float32)
+        for k, (stage, target) in enumerate(zip(self.stages, intermediate_targets)):
+            if self.frozen[k]:
+                # Still run forward to get output for the next stage
+                h = stage.predict(h)
+                histories.append([])
+                continue
+            target_k = np.asarray(target, dtype=np.float32)
+            def _log(epoch, loss, k=k, log_every=log_every):
+                if log_every > 0 and epoch % log_every == 0:
+                    print(f"  [cascade stage {k}] epoch {epoch:>5}  loss={loss:.6f}")
+            stage.fit(
+                h, target_k,
+                epochs=epochs,
+                batch_size=batch_size,
+                patience=patience,
+                callback=_log,
+            )
+            histories.append(list(stage._loss_history))
+            h = stage.predict(h)  # pass output to next stage
+        return histories
+    # ── internal ─────────────────────────────────────────────────────────���───
+    def _endtoend_step(self, x_batch: np.ndarray, y_batch: np.ndarray) -> float:
+        """Single mini-batch end-to-end update pass."""
+        # ── Forward: collect states ────────────────────────────────────────
+        stage_states: List[Tuple] = []  # (h_in, y_pred, state)
+        h = x_batch
+        for stage in self.stages:
+            y_pred, state = stage.net.forward(h)
+            state.y_pred = y_pred
+            state.x_batch = h
+            state.loss_history = stage._loss_history[-100:]
+            stage_states.append((h, y_pred, state))
+            h = y_pred
+        # ── Backward: gradient cascade ────────────────────────────────────
+        output_delta: Optional[np.ndarray] = None  # None → last stage uses y_batch
+        pending_updates: List[Optional[Tuple]] = [None] * len(self.stages)
+        for k in range(len(self.stages) - 1, -1, -1):
+            h_in, y_pred, state = stage_states[k]
+            trainer = self.stages[k].trainer
+            if k == len(self.stages) - 1:
+                # Last stage: set real target so trainer can compute delta
+                state.y_batch = y_batch
+            else:
+                # Earlier stage: inject a virtual target that makes the
+                # trainer reproduce the incoming output_delta.
+                # (2/N)(y_pred − y_virtual) = output_delta
+                # → y_virtual = y_pred − (N/2) * output_delta
+                batch_size = y_pred.shape[0]
+                if output_delta is not None:
+                    state.y_batch = y_pred - (batch_size / 2.0) * output_delta
+                else:
+                    state.y_batch = y_pred  # zero error (should not happen)
+            dW, db, input_grad = trainer.compute_cascade_updates(
+                state, output_delta=output_delta
+            )
+            pending_updates[k] = (dW, db)
+            output_delta = input_grad  # flow backward to previous stage
+        # ── Apply updates to non-frozen stages ────────────────────────────
+        for k, upd in enumerate(pending_updates):
+            if upd is not None and not self.frozen[k]:
+                dW, db = upd
+                self.stages[k].net.apply_updates(dW, db)
+        # Final output loss for monitoring
+        final_pred = stage_states[-1][1]
+        return float(np.mean((final_pred - y_batch) ** 2))
+    # ── persistence ──────────────────────────────────────────────────────────
+    def save(self, path: str) -> None:
+        """Pickle the full cascade to disk."""
+        with open(path, "wb") as f:
+            pickle.dump(self, f)
+    @classmethod
+    def load(cls, path: str) -> "CascadeSTANNO":
+        """Load a CascadeSTANNO from disk."""
+        with open(path, "rb") as f:
+            obj = pickle.load(f)
+        if not isinstance(obj, cls):
+            raise TypeError(f"Expected CascadeSTANNO, got {type(obj)}")
+        return obj
+    # ── introspection ─────────────────────────────────────────────────────────
+    def __len__(self) -> int:
+        return len(self.stages)
+    def __getitem__(self, idx: int) -> STANNO:
+        return self.stages[idx]
+    def __repr__(self) -> str:
+        stage_strs = []
+        for i, (s, f) in enumerate(zip(self.stages, self.frozen)):
+            layers = "→".join(str(d) for d in s.config.layers)
+            tag = " [frozen]" if f else ""
+            stage_strs.append(f"  {i}: [{layers}]{tag}")
+        return "CascadeSTANNO(\n" + "\n".join(stage_strs) + "\n)"

stanno/integration/continual.py ADDED Viewed

	@@ -0,0 +1,109 @@

+"""
+ContinualSTANNO — online / continual learning wrapper.
+Provides a one-sample-at-a-time observe() API and a held-out test_loss()
+evaluator for monitoring concept drift, both powered by the same
+STANNO._trainer_step() machinery.
+"""
+from __future__ import annotations
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.stanno import STANNO
+from stanno.config.schema import STANNOConfig
+class ContinualSTANNO:
+    """
+    Thin wrapper enabling online / continual learning on a STANNO.
+    Parameters
+    ----------
+    stanno : STANNO
+        A (possibly pre-trained) STANNO instance.
+    """
+    def __init__(self, stanno: STANNO) -> None:
+        self.stanno = stanno
+        self._step_count: int = 0
+        self._recent_losses: List[float] = []
+    # ── online update ─────────────────────────────────────────────────────────
+    def observe(
+        self,
+        x: np.ndarray,
+        y_true: np.ndarray,
+    ) -> float:
+        """
+        One online update step.
+        Parameters
+        ----------
+        x : array (1, input_dim) or (input_dim,)
+        y_true : array (1, output_dim) or (output_dim,)
+        Returns
+        -------
+        loss : float — MSE on this sample before the update.
+        """
+        x = np.asarray(x, dtype=np.float32).reshape(1, -1)
+        y = np.asarray(y_true, dtype=np.float32).reshape(1, -1)
+        loss = self.stanno._trainer_step(x, y)
+        self._step_count += 1
+        self._recent_losses.append(loss)
+        if len(self._recent_losses) > 1000:
+            self._recent_losses.pop(0)
+        return loss
+    # ── evaluation ────────────────────────────────────────────────────────────
+    def test_loss(
+        self,
+        x_test: np.ndarray,
+        y_test: np.ndarray,
+        batch_size: int = 256,
+    ) -> float:
+        """
+        Compute mean MSE on held-out data without updating any weights.
+        Parameters
+        ----------
+        x_test, y_test : arrays of shape (N, *)
+        batch_size : int — avoids OOM for large arrays.
+        Returns
+        -------
+        mean MSE : float
+        """
+        x_test = np.asarray(x_test, dtype=np.float32)
+        y_test = np.asarray(y_test, dtype=np.float32)
+        losses: List[float] = []
+        for start in range(0, len(x_test), batch_size):
+            xb = x_test[start: start + batch_size]
+            yb = y_test[start: start + batch_size]
+            y_pred = self.stanno.predict(xb)
+            losses.append(float(np.mean((y_pred - yb) ** 2)))
+        return float(np.mean(losses))
+    # ── convenience ───────────────────────────────────────────────────────────
+    @property
+    def steps(self) -> int:
+        """Number of observe() calls so far."""
+        return self._step_count
+    @property
+    def recent_loss(self) -> float:
+        """Mean loss over the last ≤1000 steps."""
+        if not self._recent_losses:
+            return float("nan")
+        return float(np.mean(self._recent_losses))
+    def __repr__(self) -> str:
+        return (
+            f"ContinualSTANNO(steps={self._step_count}, "
+            f"recent_loss={self.recent_loss:.5f})"
+        )

stanno/integration/dsanno.py ADDED Viewed

	@@ -0,0 +1,389 @@

+"""
+DSANNO — Data Scanning Artificial Neural Network Object.
+Implements the DSANNO concept from US5852815A (Thaler, 1998):
+    "A DSANNO scans large regions of the data space, looking for patterns
+     or relationships that match its learned internal representations."
+Modern reinterpretation
+───────────────────────
+In the patent, "scanning the data space" meant traversing spreadsheet cell
+ranges.  In a modern context the data space is a NumPy array, a pandas
+DataFrame, or any stream of observations.
+DSANNO wraps a trained STANNO and turns it into a *semantic scanner*:
+  • Low reconstruction error  → the STANNO recognises this row (good match)
+  • High reconstruction error → the row is outside the STANNO's domain
+Two canonical use-cases:
+  1. **Reconstruction / autoencoder mode**
+     The STANNO is trained to reconstruct its own input (layers[-1] == layers[0]).
+     DSANNO scores rows by ``||x - STANNO(x)||²``.
+  2. **Regression / classifier mode**
+     The STANNO is trained on (x, y_known).  DSANNO scores rows by
+     ``||y_known - STANNO(x)||²``.  Rows with low score are those the
+     model "knows about".
+Applications
+────────────
+  - Dataset curation: find which rows of a large table match the model's domain.
+  - Anomaly stream scanning: flag rows that are outside the STANNO's distribution.
+  - Semantic retrieval: return the top-k rows most similar to what the model learned.
+  - Cascaded filtering: chain DSANNOs to progressively narrow a large dataset.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Generator, Iterable, List, Optional, Tuple
+import numpy as np
+from stanno.core.stanno import STANNO
+# ─── result type ─────────────────────────────────────────────────────────────
+@dataclass
+class ScanResult:
+    """
+    Result of a DSANNO scan over a dataset.
+    Attributes
+    ----------
+    indices : ndarray (N,)
+        Original row indices (0-based) of the scanned data.
+    scores : ndarray (N,)
+        Reconstruction / prediction error per row (lower = better match).
+    predictions : ndarray (N, output_dim)
+        Raw STANNO output for each row.
+    threshold : float or None
+        The threshold used to generate ``matched_mask`` (if set_threshold was
+        called or a threshold was passed to scan()).
+    matched_mask : ndarray (N,) bool or None
+        True for rows where score <= threshold.
+    """
+    indices: np.ndarray
+    scores: np.ndarray
+    predictions: np.ndarray
+    threshold: Optional[float] = None
+    matched_mask: Optional[np.ndarray] = None
+    # ── convenience accessors ────────────────────────────────────────────────
+    def matched_indices(self) -> np.ndarray:
+        """Indices of rows that passed the match threshold."""
+        if self.matched_mask is None:
+            raise RuntimeError("No threshold set — call scan(threshold=…) or set_threshold()")
+        return self.indices[self.matched_mask]
+    def top_k_indices(self, k: int) -> np.ndarray:
+        """Indices of the k lowest-score (best-match) rows."""
+        order = np.argsort(self.scores)
+        return self.indices[order[:k]]
+    def top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:
+        """Return (indices, scores) of the k best-matching rows."""
+        order = np.argsort(self.scores)[:k]
+        return self.indices[order], self.scores[order]
+    def set_threshold(self, threshold: float) -> "ScanResult":
+        """Apply a threshold and populate matched_mask.  Returns self."""
+        self.threshold = threshold
+        self.matched_mask = self.scores <= threshold
+        return self
+    def __len__(self) -> int:
+        return len(self.indices)
+    def __repr__(self) -> str:
+        n = len(self.indices)
+        mn = f"{self.scores.min():.4f}" if n else "—"
+        mx = f"{self.scores.max():.4f}" if n else "—"
+        matched = (
+            f"  matched={self.matched_mask.sum()}" if self.matched_mask is not None else ""
+        )
+        return f"ScanResult(n={n}, score_range=[{mn}, {mx}]{matched})"
+# ─── DSANNO ──────────────────────────────────────────────────────────────────
+class DSANNO:
+    """
+    Data Scanning Artificial Neural Network Object.
+    Wraps a trained STANNO and scans datasets to find rows that match its
+    learned representation.
+    Parameters
+    ----------
+    stanno : STANNO
+        A trained STANNO instance.
+    mode : {"reconstruction", "prediction"}
+        Scoring mode:
+        - ``"reconstruction"`` — compare STANNO(x) to x itself.
+          Requires output_dim == input_dim.
+        - ``"prediction"`` — compare STANNO(x) to an externally provided y.
+          Pass y_known to scan() / score_batch().
+    score_fn : callable(y_pred, y_ref) → float, optional
+        Custom per-row scoring function.  Default: mean squared error.
+    Example
+    -------
+    >>> # Reconstruction scanner (autoencoder mode)
+    >>> ae = STANNO(STANNOConfig(layers=[16, 8, 16]))
+    >>> ae.fit(x_normal, x_normal)
+    >>> scanner = DSANNO(ae, mode="reconstruction")
+    >>> result = scanner.scan(x_large_dataset, threshold=0.05)
+    >>> normal_rows = result.matched_indices()
+    """
+    def __init__(
+        self,
+        stanno: STANNO,
+        mode: str = "reconstruction",
+        score_fn=None,
+    ) -> None:
+        if mode not in ("reconstruction", "prediction"):
+            raise ValueError(
+                f"mode must be 'reconstruction' or 'prediction', got {mode!r}"
+            )
+        self.stanno = stanno
+        self.mode = mode
+        self._score_fn = score_fn or self._default_score
+    # ── core scoring ─────────────────────────────────────────────────────────
+    @staticmethod
+    def _default_score(y_pred: np.ndarray, y_ref: np.ndarray) -> np.ndarray:
+        """Mean squared error per row — shape (N,)."""
+        return np.mean((y_pred - y_ref) ** 2, axis=1)
+    def score_row(
+        self,
+        x: np.ndarray,
+        y_ref: Optional[np.ndarray] = None,
+    ) -> float:
+        """
+        Score a single row.
+        Parameters
+        ----------
+        x : array (input_dim,) or (1, input_dim)
+        y_ref : array (output_dim,) or (1, output_dim), optional
+            Required in 'prediction' mode.
+        Returns
+        -------
+        score : float — lower means the STANNO recognises this row.
+        """
+        x = np.asarray(x, dtype=np.float32).reshape(1, -1)
+        y_pred = self.stanno.predict(x)
+        if self.mode == "reconstruction":
+            y_ref_arr = x
+        else:
+            if y_ref is None:
+                raise ValueError("y_ref required in 'prediction' mode")
+            y_ref_arr = np.asarray(y_ref, dtype=np.float32).reshape(1, -1)
+        return float(self._score_fn(y_pred, y_ref_arr)[0])
+    def score_batch(
+        self,
+        x: np.ndarray,
+        y_ref: Optional[np.ndarray] = None,
+        batch_size: int = 256,
+    ) -> np.ndarray:
+        """
+        Score an array of rows efficiently in batches.
+        Parameters
+        ----------
+        x : ndarray (N, input_dim)
+        y_ref : ndarray (N, output_dim), optional — required for 'prediction' mode
+        batch_size : int
+        Returns
+        -------
+        scores : ndarray (N,) — per-row reconstruction / prediction error
+        """
+        x = np.asarray(x, dtype=np.float32)
+        N = x.shape[0]
+        all_scores: List[np.ndarray] = []
+        all_preds: List[np.ndarray] = []
+        for start in range(0, N, batch_size):
+            xb = x[start : start + batch_size]
+            y_pred = self.stanno.predict(xb)
+            if self.mode == "reconstruction":
+                ref = xb
+            else:
+                if y_ref is None:
+                    raise ValueError("y_ref required in 'prediction' mode")
+                ref = np.asarray(y_ref[start : start + batch_size], dtype=np.float32)
+            all_scores.append(self._score_fn(y_pred, ref))
+            all_preds.append(y_pred)
+        return np.concatenate(all_scores), np.concatenate(all_preds)
+    # ── scan ─────────────────────────────────────────────────────────────────
+    def scan(
+        self,
+        x: np.ndarray,
+        y_ref: Optional[np.ndarray] = None,
+        threshold: Optional[float] = None,
+        batch_size: int = 256,
+    ) -> ScanResult:
+        """
+        Scan a dataset and return a ScanResult.
+        Parameters
+        ----------
+        x : ndarray (N, input_dim)
+        y_ref : ndarray (N, output_dim), optional — required for 'prediction' mode
+        threshold : float, optional — rows with score <= threshold are flagged
+        batch_size : int
+        Returns
+        -------
+        ScanResult
+        """
+        scores, preds = self.score_batch(x, y_ref=y_ref, batch_size=batch_size)
+        indices = np.arange(len(scores))
+        result = ScanResult(
+            indices=indices,
+            scores=scores,
+            predictions=preds,
+        )
+        if threshold is not None:
+            result.set_threshold(threshold)
+        return result
+    def top_k(
+        self,
+        x: np.ndarray,
+        k: int = 10,
+        y_ref: Optional[np.ndarray] = None,
+        batch_size: int = 256,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Return the k rows that best match the STANNO's learned representation.
+        Returns
+        -------
+        indices : ndarray (k,)  — original row indices
+        scores  : ndarray (k,)  — lowest error first
+        preds   : ndarray (k, output_dim)
+        """
+        scores, preds = self.score_batch(x, y_ref=y_ref, batch_size=batch_size)
+        order = np.argsort(scores)[:k]
+        return order, scores[order], preds[order]
+    def find_matches(
+        self,
+        x: np.ndarray,
+        threshold: float,
+        y_ref: Optional[np.ndarray] = None,
+        batch_size: int = 256,
+    ) -> np.ndarray:
+        """
+        Return a boolean mask of rows whose score is at or below threshold.
+        Parameters
+        ----------
+        x : ndarray (N, input_dim)
+        threshold : float — score cutoff (inclusive)
+        y_ref : ndarray (N, output_dim), optional
+        batch_size : int
+        Returns
+        -------
+        mask : ndarray (N,) bool
+        """
+        scores, _ = self.score_batch(x, y_ref=y_ref, batch_size=batch_size)
+        return scores <= threshold
+    # ── streaming scan ───────────────────────────────────────────────────────
+    def scan_stream(
+        self,
+        batches: Iterable[np.ndarray],
+        threshold: Optional[float] = None,
+        y_ref_batches: Optional[Iterable[np.ndarray]] = None,
+    ) -> Generator[ScanResult, None, None]:
+        """
+        Scan an iterable of batches lazily — suitable for large files or live feeds.
+        Parameters
+        ----------
+        batches : iterable of ndarray (batch, input_dim)
+        threshold : float, optional
+        y_ref_batches : iterable of ndarray (batch, output_dim), optional
+        Yields
+        ------
+        ScanResult for each incoming batch (with offset indices)
+        """
+        offset = 0
+        ref_iter = iter(y_ref_batches) if y_ref_batches is not None else None
+        for xb in batches:
+            xb = np.asarray(xb, dtype=np.float32)
+            y_ref_b = next(ref_iter) if ref_iter is not None else None
+            y_pred = self.stanno.predict(xb)
+            if self.mode == "reconstruction":
+                ref = xb
+            else:
+                if y_ref_b is None:
+                    raise ValueError("y_ref_batches required in 'prediction' mode")
+                ref = np.asarray(y_ref_b, dtype=np.float32)
+            scores = self._score_fn(y_pred, ref)
+            indices = np.arange(offset, offset + len(xb))
+            result = ScanResult(indices=indices, scores=scores, predictions=y_pred)
+            if threshold is not None:
+                result.set_threshold(threshold)
+            yield result
+            offset += len(xb)
+    # ── calibration ──────────────────────────────────────────────────────────
+    def calibrate_threshold(
+        self,
+        x_known: np.ndarray,
+        percentile: float = 95.0,
+        y_ref: Optional[np.ndarray] = None,
+        batch_size: int = 256,
+    ) -> float:
+        """
+        Estimate a threshold from known in-distribution data.
+        Scores ``x_known`` and returns the value at ``percentile``-th percentile.
+        Use the result as the ``threshold`` in subsequent scan() calls.
+        Parameters
+        ----------
+        x_known : ndarray (N, input_dim) — in-distribution examples
+        percentile : float — e.g. 95 means "flag the 5% worst reconstructions"
+        y_ref : ndarray (N, output_dim), optional — required for 'prediction' mode
+        Returns
+        -------
+        threshold : float
+        """
+        scores, _ = self.score_batch(x_known, y_ref=y_ref, batch_size=batch_size)
+        return float(np.percentile(scores, percentile))
+    # ── repr ─────────────────────────────────────────────────────────────────
+    def __repr__(self) -> str:
+        layers = "→".join(str(d) for d in self.stanno.config.layers)
+        return f"DSANNO(mode={self.mode!r}, stanno=[{layers}])"

stanno/integration/filter.py ADDED Viewed

	@@ -0,0 +1,158 @@

+"""
+STANNOFilter — anomaly-based pre-classifier / gating layer for LLM pipelines.
+How it works
+────────────
+1. Encode the incoming prompt/context as a numeric vector x (caller's job).
+2. Run STANNO.predict(x) and compare to the expected reconstruction y_true.
+3. The MSE of this comparison is the "anomaly score".
+4. Normalise to [0, 1] using the training MSE baseline.
+5. If score > threshold AND block_above_threshold is True, raise an exception
+   (or return a blocked sentinel) instead of forwarding to the LLM.
+For a typical autoencoder use-case (embedding reconstruction), a high MSE
+means the input is out-of-distribution — the STANNO never learned it.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+from stanno.config.schema import FilterConfig
+from stanno.core.stanno import STANNO
+class FilteredRequestError(Exception):
+    """
+    Raised when a request is blocked by the STANNO anomaly filter.
+    STANNOFilter.filter_and_send() raises this instead of contacting the LLM
+    when the score metadata marks the request as blocked.
+    """
+class STANNOFilter:
+    """
+    Anomaly-based pre-filter / pre-classifier for LLM pipelines.
+    Parameters
+    ----------
+    stanno : STANNO
+        A trained STANNO instance used for anomaly scoring.
+    filter_config : FilterConfig
+        Threshold and blocking configuration.
+    llm_client : LLMClient, optional
+        If provided, filter_and_send() will call it after the check.
+    """
+    def __init__(
+        self,
+        stanno: STANNO,
+        filter_config: FilterConfig,
+        llm_client=None,
+    ) -> None:
+        self.stanno = stanno
+        self.config = filter_config
+        self.llm_client = llm_client
+    # ── scoring ───────────────────────────────────────────────────────────────
+    def score(
+        self,
+        x: np.ndarray,
+        y_true: Optional[np.ndarray] = None,
+    ) -> Tuple[float, Dict[str, Any]]:
+        """
+        Compute an anomaly score for input x.
+        If y_true is None the STANNO output is compared against x itself
+        (autoencoder mode: expects input_dim == output_dim, or the caller
+        passes the expected reconstruction explicitly).
+        Returns
+        -------
+        score : float in [0.0, 1.0]
+        metadata : dict with raw_mse, norm_baseline, threshold, blocked
+        """
+        x = np.asarray(x, dtype=np.float32)
+        if x.ndim == 1:
+            x = x.reshape(1, -1)
+        y_pred = self.stanno.predict(x)
+        if y_true is None:
+            # autoencoder: reconstruct x
+            if self.stanno.net.output_dim == self.stanno.net.input_dim:
+                y_true = x
+            else:
+                raise ValueError(
+                    "y_true must be provided when output_dim != input_dim."
+                )
+        else:
+            y_true = np.asarray(y_true, dtype=np.float32)
+            if y_true.ndim == 1:
+                y_true = y_true.reshape(1, -1)
+        raw_mse = float(np.mean((y_pred - y_true) ** 2))
+        # Normalise: use training MSE baseline if available
+        baseline = self.stanno._train_mse_norm or 1.0
+        score = float(min(1.0, raw_mse / max(baseline, 1e-8)))
+        metadata = {
+            "raw_mse": raw_mse,
+            "norm_baseline": baseline,
+            "threshold": self.config.anomaly_threshold,
+            "score": score,
+            "blocked": score > self.config.anomaly_threshold
+                       and self.config.block_above_threshold,
+        }
+        return score, metadata
+    # ── filter + forward ──────────────────────────────────────────────────────
+    def filter_and_send(
+        self,
+        messages: List[Dict[str, str]],
+        x: np.ndarray,
+        y_true: Optional[np.ndarray] = None,
+        **llm_kwargs: Any,
+    ) -> Dict[str, Any]:
+        """
+        Score x, optionally block the request, then send to the LLM.
+        Parameters
+        ----------
+        messages : list of {"role": ..., "content": ...}
+        x : numeric representation of the input (embedding, features, etc.)
+        y_true : expected reconstruction, or None for autoencoder mode
+        **llm_kwargs : forwarded to llm_client.chat_completion()
+        Returns
+        -------
+        LLM response dict, enriched with "stanno_filter" metadata key.
+        Raises
+        ------
+        FilteredRequestError
+            When score > threshold and block_above_threshold is True.
+        """
+        score, metadata = self.score(x, y_true)
+        if metadata["blocked"]:
+            raise FilteredRequestError(
+                f"Request blocked by STANNO filter: "
+                f"anomaly_score={score:.4f} > threshold={self.config.anomaly_threshold:.4f}"
+            )
+        if self.llm_client is None:
+            raise RuntimeError(
+                "No llm_client configured on this STANNOFilter. "
+                "Pass one at construction time."
+            )
+        response = self.llm_client.chat_completion(messages, **llm_kwargs)
+        response[self.config.metadata_field] = metadata
+        return response
+    def __repr__(self) -> str:
+        return (
+            f"STANNOFilter(threshold={self.config.anomaly_threshold}, "
+            f"block={self.config.block_above_threshold})"
+        )

stanno/integration/llm_client.py ADDED Viewed

	@@ -0,0 +1,123 @@

+"""
+LLM HTTP client.
+Thin wrapper around httpx that talks to any OpenAI-compatible endpoint.
+Credentials are read from environment variables (priority) or LLMConfig fields:
+  STANNO_LLM_BASE_URL  — e.g. http://localhost:11434
+  STANNO_LLM_API_KEY   — Bearer token (use "ollama" for Ollama)
+  STANNO_LLM_MODEL     — e.g. llama3.2:3b
+"""
+from __future__ import annotations
+import os
+from typing import Any, Dict, List, Optional
+from stanno.config.schema import LLMConfig
+class LLMClient:
+    """
+    Minimal OpenAI-compatible chat client.
+    Parameters
+    ----------
+    config : LLMConfig
+        Configuration object.  Environment variables override field values.
+    """
+    def __init__(self, config: LLMConfig) -> None:
+        try:
+            import httpx
+        except ImportError as exc:
+            raise ImportError(
+                "httpx is required for LLM integration.  "
+                "Install it with: pip install stanno[llm]"
+            ) from exc
+        self._base_url: str = (
+            os.environ.get("STANNO_LLM_BASE_URL") or config.base_url
+        ).rstrip("/")
+        self._api_key: str = (
+            os.environ.get("STANNO_LLM_API_KEY") or config.api_key or "none"
+        )
+        self._model: str = (
+            os.environ.get("STANNO_LLM_MODEL") or config.model
+        )
+        self._temperature: float = config.temperature
+        self._max_tokens: int = config.max_tokens
+        self._timeout: float = float(config.timeout_seconds)
+        self._http = httpx.Client(timeout=self._timeout)
+    # ── public API ────────────────────────────────────────────────────────────
+    def chat_completion(
+        self,
+        messages: List[Dict[str, str]],
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """
+        Send a chat completion request.
+        Parameters
+        ----------
+        messages : list of {"role": str, "content": str}
+        **kwargs : overrides for temperature, max_tokens, model, etc.
+        Returns
+        -------
+        Full response dict (OpenAI format).
+        """
+        payload: Dict[str, Any] = {
+            "model": kwargs.pop("model", self._model),
+            "messages": messages,
+            "temperature": kwargs.pop("temperature", self._temperature),
+            "max_tokens": kwargs.pop("max_tokens", self._max_tokens),
+            **kwargs,
+        }
+        headers = {"Authorization": f"Bearer {self._api_key}"}
+        response = self._http.post(
+            f"{self._base_url}/v1/chat/completions",
+            json=payload,
+            headers=headers,
+        )
+        response.raise_for_status()
+        return response.json()
+    def complete(
+        self,
+        prompt: str,
+        system: Optional[str] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Convenience wrapper.  Returns just the assistant's reply text.
+        Parameters
+        ----------
+        prompt : str
+            User message.
+        system : str, optional
+            System prompt.
+        """
+        messages: List[Dict[str, str]] = []
+        if system:
+            messages.append({"role": "system", "content": system})
+        messages.append({"role": "user", "content": prompt})
+        result = self.chat_completion(messages, **kwargs)
+        return result["choices"][0]["message"]["content"]
+    def close(self) -> None:
+        """Release underlying HTTP connection pool."""
+        self._http.close()
+    def __enter__(self) -> "LLMClient":
+        return self
+    def __exit__(self, *args: Any) -> None:
+        self.close()
+    def __repr__(self) -> str:
+        return (
+            f"LLMClient(base_url={self._base_url!r}, "
+            f"model={self._model!r})"
+        )

stanno/trainers/__init__.py ADDED Viewed

File without changes

stanno/trainers/evolutionary.py ADDED Viewed

	@@ -0,0 +1,189 @@

+"""
+EvolutionaryTrainerNet — ES-based trainer, no autodiff required.
+Strategy
+────────
+The trainer maintains a set of per-layer learning-rate multipliers (the
+"genome").  At each meta-training step it generates a population of
+perturbed genomes, evaluates each by running K training steps on a task,
+and updates the genome toward the perturbations with the best fitness
+(lowest post-update test loss) using the OpenAI ES gradient estimator.
+During normal training (compute_updates) it delegates the gradient
+computation to FixedTrainerNet and then scales the updates per-layer using
+the evolved multipliers.  Before any meta-training the multipliers are all
+1.0, so behaviour is identical to FixedTrainerNet — making this a safe,
+always-functional fallback.
+"""
+from __future__ import annotations
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.trainer import AbstractTrainerNet, TraineeState
+from stanno.trainers.fixed import FixedTrainerNet
+class EvolutionaryTrainerNet(AbstractTrainerNet):
+    """
+    ES-based trainer with per-layer learning-rate multipliers.
+    Parameters
+    ----------
+    learning_rate : float
+        Base learning rate (passed to the internal FixedTrainerNet).
+    pop_size : int
+        ES population size.
+    sigma : float
+        ES perturbation standard deviation.
+    """
+    def __init__(
+        self,
+        learning_rate: float = 0.01,
+        pop_size: int = 20,
+        sigma: float = 0.05,
+        seed: Optional[int] = None,
+    ) -> None:
+        self.learning_rate = learning_rate
+        self.pop_size = pop_size
+        self.sigma = sigma
+        self._rng = np.random.default_rng(seed)
+        self._fixed = FixedTrainerNet(learning_rate=learning_rate)
+        # genome: per-layer scale factors, initialised to 1.0
+        # shape determined lazily on first compute_updates call
+        self._layer_scales: Optional[np.ndarray] = None
+    # ── compute_updates ───────────────────────────────────────────────────────
+    def compute_updates(
+        self, state: TraineeState
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+        """
+        Delegate gradient computation to FixedTrainerNet, then scale
+        per-layer using evolved multipliers.
+        """
+        n = len(state.weights)
+        # Lazy initialisation of scales (depends on number of layers)
+        if self._layer_scales is None or len(self._layer_scales) != n:
+            self._layer_scales = np.ones(n, dtype=np.float32)
+        weight_deltas, bias_deltas = self._fixed.compute_updates(state)
+        for i, scale in enumerate(self._layer_scales):
+            weight_deltas[i] *= scale
+            bias_deltas[i] *= scale
+        return weight_deltas, bias_deltas
+    # ── meta_train ────────────────────────────────────────────────────────────
+    def meta_train(
+        self,
+        tasks,
+        k_steps: int = 10,
+        n_iterations: int = 50,
+    ) -> None:
+        """
+        Evolve per-layer learning-rate multipliers to minimise post-update loss.
+        Each task must be a dict:
+            {"x_train": ndarray, "y_train": ndarray,
+             "x_test":  ndarray, "y_test":  ndarray,
+             "stanno_config": STANNOConfig}   ← optional
+        """
+        if not tasks:
+            return
+        # Infer n_layers from the first task's config or default
+        n_layers = self._infer_n_layers(tasks)
+        if self._layer_scales is None or len(self._layer_scales) != n_layers:
+            self._layer_scales = np.ones(n_layers, dtype=np.float32)
+        params = self._layer_scales.copy()
+        for iteration in range(n_iterations):
+            perturbations = self._rng.normal(0.0, self.sigma, (self.pop_size, n_layers)).astype(np.float32)
+            fitnesses = np.zeros(self.pop_size, dtype=np.float32)
+            for p_idx, pert in enumerate(perturbations):
+                candidate_scales = np.clip(params + pert, 0.0, 10.0)
+                loss = self._evaluate_tasks(tasks, k_steps, candidate_scales)
+                fitnesses[p_idx] = -loss   # negate: higher = better
+            # Normalise fitness
+            std = fitnesses.std()
+            if std > 1e-8:
+                w = (fitnesses - fitnesses.mean()) / std
+            else:
+                w = np.zeros_like(fitnesses)
+            # ES gradient estimate
+            grad = (perturbations * w[:, np.newaxis]).mean(axis=0)
+            params = np.clip(params + self.sigma * grad, 0.0, 10.0)
+            if (iteration + 1) % 10 == 0:
+                best_loss = self._evaluate_tasks(tasks, k_steps, params)
+                print(f"[ES meta-train] iter {iteration+1:3d}  loss={best_loss:.5f}")
+        self._layer_scales = params
+    # ── helpers ───────────────────────────────────────────────────────────────
+    def _infer_n_layers(self, tasks) -> int:
+        from stanno.config.schema import STANNOConfig
+        cfg = tasks[0].get("stanno_config")
+        if cfg is not None:
+            return len(cfg.layers) - 1
+        # Fallback: infer from data dimensionality
+        x = tasks[0]["x_train"]
+        y = tasks[0]["y_train"]
+        return 2  # default 2-layer net
+    def _evaluate_tasks(
+        self,
+        tasks,
+        k_steps: int,
+        layer_scales: np.ndarray,
+    ) -> float:
+        """Clone self with candidate scales and evaluate mean test loss."""
+        from stanno.config.schema import STANNOConfig
+        from stanno.core.stanno import STANNO
+        import copy
+        candidate = copy.deepcopy(self)
+        candidate._layer_scales = layer_scales
+        total_loss = 0.0
+        for task in tasks:
+            x_dim = task["x_train"].shape[1] if task["x_train"].ndim > 1 else 1
+            y_dim = task["y_train"].shape[1] if task["y_train"].ndim > 1 else 1
+            cfg = task.get(
+                "stanno_config",
+                STANNOConfig(layers=[x_dim, 32, y_dim]),
+            )
+            stanno = STANNO(cfg)
+            stanno.trainer = candidate
+            stanno.fit(
+                task["x_train"],
+                task["y_train"],
+                epochs=k_steps,
+                batch_size=min(32, len(task["x_train"])),
+            )
+            preds = stanno.predict(task["x_test"])
+            total_loss += float(np.mean((preds - task["y_test"]) ** 2))
+        return total_loss / max(len(tasks), 1)
+    def __repr__(self) -> str:
+        scales = (
+            np.round(self._layer_scales, 3).tolist()
+            if self._layer_scales is not None
+            else "uninitialised"
+        )
+        return (
+            f"EvolutionaryTrainerNet(lr={self.learning_rate}, "
+            f"pop={self.pop_size}, scales={scales})"
+        )

stanno/trainers/fixed.py ADDED Viewed

	@@ -0,0 +1,180 @@

+"""
+FixedTrainerNet — patent-faithful 4-module trainer.
+Implements the four modules described in US patent 5852815A (Thaler, 1998):
+  Module 1 — Activation sensitivity:
+      How do activations vary with small changes in pre-activation inputs?
+      Answers: tanh'(z_i) = 1 − tanh(z_i)²  per hidden layer.
+  Module 2 — Activation derivatives:
+      The pre-synaptic activations feeding into each weight matrix.
+      These are activations[i] for weight W_i.
+  Module 3 — Error terms:
+      Delta (δ) signals propagated backward from the output error:
+        δ_{n-1} = (2/N) · (y_pred − y_true)          [linear output]
+        δ_i     = (δ_{i+1} @ W_{i+1}.T) · tanh'(z_i) [hidden layers]
+  Module 4 — Weight corrections:
+        ΔW_i = −lr · activations[i].T @ δ_i
+        Δb_i = −lr · Σ_batch(δ_i)
+This is mathematically identical to standard backpropagation + SGD, but
+structured as four named, encapsulated operations — making it faithful to
+the STANNO concept of a network object that trains another network object.
+"""
+from __future__ import annotations
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.trainer import AbstractTrainerNet, TraineeState
+class FixedTrainerNet(AbstractTrainerNet):
+    """
+    4-module trainer.  Works with any depth TraineeNet.  NumPy only.
+    The four private ``_moduleN_*`` methods are chained by ``compute_updates``
+    (stand-alone training) and ``compute_cascade_updates`` (chained stages).
+    Every layer except the last is treated as tanh; the last layer is
+    treated as linear.
+    Parameters
+    ----------
+    learning_rate : float
+        Step size for weight updates.
+    """
+    def __init__(self, learning_rate: float = 0.01) -> None:
+        self.learning_rate = learning_rate
+    # ── Module 1: activation sensitivity ────────────────────────────────────
+    def _module1_activation_sensitivity(
+        self, pre_activations: List[np.ndarray]
+    ) -> List[np.ndarray]:
+        """
+        Compute the activation derivative for every layer.
+        All entries but the last use tanh'(z) = 1 − tanh(z)².  The last entry
+        is the linear output layer, whose derivative is 1.0 everywhere, so it
+        becomes an array of ones.  The result has the same length as
+        pre_activations.
+        """
+        last = len(pre_activations) - 1
+        return [
+            1.0 - np.tanh(z) ** 2 if i < last else np.ones_like(z)
+            for i, z in enumerate(pre_activations)
+        ]
+    # ── Module 2: activation derivatives (pre-synaptic values) ──────────────
+    def _module2_pre_synaptic(
+        self, activations: List[np.ndarray]
+    ) -> List[np.ndarray]:
+        """
+        Return the pre-synaptic activation for each weight matrix.
+        activations[i] feeds into W_i, so pre_synaptic[i] = activations[i].
+        The final entry (the network output) feeds no weight and is dropped.
+        """
+        return activations[:-1]   # drop the final output; len == n_weights
+    # ── Module 3: error terms ────────────────────────────────────────────────
+    def _module3_error_terms(
+        self,
+        state: TraineeState,
+        sensitivities: List[np.ndarray],
+        output_delta: Optional[np.ndarray] = None,
+    ) -> List[np.ndarray]:
+        """
+        Compute delta (δ) error signals, propagating from output to input.
+        output_delta : ndarray (batch, output_dim), optional
+            If provided, use this as the pre-computed output-layer delta
+            (e.g. gradient from a downstream stage in a CascadeSTANNO).
+            When None the delta is computed from state.y_pred and
+            state.y_batch.
+        Returns deltas[i] for each weight layer i (0 … n-1).
+        """
+        n = len(state.weights)
+        deltas: List[np.ndarray] = [None] * n   # type: ignore[list-item]
+        if output_delta is not None:
+            deltas[n - 1] = output_delta
+        else:
+            # Only this branch needs y_pred / y_batch; reading them here keeps
+            # the externally-driven path independent of the state's targets.
+            batch_size = state.y_pred.shape[0]
+            # Output layer delta: (2/N)·(y_pred − y_true), times the
+            # output-layer sensitivity (=1 for a linear output).
+            deltas[n - 1] = (2.0 / batch_size) * (state.y_pred - state.y_batch)
+        # Propagate backward through hidden layers
+        for i in range(n - 2, -1, -1):
+            propagated = deltas[i + 1] @ state.weights[i + 1].T
+            deltas[i] = propagated * sensitivities[i]
+        return deltas
+    # ── Module 4: weight corrections ─────────────────────────────────────────
+    def _module4_weight_corrections(
+        self,
+        pre_synaptic: List[np.ndarray],
+        deltas: List[np.ndarray],
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+        """
+        Compute ΔW_i and Δb_i for each layer.
+        ΔW_i = −lr · pre_synaptic[i].T @ δ_i
+        Δb_i = −lr · Σ_batch(δ_i)   (kept 2-D via keepdims)
+        Returns (weight_deltas, bias_deltas), one entry per layer.
+        """
+        lr = self.learning_rate
+        weight_deltas: List[np.ndarray] = []
+        bias_deltas: List[np.ndarray] = []
+        for pre, delta in zip(pre_synaptic, deltas):
+            weight_deltas.append(-lr * (pre.T @ delta))
+            bias_deltas.append(-lr * np.sum(delta, axis=0, keepdims=True))
+        return weight_deltas, bias_deltas
+    # ── public API ───────────────────────────────────────────────────────────
+    def compute_updates(
+        self, state: TraineeState
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+        """Run all four modules and return (weight_deltas, bias_deltas)."""
+        sensitivities = self._module1_activation_sensitivity(state.pre_activations)
+        pre_synaptic  = self._module2_pre_synaptic(state.activations)
+        deltas        = self._module3_error_terms(state, sensitivities)
+        return self._module4_weight_corrections(pre_synaptic, deltas)
+    def compute_cascade_updates(
+        self,
+        state: TraineeState,
+        output_delta: Optional[np.ndarray] = None,
+    ) -> Tuple[List[np.ndarray], List[np.ndarray], np.ndarray]:
+        """
+        Cascade-aware variant: accepts an upstream output_delta and returns
+        the input_gradient for the preceding stage.
+        Parameters
+        ----------
+        state : TraineeState
+        output_delta : ndarray (batch, output_dim), optional
+            Gradient from the downstream stage.  None → compute from y_batch.
+        Returns
+        -------
+        weight_deltas, bias_deltas, input_gradient
+            input_gradient shape (batch, input_dim) — pass as output_delta to
+            the stage before this one in a CascadeSTANNO.
+        """
+        sensitivities = self._module1_activation_sensitivity(state.pre_activations)
+        pre_synaptic  = self._module2_pre_synaptic(state.activations)
+        deltas        = self._module3_error_terms(state, sensitivities, output_delta)
+        dW, db        = self._module4_weight_corrections(pre_synaptic, deltas)
+        # Gradient at input layer: propagate first-layer delta back through W[0]
+        input_gradient = deltas[0] @ state.weights[0].T
+        return dW, db, input_gradient
+    def __repr__(self) -> str:
+        return f"FixedTrainerNet(lr={self.learning_rate})"

stanno/trainers/local_rule.py ADDED Viewed

	@@ -0,0 +1,259 @@

+"""
+LocalRuleTrainerNet — per-synapse learned update rule.
+Architecture
+────────────
+A small shared MLP (the "rule network") takes a 4-element feature vector
+per synapse and outputs the weight delta for that synapse:
+  Input features per synapse W_i[j,k]:
+    [0] pre_j   — mean pre-synaptic activation  activations[i][:, j].mean()
+    [1] error_k — mean error signal at post-syn  delta[i][:, k].mean()
+    [2] w_jk    — current weight value           W_i[j, k]
+    [3] is_bias — 0.0 for weights, 1.0 for biases
+  Output: Δw_jk (scalar weight delta)
+The rule MLP is shared across ALL synapses in the network, which means it
+generalises across layers and architectures.
+Error signals (delta) are computed by FixedTrainerNet's Module 3 — the
+local rule only replaces Module 4 (the actual weight correction formula).
+Meta-training (Phase 2b)
+─────────────────────────
+meta_train() adapts the rule MLP itself.  The planned PyTorch path (an
+unrolled K-step differentiation loop) is not implemented yet, so with or
+without PyTorch it currently runs this class's own evolution-strategy (ES)
+routine on the rule MLP parameters.
+"""
+from __future__ import annotations
+from typing import List, Optional, Tuple
+import numpy as np
+from stanno.core.trainer import AbstractTrainerNet, TraineeState
+class LocalRuleTrainerNet(AbstractTrainerNet):
+    """
+    Per-synapse learned update rule.
+    A small MLP (4 features -> hidden_dim tanh units -> 1 output) is applied
+    to every weight and bias of the trainee; its output, scaled by
+    learning_rate and negated, is returned as that parameter's delta.
+    Parameters
+    ----------
+    learning_rate : float
+        Scaling factor applied to all rule MLP outputs.
+    hidden_dim : int
+        Width of the rule MLP's single hidden layer.
+    seed : int, optional
+        RNG seed for reproducibility.
+    """
+    # Feature vector size: [pre, error, weight, is_bias]
+    _FEATURE_DIM = 4
+    def __init__(
+        self,
+        learning_rate: float = 0.01,
+        hidden_dim: int = 16,
+        seed: Optional[int] = None,
+    ) -> None:
+        self.learning_rate = learning_rate
+        self.hidden_dim = hidden_dim
+        rng = np.random.default_rng(seed)
+        scale = 0.1
+        # Rule MLP parameters, all float32: small random weights, zero biases.
+        self._W1 = rng.normal(0.0, scale, (self._FEATURE_DIM, hidden_dim)).astype(np.float32)
+        self._b1 = np.zeros((hidden_dim,), dtype=np.float32)
+        self._W2 = rng.normal(0.0, scale, (hidden_dim, 1)).astype(np.float32)
+        self._b2 = np.zeros((1,), dtype=np.float32)
+    # ── rule MLP ─────────────────────────────────────────────────────────────
+    def _rule_forward(self, features: np.ndarray) -> np.ndarray:
+        """
+        Forward pass through the rule MLP.
+        features : (N_synapses, 4)
+        returns  : (N_synapses,)  — one delta per synapse
+        """
+        h = np.tanh(features @ self._W1 + self._b1)   # (N, hidden)
+        out = h @ self._W2 + self._b2                  # (N, 1)
+        return out.ravel()
+    def _build_features(
+        self,
+        pre_mean: np.ndarray,    # (in_dim,)
+        error_mean: np.ndarray,  # (out_dim,)
+        weight: np.ndarray,      # (in_dim, out_dim) or (1, out_dim) for biases
+        is_bias: float,
+    ) -> np.ndarray:
+        """
+        Build the (N_synapses, 4) feature matrix for one weight tensor.
+        For a weight matrix W (in_dim × out_dim), W is stored row-major so
+        W.ravel()[j * out_dim + k] = W[j, k].  Accordingly:
+          pre_grid[j*out + k] = pre_mean[j]     (np.repeat)
+          err_grid[j*out + k] = error_mean[k]   (np.tile)
+        For biases (is_bias truthy) pre_mean is ignored: the pre-synaptic
+        feature is a constant 1 and there is one row per bias entry.
+        """
+        flat_w = weight.ravel()
+        n_syn = flat_w.size
+        if is_bias:
+            # bias: pre is always 1, one entry per output neuron
+            pre_col = np.ones(n_syn, dtype=np.float32)
+            err_col = error_mean.ravel()[:n_syn].astype(np.float32)
+        else:
+            in_dim, out_dim = weight.shape
+            pre_col = np.repeat(pre_mean, out_dim).astype(np.float32)  # (in*out,)
+            err_col = np.tile(error_mean, in_dim).astype(np.float32)   # (in*out,)
+        bias_col = np.full(n_syn, is_bias, dtype=np.float32)
+        return np.stack([pre_col, err_col, flat_w.astype(np.float32), bias_col], axis=1)
+    # ── error terms (reuse Fixed module 3) ───────────────────────────────────
+    def _compute_deltas(self, state: TraineeState) -> List[np.ndarray]:
+        """Compute backward error signals using the same math as FixedTrainerNet."""
+        n = len(state.weights)
+        deltas: List[np.ndarray] = [None] * n  # type: ignore[list-item]
+        batch_size = state.y_pred.shape[0]
+        # Linear output layer: delta = (2/N)·(y_pred − y_true).
+        deltas[n - 1] = (2.0 / batch_size) * (state.y_pred - state.y_batch)
+        for i in range(n - 2, -1, -1):
+            propagated = deltas[i + 1] @ state.weights[i + 1].T
+            # Derivative taken from the pre-activation z: tanh'(z) = 1 - tanh(z)²
+            deltas[i] = propagated * (1.0 - np.tanh(state.pre_activations[i]) ** 2)
+        return deltas
+    # ── public API ───────────────────────────────────────────────────────────
+    def compute_updates(
+        self, state: TraineeState
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+        """
+        Return (weight_deltas, bias_deltas), one entry per trainee layer.
+        For each layer the batch-mean pre-synaptic activation and batch-mean
+        error signal are turned into per-parameter features; the rule MLP
+        output is scaled by learning_rate and negated.
+        """
+        deltas = self._compute_deltas(state)
+        weight_deltas: List[np.ndarray] = []
+        bias_deltas: List[np.ndarray] = []
+        for i, (W, b, delta) in enumerate(
+            zip(state.weights, state.biases, deltas)
+        ):
+            pre_mean = state.activations[i].mean(axis=0)     # (in_dim,)
+            error_mean = delta.mean(axis=0)                   # (out_dim,)
+            # Weight update
+            feat_w = self._build_features(pre_mean, error_mean, W, is_bias=0.0)
+            dw = self._rule_forward(feat_w).reshape(W.shape) * self.learning_rate
+            weight_deltas.append(-dw)
+            # Bias update
+            feat_b = self._build_features(pre_mean, error_mean, b, is_bias=1.0)
+            db = self._rule_forward(feat_b).reshape(b.shape) * self.learning_rate
+            bias_deltas.append(-db)
+        return weight_deltas, bias_deltas
+    # ── meta-training (Phase 2b) ──────────────────────────────────────────────
+    def meta_train(self, tasks, k_steps: int = 5, meta_lr: float = 1e-3) -> None:
+        """
+        Adapt the rule MLP to minimise post-update task loss.
+        If PyTorch can be imported the torch entry point is used (which
+        currently delegates to ES); otherwise ES is called directly.
+        Each task must be a dict:
+            {"x_train": ndarray, "y_train": ndarray,
+             "x_test":  ndarray, "y_test":  ndarray,
+             "stanno_config": STANNOConfig}
+        """
+        # Guard only the availability probe: an ImportError raised *during*
+        # training must propagate instead of silently restarting ES.
+        try:
+            import torch  # noqa: F401
+        except ImportError:
+            self._meta_train_es(tasks, k_steps=k_steps)
+        else:
+            self._meta_train_torch(tasks, k_steps=k_steps, meta_lr=meta_lr)
+    def _meta_train_es(
+        self,
+        tasks,
+        k_steps: int = 5,
+        pop_size: int = 20,
+        sigma: float = 0.05,
+        n_iterations: int = 50,
+    ) -> None:
+        """
+        ES-based meta-training of the rule MLP parameters.
+        Each iteration samples pop_size Gaussian perturbations (std sigma),
+        scores each candidate with _evaluate_tasks, and moves the parameters
+        along the fitness-weighted mean perturbation.
+        """
+        rng = np.random.default_rng()
+        params = self._flat_params()
+        try:
+            for _ in range(n_iterations):
+                perturbations = rng.normal(0.0, sigma, (pop_size, len(params))).astype(np.float32)
+                fitnesses = np.zeros(pop_size, dtype=np.float32)
+                for p_idx, pert in enumerate(perturbations):
+                    self._set_flat_params(params + pert)
+                    loss = self._evaluate_tasks(tasks, k_steps)
+                    fitnesses[p_idx] = -loss   # higher = better
+                # Fitness-weighted update (standardised fitness as weights)
+                w = (fitnesses - fitnesses.mean()) / (fitnesses.std() + 1e-8)
+                gradient_estimate = (perturbations * w[:, np.newaxis]).mean(axis=0)
+                params = params + sigma * gradient_estimate
+        finally:
+            # Candidates are evaluated by overwriting the live parameters, so
+            # always leave the rule at the current estimate, never at a
+            # random candidate — even if an evaluation raised.
+            self._set_flat_params(params)
+    def _evaluate_tasks(self, tasks, k_steps: int) -> float:
+        """Run k_steps of training on each task; return mean test MSE."""
+        import copy
+        from stanno.config.schema import STANNOConfig
+        from stanno.core.stanno import STANNO
+        from stanno.core.trainee import TraineeNet
+        total_loss = 0.0
+        for task in tasks:
+            cfg = task.get("stanno_config")
+            if cfg is None:
+                # Default architecture is built only when the task has no config.
+                cfg = STANNOConfig(
+                    layers=[task["x_train"].shape[1], 32, task["y_train"].shape[1]]
+                )
+            stanno = STANNO(cfg)
+            stanno.net = TraineeNet(cfg.layers)
+            stanno.trainer = copy.copy(self)   # use current rule params
+            stanno.fit(
+                task["x_train"],
+                task["y_train"],
+                epochs=k_steps,
+                batch_size=min(32, len(task["x_train"])),
+            )
+            preds = stanno.predict(task["x_test"])
+            total_loss += float(np.mean((preds - task["y_test"]) ** 2))
+        return total_loss / max(len(tasks), 1)
+    def _meta_train_torch(self, tasks, k_steps: int = 5, meta_lr: float = 1e-3) -> None:
+        """Unrolled K-step meta-training via PyTorch autograd (Phase 2b)."""
+        # NOTE: Full implementation requires converting TraineeNet to
+        # differentiable PyTorch ops and running the update chain through
+        # autograd.  This is left as a Phase 2b extension.
+        # For now, fall back to ES even when PyTorch is available
+        # (meta_lr is therefore unused).
+        self._meta_train_es(tasks, k_steps=k_steps)
+    # ── parameter serialisation helpers ──────────────────────────────────────
+    def _flat_params(self) -> np.ndarray:
+        """Return W1, b1, W2, b2 concatenated into one 1-D vector."""
+        return np.concatenate([
+            self._W1.ravel(), self._b1.ravel(),
+            self._W2.ravel(), self._b2.ravel(),
+        ])
+    def _set_flat_params(self, flat: np.ndarray) -> None:
+        """
+        Overwrite W1, b1, W2, b2 in place from a vector laid out as in
+        _flat_params.  Raises ValueError if the vector has the wrong length.
+        """
+        flat = np.asarray(flat).ravel()
+        targets = [self._W1, self._b1, self._W2, self._b2]
+        expected = sum(arr.size for arr in targets)
+        if flat.size != expected:
+            raise ValueError(
+                f"expected {expected} rule parameters, got {flat.size}"
+            )
+        idx = 0
+        for arr in targets:
+            n = arr.size
+            # arr[...] writes into the array itself; writing through
+            # arr.ravel() would be lost whenever ravel() returns a copy.
+            arr[...] = flat[idx: idx + n].reshape(arr.shape)
+            idx += n
+    def __repr__(self) -> str:
+        return (
+            f"LocalRuleTrainerNet(lr={self.learning_rate}, "
+            f"hidden_dim={self.hidden_dim})"
+        )

stanno_poc.py ADDED Viewed

	@@ -0,0 +1,314 @@

+"""STANNO-style proof of concept
+This module implements a very simple Self-Training Artificial Neural Network Object (STANNO)
+loosely inspired by Thaler's description: two neural networks, one of which trains the other,
+optionally folded into a single object.
+Design choices:
+- TraineeNet: a small multilayer perceptron (MLP) that learns a supervised mapping.
+- Trainer: training logic embedded inside STANNO using standard gradient descent.
+  Conceptually this plays the role of the "trainer" network described in the literature,
+  but here it is implemented as explicit code for simplicity.
+Features included for experimentation:
+- Supervised training on a toy dataset (e.g., y = sin(x)).
+- "Dreaming": run the trained net on a fixed or random latent input with inputs partially
+  or totally "blinded" (set to zero or constant) to observe internal dynamics.
+- Noise injection: add Gaussian noise with adjustable standard deviation to all weights,
+  to explore how output complexity changes with noise level (from "stupidity" to chaos).
+- Lesioning: randomly zero out a fraction of weights to mimic progressive "death" of
+  connections and observe degradation ("tunnel vision").
+The goal is not to reproduce the original spreadsheet implementation, but to give a
+simple, hackable playground in modern Python/NumPy that you can extend (including
+replacing the hard-coded trainer by a learned meta-network if desired).
+"""
+from __future__ import annotations
+import numpy as np
+from dataclasses import dataclass
+from typing import Tuple, Callable
+@dataclass
+class TraineeNet:
+    """Two-layer perceptron: input -> tanh hidden layer -> linear output.
+    Instances of this class are the networks trained by a STANNO object.
+    """
+    input_dim: int
+    hidden_dim: int
+    output_dim: int
+    def __post_init__(self) -> None:
+        gen = np.random.default_rng()
+        # Xavier-like init: weight std is 1/sqrt(fan_in); biases start at zero.
+        in_std = 1.0 / np.sqrt(self.input_dim)
+        self.W1 = gen.normal(0.0, in_std, (self.input_dim, self.hidden_dim))
+        self.b1 = np.zeros((1, self.hidden_dim))
+        hidden_std = 1.0 / np.sqrt(self.hidden_dim)
+        self.W2 = gen.normal(0.0, hidden_std, (self.hidden_dim, self.output_dim))
+        self.b2 = np.zeros((1, self.output_dim))
+    def parameters(self):
+        """Return the live parameter arrays in the order W1, b1, W2, b2."""
+        return [self.W1, self.b1, self.W2, self.b2]
+    def forward(self, x: np.ndarray) -> Tuple[np.ndarray, dict]:
+        """Run the network on x; return (output, cache of intermediates)."""
+        hidden_pre = x @ self.W1 + self.b1
+        hidden_act = np.tanh(hidden_pre)
+        out_pre = hidden_act @ self.W2 + self.b2
+        # Regression head: the output is the raw second-layer pre-activation.
+        return out_pre, {"x": x, "z1": hidden_pre, "a1": hidden_act, "z2": out_pre}
+    def apply_parameter_noise(self, sigma: float, rng: np.random.Generator | None = None) -> None:
+        """Perturb every parameter in place with N(0, sigma) noise.
+        Does nothing when sigma <= 0.  A fresh generator is created when rng
+        is None.
+        """
+        if sigma <= 0:
+            return
+        gen = np.random.default_rng() if rng is None else rng
+        for param in self.parameters():
+            param += gen.normal(0.0, sigma, param.shape)
+    def lesion(self, fraction: float, rng: np.random.Generator | None = None) -> None:
+        """Zero a random subset of W1 and W2 entries (simulated connection death).
+        fraction is clipped to [0, 1]; each weight is zeroed independently
+        with that probability.  Biases are left untouched.
+        """
+        frac = float(np.clip(fraction, 0.0, 1.0))
+        if frac <= 0:
+            return
+        gen = np.random.default_rng() if rng is None else rng
+        for weights in (self.W1, self.W2):
+            weights[gen.random(weights.shape) < frac] = 0.0
+class STANNO:
+    """Self-Training Neural Network Object (STANNO-style).
+    Encapsulates:
+    - A trainable network (TraineeNet).
+    - An internal training algorithm (gradient descent) that acts as the
+      "trainer" and updates the weights from examples.
+    This follows the spirit of the STANNOs described by Thaler: an object that
+    contains both the network and its training mechanism, and that can keep
+    learning online.
+    """
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        learning_rate: float = 1e-2,
+    ) -> None:
+        self.net = TraineeNet(input_dim, hidden_dim, output_dim)
+        self.learning_rate = learning_rate
+    # ---------------------- Core training logic ----------------------
+    def _loss_and_grads(self, x: np.ndarray, y_true: np.ndarray) -> Tuple[float, list]:
+        """Return (MSE loss, [dW1, db1, dW2, db2]) for one batch via backprop."""
+        y_pred, cache = self.net.forward(x)
+        residual = y_pred - y_true
+        loss = float(np.mean(residual ** 2))
+        # Output error signal, scaled by 2 / (rows in the batch).
+        # NOTE(review): the loss above averages over every element, so for
+        # output_dim > 1 this is output_dim times the gradient of that loss.
+        delta_out = (2.0 / x.shape[0]) * residual
+        # Output layer
+        grad_W2 = cache["a1"].T @ delta_out
+        grad_b2 = delta_out.sum(axis=0, keepdims=True)
+        # Back through W2, then through tanh: tanh'(z) = 1 - tanh(z)^2
+        delta_hidden = (delta_out @ self.net.W2.T) * (1.0 - np.tanh(cache["z1"]) ** 2)
+        # Hidden layer
+        grad_W1 = cache["x"].T @ delta_hidden
+        grad_b1 = delta_hidden.sum(axis=0, keepdims=True)
+        return loss, [grad_W1, grad_b1, grad_W2, grad_b2]
+    def trainer_step(self, x: np.ndarray, y_true: np.ndarray) -> float:
+        """Apply one gradient-descent update from a mini-batch; return its loss.
+        Conceptually this is the "trainer network" that adjusts the weights of
+        the TraineeNet.  Here it is implemented as plain gradient descent,
+        updating the parameter arrays in place.
+        """
+        loss, grads = self._loss_and_grads(x, y_true)
+        step = self.learning_rate
+        for weights, grad in zip(self.net.parameters(), grads):
+            weights -= step * grad
+        return loss
+    def fit(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        epochs: int = 1000,
+        batch_size: int = 32,
+        shuffle: bool = True,
+        callback: Callable[[int, float], None] | None = None,
+    ) -> None:
+        """Train on a dataset with the internal trainer.
+        Args:
+            x: shape (N, input_dim)
+            y: shape (N, output_dim)
+            epochs: number of passes over the dataset
+            batch_size: mini-batch size
+            shuffle: whether to reshuffle the rows at every epoch
+            callback: optional function(epoch, mean_loss) for logging
+        """
+        n_rows = x.shape[0]
+        gen = np.random.default_rng()
+        for epoch in range(epochs):
+            order = np.arange(n_rows)
+            if shuffle:
+                gen.shuffle(order)
+            x_epoch, y_epoch = x[order], y[order]
+            batch_losses = [
+                self.trainer_step(
+                    x_epoch[lo:lo + batch_size], y_epoch[lo:lo + batch_size]
+                )
+                for lo in range(0, n_rows, batch_size)
+            ]
+            epoch_loss = float(np.mean(batch_losses))
+            if callback is not None:
+                callback(epoch, epoch_loss)
+    # ---------------------- Inference & "dreaming" ----------------------
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """Return the network output for x (the forward cache is discarded)."""
+        return self.net.forward(x)[0]
+    def dream(
+        self,
+        num_steps: int = 128,
+        input_seed: np.ndarray | None = None,
+        noise_sigma: float = 0.0,
+        blind_inputs: bool = False,
+        rng: np.random.Generator | None = None,
+    ) -> np.ndarray:
+        """Generate an output sequence by feeding the net its own output.
+        Args:
+            num_steps: length of the sequence to generate.
+            input_seed: initial input vector; if None, uses zeros.
+            noise_sigma: amount of noise to add to weights *once* before dreaming.
+            blind_inputs: if True, inputs are forced to zero every step.
+            rng: optional RNG.
+        Returns:
+            Array of generated outputs of shape (num_steps, output_dim).
+        """
+        gen = np.random.default_rng() if rng is None else rng
+        src = self.net
+        in_dim, out_dim = src.input_dim, src.output_dim
+        # Dream on a noisy copy so the trained network itself is never modified.
+        shadow = TraineeNet(in_dim, src.hidden_dim, out_dim)
+        shadow.W1, shadow.b1 = src.W1.copy(), src.b1.copy()
+        shadow.W2, shadow.b2 = src.W2.copy(), src.b2.copy()
+        shadow.apply_parameter_noise(noise_sigma, rng=gen)
+        x = np.zeros((1, in_dim)) if input_seed is None else input_seed.reshape(1, -1)
+        outputs = []
+        for _ in range(num_steps):
+            x_step = np.zeros_like(x) if blind_inputs else x
+            y = shadow.forward(x_step)[0]
+            outputs.append(y.copy())
+            # Feedback: the output becomes the next input, so the sequence
+            # depends on the (noisy) weights.
+            if out_dim == in_dim:
+                x = y
+            else:
+                # Repeat each output element enough times to cover the input
+                # width, then truncate to exactly input_dim columns.
+                x = np.repeat(y, in_dim // out_dim + 1, axis=1)[:, :in_dim]
+        return np.concatenate(outputs, axis=0)
+# ---------------------- Demo utilities ----------------------
+def make_sin_dataset(
+    n_samples: int = 256,
+    rng: np.random.Generator | None = None,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Simple 1D regression dataset: y = sin(x), x uniform on [0, 2π).
+    Args:
+        n_samples: number of (x, y) pairs to draw.
+        rng: optional generator; pass a seeded one for a reproducible
+            dataset.  A fresh unseeded generator is used when None.
+    Returns:
+        (x, y), both of shape (n_samples, 1).
+    """
+    if rng is None:
+        rng = np.random.default_rng()
+    x = rng.uniform(0.0, 2.0 * np.pi, size=(n_samples, 1))
+    return x, np.sin(x)
+def demo_train_and_dream() -> None:
+    """Train a STANNO on sin(x), then print dreaming and lesion experiments.
+    Run this module directly ("python stanno_poc.py") to see the numeric output.
+    """
+    x, y = make_sin_dataset(512)
+    stanno = STANNO(input_dim=1, hidden_dim=32, output_dim=1, learning_rate=5e-3)
+    def log_every_100(epoch: int, loss: float) -> None:
+        # Report only every 100th epoch to keep the console output short.
+        if (epoch + 1) % 100 == 0:
+            print(f"Epoch {epoch:4d}  loss={loss:.5f}")
+    print("Training STANNO on y = sin(x)...")
+    stanno.fit(x, y, epochs=500, batch_size=64, callback=log_every_100)
+    # Evaluate basic fit on 16 evenly spaced points
+    xs = np.linspace(0, 2 * np.pi, 16).reshape(-1, 1)
+    targets = np.sin(xs)
+    preds = stanno.predict(xs)
+    print("\nSample predictions after training:")
+    for xi, yi, yi_hat in zip(xs.flatten(), targets.flatten(), preds.flatten()):
+        print(f"x={xi:5.2f}  sin(x)={yi: .3f}  pred={yi_hat: .3f}")
+    # Dreaming with different noise levels (inputs blinded to zero)
+    for sigma in [0.0, 0.05, 0.2, 0.5]:
+        seq = stanno.dream(num_steps=32, noise_sigma=sigma, blind_inputs=True)
+        print(f"\nDreaming with noise_sigma={sigma} (first 10 outputs):")
+        print(np.round(seq[:10].flatten(), 3))
+    # Lesion experiment
+    print("\nLesioning 70% of weights and evaluating error on test points...")
+    # Back up the parameters so the lesion can be undone afterwards
+    backup = [p.copy() for p in stanno.net.parameters()]
+    stanno.net.lesion(fraction=0.7)
+    preds_lesioned = stanno.predict(xs)
+    mse_lesioned = float(np.mean((preds_lesioned - targets) ** 2))
+    print(f"MSE after lesioning 70% of weights: {mse_lesioned:.4f}")
+    # Restore the original weights in place
+    for param, saved in zip(stanno.net.parameters(), backup):
+        param[...] = saved
+# Script entry point: run the demo only when this file is executed directly.
+if __name__ == "__main__":
+    demo_train_and_dream()