Commit ·
4c0cf4e
0
Parent(s):
Initial commit
Browse files- .gitignore +7 -0
- README.md +31 -0
- app.py +484 -0
- baseline_results.json +44 -0
- nuwave/__init__.py +16 -0
- nuwave/adapters/__init__.py +2 -0
- nuwave/adapters/huggingface.py +251 -0
- nuwave/benchmarks/__init__.py +2 -0
- nuwave/benchmarks/harness.py +221 -0
- nuwave/demo/__init__.py +1 -0
- nuwave/kiss/__init__.py +2 -0
- nuwave/kiss/filter.py +222 -0
- nuwave/lenia_splat.py +328 -0
- nuwave/organism.py +662 -0
- nuwave/pith/__init__.py +2 -0
- nuwave/pith/pipeline.py +300 -0
- nuwave/splat_engine.py +429 -0
- nuwave/substrate/__init__.py +1 -0
- nuwave/substrate/neuro_foundation.py +0 -0
- nuwave/substrate/ng_autonomic.py +99 -0
- nuwave/substrate/ng_ecosystem.py +598 -0
- nuwave/substrate/ng_embed.py +626 -0
- nuwave/substrate/ng_lite.py +1494 -0
- requirements.txt +5 -0
- rust_lenia/Cargo.lock +270 -0
- rust_lenia/Cargo.toml +18 -0
- rust_lenia/src/engine.rs +255 -0
- rust_lenia/src/kernel.rs +118 -0
- rust_lenia/src/lib.rs +24 -0
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
rust_lenia/target/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.so
|
| 6 |
+
*.whl
|
| 7 |
+
tests/
|
README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: NuWave
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: "5.29.0"
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: agpl-3.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# NuWave — Your Model Gets Smarter Over Time
|
| 14 |
+
|
| 15 |
+
CPU-native AI through compound context optimization.
|
| 16 |
+
|
| 17 |
+
Powered by **BitNet b1.58-2B-4T** — Microsoft's ternary weight model (1.58 bits per weight, addition-only inference, 0.4GB memory). No GPU needed.
|
| 18 |
+
|
| 19 |
+
## How It Works
|
| 20 |
+
|
| 21 |
+
- **KISS** (Keep Input Simple, Substrate) — filters redundant context. System prompt skipped when unchanged. Old conversation history compressed to summaries.
|
| 22 |
+
- **Pith** (The Living Context Lens) — manages the context window as a cache hierarchy. Strips clutter, evicts cold entries, promotes what's relevant.
|
| 23 |
+
- **BitNet** — ternary weights {-1, 0, +1} replace floating-point multiplication with addition. CPU-native by design.
|
| 24 |
+
|
| 25 |
+
Together they compound: cleaner input → less processing → better context → sharper reasoning → both improve.
|
| 26 |
+
|
| 27 |
+
## Demo
|
| 28 |
+
|
| 29 |
+
Two tabs:
|
| 30 |
+
1. **Live Chat** — talk to the model, see KISS/Pith metrics in real time (tokens saved, system skipped, clutter stripped)
|
| 31 |
+
2. **A/B Benchmark** — same conversation through baseline vs NuWave. Watch tokens drop and time decrease as KISS learns what's redundant.
|
app.py
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NuWave — HuggingFace Spaces Demo
|
| 3 |
+
|
| 4 |
+
The organism. NeuroGraph substrate + KISS bucket + Pith bucket +
|
| 5 |
+
Splat-Lenia + BitNet model. On CPU. Gets smarter over time.
|
| 6 |
+
|
| 7 |
+
# ---- Changelog ----
|
| 8 |
+
# [2026-04-06] Claude Code (Opus 4.6) — Full NeuroGraph organism integration
|
| 9 |
+
# [2026-03-31] Claude Code (Opus 4.6) — Switch to BitNet 2B for CPU-native inference
|
| 10 |
+
# [2026-03-29] Claude Code (Opus 4.6) — ZeroGPU compatible, model at startup
|
| 11 |
+
# [2026-03-28] Claude Code (Opus 4.6) — Initial Gradio demo
|
| 12 |
+
# -------------------
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
# Install ng_tract wheel before any imports that need it
|
| 16 |
+
import subprocess, sys, os
|
| 17 |
+
_whl = os.path.join(os.path.dirname(__file__), "ng_tract-0.1.0-cp312-abi3-manylinux_2_34_x86_64.whl")
|
| 18 |
+
if os.path.exists(_whl):
|
| 19 |
+
try:
|
| 20 |
+
import ng_tract
|
| 21 |
+
except ImportError:
|
| 22 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", _whl, "--quiet"])
|
| 23 |
+
|
| 24 |
+
import gradio as gr
|
| 25 |
+
import json
|
| 26 |
+
import logging
|
| 27 |
+
import torch
|
| 28 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 29 |
+
|
| 30 |
+
try:
    import spaces
except ImportError:
    # The `spaces` package is not importable here, so provide a stand-in
    # whose GPU decorator leaves the wrapped function untouched.
    class _FakeSpaces:
        """No-op replacement exposing the same `GPU` decorator entry point."""

        @staticmethod
        def GPU(fn=None, **kwargs):
            """Support both `@spaces.GPU` and `@spaces.GPU(...)` usage.

            Called with a function, return it unchanged. Called without one
            (keyword options only), return a decorator that does the same.
            """
            if fn:
                return fn

            def _passthrough(f):
                return f

            return _passthrough

    spaces = _FakeSpaces()
|
| 38 |
+
|
| 39 |
+
logging.basicConfig(level=logging.INFO)
|
| 40 |
+
logger = logging.getLogger("nuwave")
|
| 41 |
+
|
| 42 |
+
# ── Load Model at Startup (guard against Gradio double-import) ────
|
| 43 |
+
|
| 44 |
+
MODEL_NAME = "microsoft/bitnet-b1.58-2B-4T-bf16"
|
| 45 |
+
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 46 |
+
|
| 47 |
+
logger.info("Loading model: %s (token: %s)", MODEL_NAME, "present" if HF_TOKEN else "MISSING")
|
| 48 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
|
| 49 |
+
if tokenizer.pad_token is None:
|
| 50 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 51 |
+
|
| 52 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 53 |
+
MODEL_NAME,
|
| 54 |
+
torch_dtype=torch.bfloat16,
|
| 55 |
+
device_map="cpu",
|
| 56 |
+
low_cpu_mem_usage=True,
|
| 57 |
+
token=HF_TOKEN,
|
| 58 |
+
)
|
| 59 |
+
model.eval()
|
| 60 |
+
logger.info("Model loaded: %d params", sum(p.numel() for p in model.parameters()))
|
| 61 |
+
|
| 62 |
+
# ── NuWave Components ─────────────────────────────────────────────
|
| 63 |
+
|
| 64 |
+
from nuwave.organism import NuWaveOrganism
|
| 65 |
+
from nuwave.kiss import KISSFilter, KISSConfig
|
| 66 |
+
from nuwave.pith import PithPipeline, PithConfig
|
| 67 |
+
from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
|
| 68 |
+
from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
|
| 69 |
+
|
| 70 |
+
# The organism — substrate + KISS bucket + Pith bucket
|
| 71 |
+
# Use /data/ for persistence if available (HF persistent storage), else /tmp/
|
| 72 |
+
_persist_dir = "/data/nuwave_substrate" if os.path.isdir("/data") else "/tmp/nuwave_substrate"
|
| 73 |
+
organism = NuWaveOrganism(state_path=_persist_dir)
|
| 74 |
+
|
| 75 |
+
# String-level KISS still runs alongside for comparison
|
| 76 |
+
kiss_nw = KISSFilter()
|
| 77 |
+
pith_nw = PithPipeline()
|
| 78 |
+
|
| 79 |
+
messages_nw = []
|
| 80 |
+
messages_bl = []
|
| 81 |
+
system_prompt = "You are a helpful assistant. Be concise and clear."
|
| 82 |
+
|
| 83 |
+
# ── Splat-Lenia Setup ────────────────────────────────────────────
|
| 84 |
+
# Decompose a few attention layers to splats at startup.
|
| 85 |
+
# Lenia evolves them between turns. The compression is alive.
|
| 86 |
+
|
| 87 |
+
splat_config = SplatConfig(
|
| 88 |
+
splat_ratio=0.02, # 50x compression — aggressive but fast to fit
|
| 89 |
+
max_splats=256, # small enough for CPU-basic startup
|
| 90 |
+
init_sigma=2.0,
|
| 91 |
+
fit_iterations=50, # fewer iterations — speed over precision at startup
|
| 92 |
+
fit_lr=0.02,
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
lenia_config = LeniaSplatConfig(
|
| 96 |
+
growth_mu=0.15,
|
| 97 |
+
growth_sigma=0.015,
|
| 98 |
+
growth_scale=0.0003, # small dt — proven stable
|
| 99 |
+
interaction_radius=5.0,
|
| 100 |
+
activation_coupling=2.0,
|
| 101 |
+
conserve_mass=True,
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
lenia_engine = LeniaSplatEngine(lenia_config)
|
| 105 |
+
splat_layers = {}
|
| 106 |
+
splat_metrics_history = []
|
| 107 |
+
|
| 108 |
+
# Splat decomposition deferred to first use — avoids memory spike during startup
|
| 109 |
+
# Splat state persists to disk so Lenia evolution survives restarts
|
| 110 |
+
_splats_initialized = False
|
| 111 |
+
_splat_save_path = os.path.join(_persist_dir, "splat_state.pt")
|
| 112 |
+
|
| 113 |
+
def _init_splats_if_needed():
    """Load persisted splats or decompose from scratch on first use.

    Runs at most once per process, guarded by ``_splats_initialized``. On
    success ``splat_layers`` is populated and each layer is registered with
    ``lenia_engine``. Every failure is logged and swallowed so that inference
    keeps working without splats.
    """
    global _splats_initialized
    if _splats_initialized:
        return
    # The flag is set before any work happens, so a failed attempt is not
    # retried on later calls.
    _splats_initialized = True

    import gc
    gc.collect()

    # Try to restore persisted splat state first
    if os.path.exists(_splat_save_path):
        try:
            # NOTE(security): weights_only=False unpickles arbitrary objects.
            # Only acceptable while the state file is written exclusively by
            # this app; revisit if the path can ever hold untrusted data.
            saved = torch.load(_splat_save_path, weights_only=False)

            # Rebuild every layer before touching shared state, so one
            # corrupt entry cannot leave a half-restored registry behind.
            restored = {
                name: GaussianSplats.from_state_dict(sd)
                for name, sd in saved.get('layers', {}).items()
            }
            if not restored:
                # An empty file would otherwise disable splats permanently.
                raise ValueError("saved state contains no layers")

            for name, splats in restored.items():
                splat_layers[name] = splats
                lenia_engine.register_layer(name, splats)

            lenia_step_count = saved.get('lenia_steps', 0)
            try:
                # Carry the evolution counter across restarts; otherwise the
                # next save overwrites it with a count that restarted at zero.
                # Assumes state.step_count is a plain writable attribute (it
                # is read the same way when saving) — TODO confirm.
                lenia_engine.state.step_count = lenia_step_count
            except Exception as exc:
                logger.debug(f"Could not restore Lenia step count: {exc}")

            logger.info(
                f"Splats restored: {len(splat_layers)} layers, "
                f"{sum(s.n_splats for s in splat_layers.values())} splats, "
                f"{lenia_step_count} Lenia steps evolved"
            )
            return
        except Exception as exc:
            logger.warning(f"Splat restore failed (redecomposing): {exc}")

    # Fresh decomposition
    logger.info("Decomposing attention layers to Gaussian splats (first use)...")
    target_keys = ('layers.0.self_attn.k_proj', 'layers.0.self_attn.v_proj')
    try:
        for name, param in model.named_parameters():
            if not any(k in name for k in target_keys):
                continue
            if param.dim() < 2:
                continue

            w = param.data.float()
            if w.dim() > 2:
                # Flatten trailing dimensions so the layer is a 2-D matrix.
                w = w.reshape(w.shape[0], -1)
            logger.info(f"  Decomposing {name} ({w.shape[0]}x{w.shape[1]})...")
            splats, metrics = decompose_layer(w, splat_config)
            splat_layers[name] = splats
            lenia_engine.register_layer(name, splats)
            logger.info(f"  {metrics['n_splats']} splats, {metrics['compression_ratio']:.1f}x, MSE={metrics['final_mse']:.4f}")
            # Release the float copy before moving to the next layer.
            del w
            gc.collect()

        logger.info(f"Splat decomposition complete: {len(splat_layers)} layers")
    except Exception as exc:
        logger.warning(f"Splat decomposition failed (non-fatal): {exc}")
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _save_splat_state():
    """Persist evolved splat parameters to disk.

    Writes a dict with the per-layer ``state_dict()`` output under ``'layers'``
    and the engine's step counter under ``'lenia_steps'`` to
    ``_splat_save_path``. Does nothing when no splat layers exist.

    Best-effort: errors are logged and never raised to the caller.
    """
    if not splat_layers:
        return

    # Write to a sibling temp file and rename it into place, so an
    # interrupted save never leaves a truncated state file behind.
    tmp_path = _splat_save_path + ".tmp"
    try:
        os.makedirs(os.path.dirname(_splat_save_path), exist_ok=True)
        state = {
            'layers': {name: splats.state_dict() for name, splats in splat_layers.items()},
            'lenia_steps': lenia_engine.state.step_count,
        }
        torch.save(state, tmp_path)
        os.replace(tmp_path, _splat_save_path)
    except Exception as exc:
        # WARNING rather than DEBUG: the root logger is configured at INFO,
        # so a DEBUG record here would hide persistence failures entirely.
        logger.warning(f"Splat save failed: {exc}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ── Inference ─────────────────────────────────────────────────────
|
| 179 |
+
|
| 180 |
+
def do_generate(prompt_text: str, max_new_tokens: int = 256) -> tuple:
    """Run inference on CPU with Lenia step after generation.

    Args:
        prompt_text: Fully rendered prompt (chat template already applied).
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        ``(response, in_count, out_count, elapsed, tok_per_sec, lenia_result)``
        where ``elapsed`` (seconds, 2 decimals) covers tokenization,
        generation, the Lenia step and the state save; ``tok_per_sec`` is
        ``out_count / elapsed`` rounded to 1 decimal; ``lenia_result`` is the
        value returned by ``lenia_engine.step()`` or ``{}`` when no step ran.
    """
    _init_splats_if_needed()
    import time
    t0 = time.time()

    max_input_tokens = 4096
    inputs = tokenizer(prompt_text, return_tensors="pt")
    # Over-long prompts keep their TAIL: the newest user message and the
    # generation prompt sit at the end, and default right-side truncation
    # would cut exactly those off.
    input_ids = inputs["input_ids"][:, -max_input_tokens:]
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask[:, -max_input_tokens:]
    in_count = input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            # Passed explicitly: the pad token may be the EOS token, in which
            # case the mask cannot be inferred from the ids.
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = outputs[0][in_count:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Run Lenia step on splats after inference
    lenia_result = {}
    if splat_layers:
        try:
            lenia_result = lenia_engine.step()
            splat_metrics_history.append(lenia_result)
            _save_splat_state()
        except Exception as exc:
            logger.warning(f"Lenia step failed: {exc}")

    elapsed = time.time() - t0
    out_count = len(new_tokens)
    tok_per_sec = out_count / elapsed if elapsed > 0 else 0

    return response, in_count, out_count, round(elapsed, 2), round(tok_per_sec, 1), lenia_result
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# ── Chat Handler ──────────────────────────────────────────────────
|
| 219 |
+
|
| 220 |
+
def on_send(message, history):
    """Handle one Live Chat turn.

    Feeds the message to the organism, builds a prompt from the system
    context, the Pith context and the most recent messages, generates a
    reply, and records the outcome.

    Args:
        message: Text from the input box; an empty value is a no-op.
        history: Chatbot history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``("", updated_history, stats_markdown)`` — the empty string clears
        the input box.

    Raises:
        Re-raises any error from the organism, the tokenizer or generation,
        after removing the unanswered user turn from ``messages_nw``.
    """
    global messages_nw

    if not message:
        return "", history, ""

    user_turn = {"role": "user", "content": message}
    messages_nw.append(user_turn)

    try:
        # ── 1. Deposit raw experience into substrate (Law 7) ──
        organism.deposit_experience(message)

        # ── 2. Substrate processes ──
        step_result = organism.step()

        # ── 3. KISS bucket — extract what changed from the River ──
        kiss_extract = organism.kiss_extract(step_result)

        # Also run string-level KISS for comparison metrics
        kiss_string_result = kiss_nw.filter_context(messages_nw, system_prompt)
        sys_ctx = kiss_string_result.get("system_context", system_prompt)

        # ── 4. Pith bucket — extract relevant context from the River ──
        # Normalised to a list so the len() in the stats line cannot fail on
        # a None result.
        pith_context = organism.pith_extract(message, max_context=5) or []

        # Pith context REPLACES old message history, not adds to it.
        # The substrate carries what the older messages contained — the model
        # doesn't need both. Recent messages pass verbatim (the model needs
        # immediate context). Older messages are replaced by substrate context.
        if pith_context:
            substrate_ctx = "\n".join(pith_context)
            if sys_ctx:
                sys_ctx = substrate_ctx + "\n\n" + sys_ctx
            else:
                sys_ctx = substrate_ctx

        # Build prompt — Pith context replaces old history
        # Only send recent messages. The substrate carries the rest.
        recent_window = 6  # 3 turns of user+assistant
        if len(messages_nw) > recent_window and pith_context:
            # Substrate has the older context — trim messages to recent only
            recent_msgs = messages_nw[-recent_window:]
        else:
            recent_msgs = messages_nw

        prompt_msgs = []
        if sys_ctx:
            prompt_msgs.append({"role": "system", "content": sys_ctx})
        prompt_msgs.extend(recent_msgs)

        prompt = tokenizer.apply_chat_template(
            prompt_msgs, tokenize=False, add_generation_prompt=True,
        )

        # ── 5. Model generates ──
        response, in_tok, out_tok, elapsed, tok_s, lenia_result = do_generate(prompt)
    except Exception:
        # Drop the unanswered user turn so the stored conversation keeps
        # alternating user/assistant roles on the next attempt.
        if messages_nw and messages_nw[-1] is user_turn:
            messages_nw.pop()
        raise

    messages_nw.append({"role": "assistant", "content": response})

    # ── 6. Outcome feeds back into substrate (Law 7) ──
    organism.record_outcome(message, response, success=True)

    # Stats — both substrate and string-level
    kiss_stats = kiss_nw.stats.to_dict()
    org_stats = organism.get_stats()

    lenia_info = ""
    if lenia_result:
        lenia_info = (
            f" | Lenia step {lenia_result.get('step', 0)}: "
            f"Δα={lenia_result.get('total_alpha_delta', 0):.6f} "
            f"Δμ={lenia_result.get('total_position_delta', 0):.6f} "
            f"({lenia_result.get('time_ms', 0):.0f}ms)"
        )

    substrate_info = (
        f" | Substrate: {org_stats.get('nodes', 0)} nodes, "
        f"{org_stats.get('synapses', 0)} syn, "
        f"{org_stats.get('fired_nodes', 0)} fired"
    )

    kiss_bucket_info = (
        f" | KISS bucket: {kiss_extract.get('action', '?')} "
        f"({kiss_extract.get('reason', '')})"
    )
    if kiss_extract.get('surprise_ratio', 0) > 0:
        kiss_bucket_info += f" surprise={kiss_extract['surprise_ratio']}"

    stats_text = (
        f"**Turn {len(messages_nw)//2}** | "
        f"{out_tok} tokens in {elapsed}s ({tok_s} tok/s) | "
        f"Input: {in_tok} tokens | "
        f"String KISS: {kiss_stats.get('tokens_saved', 0)} saved ({kiss_stats.get('efficiency', 0):.1%})"
        f"{substrate_info}"
        f"{kiss_bucket_info}"
        f" | Pith river: {len(pith_context)} contexts"
        f"{lenia_info}"
    )

    # Tolerate a None history from the UI.
    history = (history or []) + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": response},
    ]

    return "", history, stats_text
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def on_reset():
    """Start a fresh chat session.

    Rebinds the module-level message log to an empty list and replaces the
    string-level KISS filter and Pith pipeline with new instances.

    Returns:
        ``([], "Chat reset.")`` — an empty chatbot history and a status line.
    """
    global messages_nw, kiss_nw, pith_nw
    # Right-hand side is evaluated left to right, so the filter is built
    # before the pipeline, as before.
    messages_nw, kiss_nw, pith_nw = [], KISSFilter(), PithPipeline()
    cleared_history = []
    status = "Chat reset."
    return cleared_history, status
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# ── Benchmark ─────────────────────────────────────────────────────
|
| 334 |
+
|
| 335 |
+
SAMPLE_CONVERSATIONS = [
|
| 336 |
+
"What is machine learning?",
|
| 337 |
+
"How does it differ from traditional programming?",
|
| 338 |
+
"Can you give me a simple example of supervised learning?",
|
| 339 |
+
"What about unsupervised learning?",
|
| 340 |
+
"How would I choose between them for a new project?",
|
| 341 |
+
"What are neural networks?",
|
| 342 |
+
"How deep is 'deep learning'?",
|
| 343 |
+
"What's the relationship between AI, ML, and deep learning?",
|
| 344 |
+
"What are transformers in the context of AI?",
|
| 345 |
+
"How does attention work in a transformer?",
|
| 346 |
+
"Why are transformers better than RNNs for many tasks?",
|
| 347 |
+
"What is transfer learning and why does it matter?",
|
| 348 |
+
"How do I fine-tune a pre-trained model?",
|
| 349 |
+
"What are the ethical considerations in AI?",
|
| 350 |
+
"Where do you see AI heading in the next 5 years?",
|
| 351 |
+
]
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def on_benchmark(num_turns):
    """Run the scripted A/B conversation and report token/time totals.

    Each turn is generated twice with ``max_new_tokens=128``: once with the
    plain system prompt plus full history (baseline), and once with the
    system context passed through the string-level KISS filter and the Pith
    pipeline (NuWave).

    Args:
        num_turns: Requested number of turns; converted with ``int()`` and
            clamped to ``0 .. len(SAMPLE_CONVERSATIONS)``.

    Returns:
        ``(summary_json, per_turn_json)`` — two JSON strings, indent 2.
    """
    # Clamp at zero as well: a negative count would slice from the END of the
    # list while the summary reported the negative number.
    turns = max(0, min(int(num_turns), len(SAMPLE_CONVERSATIONS)))
    conversation = SAMPLE_CONVERSATIONS[:turns]

    # Fresh state for clean test
    bl_msgs = []
    nw_kiss = KISSFilter()
    nw_pith = PithPipeline()
    nw_msgs = []

    results = []

    for i, prompt_text in enumerate(conversation):
        # Baseline — no KISS/Pith
        bl_msgs.append({"role": "user", "content": prompt_text})
        prompt_bl = tokenizer.apply_chat_template(
            [{"role": "system", "content": system_prompt}] + bl_msgs,
            tokenize=False, add_generation_prompt=True,
        )
        resp_bl, in_bl, _, time_bl, tps_bl, _ = do_generate(prompt_bl, max_new_tokens=128)
        bl_msgs.append({"role": "assistant", "content": resp_bl})

        # NuWave — KISS + Pith
        nw_msgs.append({"role": "user", "content": prompt_text})
        kiss_r = nw_kiss.filter_context(nw_msgs, system_prompt)
        sys_ctx = kiss_r.get("system_context", system_prompt)
        if sys_ctx:
            chunks = [c.strip() for c in sys_ctx.split("\n\n") if c.strip()]
            if chunks:
                optimized = nw_pith.extract(chunks, query=prompt_text)
                sys_ctx = "\n\n".join(optimized)

        # Prepend a system message only when some system context survived.
        if sys_ctx:
            nw_prompt_msgs = [{"role": "system", "content": sys_ctx}] + nw_msgs
        else:
            nw_prompt_msgs = nw_msgs
        prompt_nw = tokenizer.apply_chat_template(
            nw_prompt_msgs,
            tokenize=False, add_generation_prompt=True,
        )
        resp_nw, in_nw, _, time_nw, tps_nw, _ = do_generate(prompt_nw, max_new_tokens=128)
        nw_msgs.append({"role": "assistant", "content": resp_nw})

        ks = nw_kiss.stats.to_dict()
        ps = nw_pith.stats.to_dict()

        results.append({
            "turn": i + 1,
            "baseline": {"tokens": in_bl, "time": time_bl, "tok_s": tps_bl},
            "nuwave": {"tokens": in_nw, "time": time_nw, "tok_s": tps_nw},
            # Per-turn savings are floored at zero; the summary totals below
            # are raw differences and may be negative.
            "tokens_saved": max(0, in_bl - in_nw),
            "time_saved": round(max(0, time_bl - time_nw), 2),
            "kiss_efficiency": ks.get("efficiency", 0),
            "pith_l1_size": ps.get("l1_current_size", 0),
            "pith_clutter": ps.get("clutter_stripped", 0),
        })

    # Summary
    total_time_bl = sum(r["baseline"]["time"] for r in results)
    total_time_nw = sum(r["nuwave"]["time"] for r in results)
    total_tok_bl = sum(r["baseline"]["tokens"] for r in results)
    total_tok_nw = sum(r["nuwave"]["tokens"] for r in results)

    summary = {
        "model": MODEL_NAME,
        "turns": turns,
        "baseline_total_tokens": total_tok_bl,
        "nuwave_total_tokens": total_tok_nw,
        "tokens_saved": total_tok_bl - total_tok_nw,
        "baseline_total_time": round(total_time_bl, 2),
        "nuwave_total_time": round(total_time_nw, 2),
        "time_saved": round(total_time_bl - total_time_nw, 2),
        "final_kiss_efficiency": results[-1]["kiss_efficiency"] if results else 0,
        "final_pith_l1": results[-1]["pith_l1_size"] if results else 0,
    }

    return json.dumps(summary, indent=2), json.dumps(results, indent=2)
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
# ── Gradio App ────────────────────────────────────────────────────
|
| 429 |
+
|
| 430 |
+
# UI layout: a header plus two tabs (Live Chat, A/B Benchmark).
with gr.Blocks(
    title="NuWave — Your Model Gets Smarter Over Time",
    theme=gr.themes.Soft(),
) as demo:
    # The header is rendered once, when the layout is built. Splat
    # decomposition is deferred to the first generation request, so layer and
    # splat counts read here would always be zero; the header states the
    # deferral instead of showing misleading numbers.
    gr.Markdown(
        f"""
# NuWave — Your Model Gets Smarter Over Time

**Context optimization through compound substrate dynamics.**

- **KISS** filters redundant context — system prompt skipped when unchanged, old history compressed to summary
- **Pith** manages context as a cache hierarchy — clutter stripped, cold entries evicted, relevant context promoted
- **Splat-Lenia** — weight layers decomposed to Gaussian splats, Lenia dynamics evolve them between turns

Model: `{MODEL_NAME}` | Inference: CPU | Splat layers: decomposed on first request
"""
    )

    with gr.Tabs():
        with gr.Tab("Live Chat"):
            chatbot = gr.Chatbot(height=400, type="messages")
            stats_display = gr.Markdown("*Send a message to see NuWave metrics*")

            with gr.Row():
                msg = gr.Textbox(placeholder="Type a message...", show_label=False, scale=4)
                send_btn = gr.Button("Send", scale=1)
                reset_btn = gr.Button("Reset", scale=1)

            # Button click and Enter in the textbox trigger the same handler.
            send_btn.click(on_send, [msg, chatbot], [msg, chatbot, stats_display])
            msg.submit(on_send, [msg, chatbot], [msg, chatbot, stats_display])
            reset_btn.click(on_reset, outputs=[chatbot, stats_display])

        with gr.Tab("A/B Benchmark"):
            gr.Markdown(
                """
### Baseline vs NuWave

Same conversation, same model, same CPU. Baseline sends full context every turn.
NuWave compresses history and skips redundant system context.

Watch: tokens decrease, time decreases, KISS efficiency climbs.
"""
            )

            with gr.Row():
                num_turns = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Turns")
                run_btn = gr.Button("Run Benchmark", variant="primary")

            summary_output = gr.Code(label="Summary", language="json")
            curve_output = gr.Code(label="Per-Turn Data", language="json")

            run_btn.click(on_benchmark, [num_turns], [summary_output, curve_output])
|
| 482 |
+
|
| 483 |
+
if __name__ == "__main__":
|
| 484 |
+
demo.launch(server_name="0.0.0.0", ssr_mode=False)
|
baseline_results.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"test": "NuWave Phase 1 Baseline — KISS only (crude string compression)",
|
| 3 |
+
"model": "microsoft/bitnet-b1.58-2B-4T-bf16",
|
| 4 |
+
"hardware": "HuggingFace CPU-basic (2 vCPU, 16GB RAM)",
|
| 5 |
+
"date": "2026-04-06",
|
| 6 |
+
"kiss_version": "string hash + history summarization (operations 1+5 only)",
|
| 7 |
+
"pith_version": "minimal (clutter strip on system context chunks only)",
|
| 8 |
+
"splat_lenia": "2 attention layers, 256 splats each, Lenia stepping each turn",
|
| 9 |
+
"neuro_graph": "NOT CONNECTED — this is the before picture",
|
| 10 |
+
"turns": [
|
| 11 |
+
{"turn": 1, "input_tokens": 43, "output_tokens": 42, "time_s": 90.91, "tok_s": 0.5, "tokens_saved": 0, "efficiency": 0.0, "kiss_sys_skipped": 0, "pith_l1": 1, "clutter_stripped": 0},
|
| 12 |
+
{"turn": 2, "input_tokens": 121, "output_tokens": 88, "time_s": 127.21, "tok_s": 0.7, "tokens_saved": 0, "efficiency": 0.0, "kiss_sys_skipped": 0, "pith_l1": 1, "clutter_stripped": 1},
|
| 13 |
+
{"turn": 4, "input_tokens": 534, "output_tokens": 123, "time_s": 332.85, "tok_s": 0.4, "tokens_saved": 13, "efficiency": 1.9, "kiss_sys_skipped": 1, "pith_l1": 2, "clutter_stripped": 2},
|
| 14 |
+
{"turn": 5, "input_tokens": 732, "output_tokens": 104, "time_s": 287.12, "tok_s": 0.4, "tokens_saved": 58, "efficiency": 4.9, "kiss_sys_skipped": 2, "pith_l1": 3, "clutter_stripped": 2},
|
| 15 |
+
{"turn": 6, "input_tokens": 948, "output_tokens": 149, "time_s": 404.98, "tok_s": 0.4, "tokens_saved": 172, "efficiency": 9.6, "kiss_sys_skipped": 3, "pith_l1": 4, "clutter_stripped": 2},
|
| 16 |
+
{"turn": 7, "input_tokens": 1118, "output_tokens": 147, "time_s": 417.50, "tok_s": 0.4, "tokens_saved": 490, "efficiency": 19.3, "kiss_sys_skipped": 4, "pith_l1": 4, "clutter_stripped": 3},
|
| 17 |
+
{"turn": 8, "input_tokens": 1426, "output_tokens": 256, "time_s": 468.51, "tok_s": 0.5, "tokens_saved": 893, "efficiency": 26.1, "kiss_sys_skipped": 5, "pith_l1": 5, "clutter_stripped": 3},
|
| 18 |
+
{"turn": 9, "input_tokens": 1727, "output_tokens": 240, "time_s": 483.56, "tok_s": 0.5, "tokens_saved": 1368, "efficiency": 30.1, "kiss_sys_skipped": 6, "pith_l1": 5, "clutter_stripped": 4},
|
| 19 |
+
{"turn": 10, "input_tokens": 2222, "output_tokens": 93, "time_s": 301.35, "tok_s": 0.3, "tokens_saved": 1953, "efficiency": 33.1, "kiss_sys_skipped": 7, "pith_l1": 6, "clutter_stripped": 4},
|
| 20 |
+
{"turn": 11, "input_tokens": 2364, "output_tokens": 256, "time_s": 554.52, "tok_s": 0.5, "tokens_saved": 2648, "efficiency": 35.9, "kiss_sys_skipped": 8, "pith_l1": 6, "clutter_stripped": 5},
|
| 21 |
+
{"turn": 12, "input_tokens": 2918, "output_tokens": 256, "time_s": 608.90, "tok_s": 0.4, "tokens_saved": 3552, "efficiency": 39.2, "kiss_sys_skipped": 9, "pith_l1": 7, "clutter_stripped": 5},
|
| 22 |
+
{"turn": 13, "input_tokens": 3198, "output_tokens": 256, "time_s": 664.85, "tok_s": 0.4, "tokens_saved": 4682, "efficiency": 42.7, "kiss_sys_skipped": 10, "pith_l1": 7, "clutter_stripped": 6},
|
| 23 |
+
{"turn": 14, "input_tokens": 3477, "output_tokens": 256, "time_s": 748.87, "tok_s": 0.3, "tokens_saved": 5897, "efficiency": 45.0, "kiss_sys_skipped": 11, "pith_l1": 7, "clutter_stripped": 7},
|
| 24 |
+
{"turn": 15, "input_tokens": 4096, "output_tokens": 35, "time_s": 375.36, "tok_s": 0.1, "tokens_saved": 7306, "efficiency": 47.2, "kiss_sys_skipped": 12, "pith_l1": 8, "clutter_stripped": 7}
|
| 25 |
+
],
|
| 26 |
+
"summary": {
|
| 27 |
+
"peak_efficiency": 47.2,
|
| 28 |
+
"total_tokens_saved": 7306,
|
| 29 |
+
"ceiling_hit": "BitNet 4096 context window at turn 15",
|
| 30 |
+
"savings_outpaced_growth_at": "turn 11",
|
| 31 |
+
"lenia_step_time_avg_ms": 9,
|
| 32 |
+
"lenia_delta_alpha": 0.000599,
|
| 33 |
+
"lenia_delta_mu": 0.000007
|
| 34 |
+
},
|
| 35 |
+
"notes": [
|
| 36 |
+
"KISS using only operations 1 (delta gate) and 5 (temporal compress)",
|
| 37 |
+
"Operations 2, 3, 4, 6 not implemented — room for significant improvement",
|
| 38 |
+
"Pith doing minimal clutter strip only — not managing conversation as cache hierarchy",
|
| 39 |
+
"No NeuroGraph substrate — KISS computing own metrics instead of reading topology",
|
| 40 |
+
"No binary tract format — string-level operations only",
|
| 41 |
+
"Splat-Lenia running but not yet integrated into inference path",
|
| 42 |
+
"THIS IS THE BEFORE PICTURE — everything improves from here"
|
| 43 |
+
]
|
| 44 |
+
}
|
nuwave/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NuWave — CPU-First AI Through Compound Substrate Optimization
|
| 3 |
+
|
| 4 |
+
Ship a model. Let it run. It gets more efficient over time.
|
| 5 |
+
|
| 6 |
+
Phase 1: KISS + Pith on the context window (frozen model)
|
| 7 |
+
Phase 2: Lenia dynamics on unfrozen weights (compound optimization)
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
from nuwave import NuWave
|
| 11 |
+
nw = NuWave(model_name="microsoft/Phi-4-mini")
|
| 12 |
+
response = nw.chat("Hello, how are you?")
|
| 13 |
+
print(nw.stats()) # KISS skip rate, Pith cache hits, compound curve
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
__version__ = "0.1.0"
|
nuwave/adapters/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Model adapters for wrapping HuggingFace models with NuWave."""
|
| 2 |
+
from nuwave.adapters.huggingface import HuggingFaceAdapter
|
nuwave/adapters/huggingface.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HuggingFace Adapter — Wrap any HF model with NuWave
|
| 3 |
+
|
| 4 |
+
KISS manages what goes into the context (CPU).
|
| 5 |
+
Pith manages what the model actually sees (CPU).
|
| 6 |
+
The model does the thinking (GPU via ZeroGPU).
|
| 7 |
+
NuWave makes the thinking efficient.
|
| 8 |
+
|
| 9 |
+
ZeroGPU compatible: GPU allocated only during model.generate().
|
| 10 |
+
KISS and Pith run on CPU always.
|
| 11 |
+
|
| 12 |
+
# ---- Changelog ----
|
| 13 |
+
# [2026-03-29] Claude Code (Opus 4.6) — ZeroGPU compatible
|
| 14 |
+
# What: Separated CPU (KISS/Pith) from GPU (inference) paths
|
| 15 |
+
# Why: HF ZeroGPU allocates GPU only during @spaces.GPU calls.
|
| 16 |
+
# KISS/Pith are CPU. Only model inference needs GPU.
|
| 17 |
+
# How: _generate() is the GPU-only path. chat() orchestrates
|
| 18 |
+
# CPU context optimization around the GPU call.
|
| 19 |
+
# [2026-03-28] Claude Code (Opus 4.6) — Initial implementation
|
| 20 |
+
# -------------------
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import logging
|
| 26 |
+
import time
|
| 27 |
+
from typing import Any, Dict, List, Optional
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger("nuwave.adapter.hf")
|
| 30 |
+
|
| 31 |
+
# Global model + tokenizer — loaded once, shared across calls.
|
| 32 |
+
# ZeroGPU moves them to GPU only during @spaces.GPU decorated calls.
|
| 33 |
+
_model = None
|
| 34 |
+
_tokenizer = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _ensure_model(model_name: str):
    """Populate the module-level ``_model`` / ``_tokenizer`` pair on first use.

    Once ``_model`` is set every later call returns immediately, whatever
    ``model_name`` is passed: the first model loaded is the one that stays.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the causal-LM weights.
    """
    global _model, _tokenizer
    if _model is not None:
        return

    # Heavy imports are deferred so importing this module stays cheap.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    logger.info("Loading %s...", model_name)

    _tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Fall back to EOS as the padding token when the tokenizer defines none.
    if _tokenizer.pad_token is None:
        _tokenizer.pad_token = _tokenizer.eos_token

    # Half precision when CUDA is available, full precision otherwise.
    if torch.cuda.is_available():
        weights_dtype = torch.float16
    else:
        weights_dtype = torch.float32

    _model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype=weights_dtype,
        device_map="auto",
    )
    _model.eval()

    param_total = sum(p.numel() for p in _model.parameters())
    first_device = next(_model.parameters()).device
    logger.info(
        "Loaded %s (%d params, device=%s)",
        model_name,
        param_total,
        first_device,
    )
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def generate(
    prompt: str,
    max_new_tokens: int = 512,
    temperature: float = 0.7,
    do_sample: bool = True,
) -> tuple:
    """Run model inference on an already-formatted prompt.

    Intended to be the only GPU-touching call, so it can be wrapped with
    ``@spaces.GPU`` by the caller. Uses the module-level ``_model`` and
    ``_tokenizer``; they must have been loaded beforehand.

    Args:
        prompt: Fully formatted prompt string (already KISS/Pith optimized).
        max_new_tokens: Max tokens to generate.
        temperature: Sampling temperature. Only forwarded when ``do_sample``
            is true, because it has no effect on greedy decoding.
        do_sample: Whether to sample.

    Returns:
        (response_text, input_token_count, output_token_count)

    Raises:
        RuntimeError: If the model/tokenizer have not been loaded yet.
    """
    import torch

    if _model is None or _tokenizer is None:
        raise RuntimeError(
            "generate() called before the model was loaded; call "
            "_ensure_model() (e.g. via HuggingFaceAdapter.prepare()) first."
        )

    inputs = _tokenizer(prompt, return_tensors="pt", truncation=True)
    device = _model.device
    input_ids = inputs["input_ids"].to(device)
    input_count = input_ids.shape[1]

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": _tokenizer.pad_token_id,
    }
    # Forward the mask explicitly: when pad_token == eos_token the model
    # cannot reliably infer it from the ids alone.
    if "attention_mask" in inputs:
        gen_kwargs["attention_mask"] = inputs["attention_mask"].to(device)
    if do_sample:
        gen_kwargs["temperature"] = temperature

    with torch.no_grad():
        outputs = _model.generate(input_ids, **gen_kwargs)

    # The output holds prompt + continuation; keep only the continuation.
    new_tokens = outputs[0][input_count:]
    response = _tokenizer.decode(new_tokens, skip_special_tokens=True)
    output_count = len(new_tokens)

    return response, input_count, output_count
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class HuggingFaceAdapter:
    """Wraps a HuggingFace model with NuWave context optimization.

    KISS and Pith run on CPU. Model inference uses ZeroGPU.
    The module-level generate() function is called separately so it can be
    wrapped with @spaces.GPU in the Gradio app.

    Usage:
        adapter = HuggingFaceAdapter("microsoft/Phi-4-mini-instruct")
        prompt = adapter.prepare("Hello!")            # CPU: KISS + Pith
        response, n_in, n_out = generate(prompt)      # GPU: model inference
        adapter.record(response, n_in, n_out)         # CPU: update state
    """

    def __init__(
        self,
        model_name: str,
        nuwave_enabled: bool = True,
        kiss_config=None,
        pith_config=None,
        system_prompt: str = "",
    ):
        """Create an adapter; the model itself is loaded lazily by prepare().

        Args:
            model_name: HuggingFace model identifier.
            nuwave_enabled: When False, KISS and Pith are not constructed and
                the full system prompt is passed through unchanged.
            kiss_config: Optional KISSConfig; a default one is used if falsy.
            pith_config: Optional PithConfig; a default one is used if falsy.
            system_prompt: System context offered to the model every turn.
        """
        self.model_name = model_name
        self.nuwave_enabled = nuwave_enabled
        self.system_prompt = system_prompt

        self._kiss = None
        self._pith = None
        if nuwave_enabled:
            from nuwave.kiss import KISSFilter, KISSConfig
            from nuwave.pith import PithPipeline, PithConfig
            self._kiss = KISSFilter(kiss_config or KISSConfig())
            self._pith = PithPipeline(pith_config or PithConfig())

        self._messages: List[Dict[str, str]] = []
        self._turn_count = 0
        self._total_input_tokens = 0
        self._total_output_tokens = 0
        self._total_time = 0.0
        self._nuwave_time = 0.0
        # Defined up front so it is readable before the first prepare() call.
        self._kiss_mode = "disabled"

    def prepare(self, user_message: str) -> str:
        """CPU side: KISS + Pith context optimization → formatted prompt.

        Call this BEFORE the GPU inference. Appends the user message to the
        conversation, filters the system context, and returns the full prompt
        string ready for generate().
        """
        _ensure_model(self.model_name)

        self._turn_count += 1
        self._messages.append({"role": "user", "content": user_message})

        # KISS + Pith (CPU only); timed separately as NuWave overhead.
        nuwave_start = time.time()
        system_context = self.system_prompt
        self._kiss_mode = "disabled"

        if self.nuwave_enabled and self._kiss and self._pith:
            kiss_result = self._kiss.filter_context(
                self._messages,
                system_context,
            )
            system_context = kiss_result.get("system_context", system_context)
            self._kiss_mode = kiss_result.get("kiss_mode", "full")

            if system_context:
                # Pith works on paragraph-sized chunks of the system context.
                context_chunks = [
                    chunk.strip()
                    for chunk in system_context.split("\n\n")
                    if chunk.strip()
                ]
                if context_chunks:
                    optimized = self._pith.extract(
                        context_chunks,
                        query=user_message,
                    )
                    system_context = "\n\n".join(optimized)

        self._nuwave_time += time.time() - nuwave_start

        # Build prompt
        prompt_messages = []
        if system_context:
            prompt_messages.append({"role": "system", "content": system_context})
        prompt_messages.extend(self._messages)

        prompt = _tokenizer.apply_chat_template(
            prompt_messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        return prompt

    def record(self, response: str, input_tokens: int = 0, output_tokens: int = 0):
        """CPU side: record response and update state.

        Call this AFTER GPU inference.

        Args:
            response: Assistant text to append to the conversation.
            input_tokens: Prompt token count reported by generate().
            output_tokens: Generated token count reported by generate().
        """
        self._messages.append({"role": "assistant", "content": response})
        self._total_input_tokens += input_tokens
        self._total_output_tokens += output_tokens

    def chat(self, user_message: str, max_new_tokens: int = 512) -> str:
        """Convenience: prepare + generate + record in one call.

        For non-ZeroGPU use (local testing, dedicated GPU).
        For ZeroGPU, use prepare()/generate()/record() separately.
        """
        prompt = self.prepare(user_message)
        start = time.time()
        response, in_tok, out_tok = generate(prompt, max_new_tokens=max_new_tokens)
        self._total_time += time.time() - start
        self.record(response, in_tok, out_tok)
        return response

    def reset(self):
        """Reset conversation state. Model stays loaded."""
        self._messages = []
        self._turn_count = 0
        self._total_input_tokens = 0
        self._total_output_tokens = 0
        self._total_time = 0.0
        self._nuwave_time = 0.0
        self._kiss_mode = "disabled"
        # Rebuild the filters from their existing configs to drop their state.
        if self._kiss:
            from nuwave.kiss import KISSFilter
            self._kiss = KISSFilter(self._kiss._config)
        if self._pith:
            from nuwave.pith import PithPipeline
            self._pith = PithPipeline(self._pith._config)

    def stats(self) -> Dict[str, Any]:
        """Return running totals, plus KISS/Pith stats when those are active."""
        result = {
            "model": self.model_name,
            "nuwave_enabled": self.nuwave_enabled,
            "turns": self._turn_count,
            "total_input_tokens": self._total_input_tokens,
            "total_output_tokens": self._total_output_tokens,
            "total_time": round(self._total_time, 3),
            "nuwave_overhead": round(self._nuwave_time, 3),
            "avg_tokens_per_turn": self._total_input_tokens // max(self._turn_count, 1),
        }
        if self._kiss:
            result["kiss"] = self._kiss.stats.to_dict()
        if self._pith:
            result["pith"] = self._pith.stats.to_dict()
        return result
|
nuwave/benchmarks/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""NuWave benchmarks — A/B testing and compound curve measurement."""
|
| 2 |
+
from nuwave.benchmarks.harness import BenchmarkHarness
|
nuwave/benchmarks/harness.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Benchmark Harness — A/B Testing NuWave vs Baseline
|
| 3 |
+
|
| 4 |
+
Runs the same conversation through two adapters:
|
| 5 |
+
A: Stock model (no NuWave)
|
| 6 |
+
B: Same model + NuWave (KISS + Pith)
|
| 7 |
+
|
| 8 |
+
Measures context efficiency, token usage, and quality over N turns.
|
| 9 |
+
Produces the compound curve — NuWave's efficiency improving over time.
|
| 10 |
+
|
| 11 |
+
# ---- Changelog ----
|
| 12 |
+
# [2026-03-28] Claude Code (Opus 4.6) — Initial harness
|
| 13 |
+
# What: A/B benchmark for HuggingFace demo
|
| 14 |
+
# Why: Demonstrate NuWave compound returns empirically
|
| 15 |
+
# How: Run same conversation through both adapters, compare metrics per turn
|
| 16 |
+
# -------------------
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import logging
|
| 23 |
+
import time
|
| 24 |
+
from dataclasses import dataclass, field
|
| 25 |
+
from typing import Any, Dict, List, Optional, Callable
|
| 26 |
+
|
| 27 |
+
logger = logging.getLogger("nuwave.benchmark")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class TurnResult:
    """Metrics for one conversation turn, covering both arms of the A/B run.

    Fields suffixed ``_a`` belong to the baseline adapter, ``_b`` to the
    NuWave-enabled adapter. The KISS/Pith fields default to neutral values.
    """
    # Turn number and the user message sent on that turn.
    turn: int
    prompt: str
    # Model replies.
    response_a: str  # baseline
    response_b: str  # nuwave
    # Token counts.
    tokens_a: int
    tokens_b: int
    # Wall-clock seconds.
    time_a: float
    time_b: float
    # NuWave-side metrics.
    kiss_mode: str = ""
    kiss_skip_rate: float = 0.0
    pith_l1_size: int = 0
    pith_avg_heat: float = 0.0
    tokens_saved: int = 0
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@dataclass
class BenchmarkResult:
    """Complete benchmark result: per-turn records plus final adapter stats."""
    model_name: str
    total_turns: int
    turns: List["TurnResult"]
    stats_a: Dict[str, Any]  # baseline stats
    stats_b: Dict[str, Any]  # nuwave stats

    def summary(self) -> Dict[str, Any]:
        """Aggregate the per-turn records into headline numbers.

        Returns:
            Dict with token totals, ``token_efficiency`` (saved / baseline
            tokens), time totals, ``time_overhead`` (relative extra time of
            the NuWave arm; 0.0 when the baseline time is zero) and the final
            turn's KISS skip rate and Pith L1 size.
        """
        total_tokens_a = sum(t.tokens_a for t in self.turns)
        total_tokens_b = sum(t.tokens_b for t in self.turns)
        total_time_a = sum(t.time_a for t in self.turns)
        total_time_b = sum(t.time_b for t in self.turns)
        total_saved = sum(t.tokens_saved for t in self.turns)

        # Divide by the real baseline time. Clamping the divisor to 1 would
        # misreport the ratio for any baseline shorter than one second.
        if total_time_a > 0:
            time_overhead = (total_time_b - total_time_a) / total_time_a
        else:
            time_overhead = 0.0

        last = self.turns[-1] if self.turns else None

        return {
            "model": self.model_name,
            "turns": self.total_turns,
            "baseline_tokens": total_tokens_a,
            "nuwave_tokens": total_tokens_b,
            "tokens_saved": total_saved,
            "token_efficiency": round(total_saved / max(total_tokens_a, 1), 4),
            "baseline_time": round(total_time_a, 3),
            "nuwave_time": round(total_time_b, 3),
            "time_overhead": round(time_overhead, 4),
            "final_kiss_skip_rate": last.kiss_skip_rate if last else 0,
            "final_pith_l1_size": last.pith_l1_size if last else 0,
        }

    def compound_curve(self) -> List[Dict[str, Any]]:
        """Per-turn efficiency data for plotting the compound curve."""
        curve = []
        cumulative_saved = 0
        cumulative_total = 0
        for t in self.turns:
            cumulative_saved += t.tokens_saved
            cumulative_total += t.tokens_a
            curve.append({
                "turn": t.turn,
                "kiss_skip_rate": t.kiss_skip_rate,
                "pith_l1_size": t.pith_l1_size,
                "pith_avg_heat": t.pith_avg_heat,
                "tokens_saved_this_turn": t.tokens_saved,
                "cumulative_efficiency": round(
                    cumulative_saved / max(cumulative_total, 1), 4
                ),
                "tokens_a": t.tokens_a,
                "tokens_b": t.tokens_b,
            })
        return curve

    def to_json(self, path: str):
        """Save summary, compound curve and both stats dicts to ``path``."""
        data = {
            "summary": self.summary(),
            "compound_curve": self.compound_curve(),
            "stats_baseline": self.stats_a,
            "stats_nuwave": self.stats_b,
        }
        # Explicit encoding so the output does not depend on the locale.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
class BenchmarkHarness:
    """A/B benchmark harness for NuWave vs baseline.

    Usage:
        harness = BenchmarkHarness("microsoft/Phi-4-mini")
        result = harness.run(conversations)
        print(result.summary())
        result.to_json("benchmark_results.json")
    """

    def __init__(
        self,
        model_name: str,
        system_prompt: str = "",
        device: str = "auto",
    ):
        """Store the benchmark configuration.

        Args:
            model_name: HuggingFace model identifier used for both arms.
            system_prompt: System context given to both adapters.
            device: Kept for interface compatibility and stored on the
                instance. It is not forwarded, because HuggingFaceAdapter
                takes no ``device`` argument.
        """
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.device = device

    def run(
        self,
        conversations: List[str],
        max_new_tokens: int = 256,
    ) -> "BenchmarkResult":
        """Run A/B benchmark.

        Args:
            conversations: List of user messages to send sequentially
            max_new_tokens: Max tokens per response

        Returns:
            BenchmarkResult with per-turn data and compound curve
        """
        from nuwave.adapters.huggingface import HuggingFaceAdapter

        logger.info("Starting A/B benchmark: %s (%d turns)", self.model_name, len(conversations))

        # A: Baseline (no NuWave)
        adapter_a = HuggingFaceAdapter(
            self.model_name,
            nuwave_enabled=False,
            system_prompt=self.system_prompt,
        )

        # B: NuWave enabled
        adapter_b = HuggingFaceAdapter(
            self.model_name,
            nuwave_enabled=True,
            system_prompt=self.system_prompt,
        )

        turns = []
        # Adapter and KISS stats are running totals. Keep the previous values
        # so each TurnResult holds this turn's delta; BenchmarkResult sums the
        # per-turn values itself.
        prev_in_a = 0
        prev_in_b = 0
        prev_saved = 0

        for turn_num, prompt in enumerate(conversations, start=1):
            logger.info("Turn %d/%d: %s...", turn_num, len(conversations), prompt[:50])

            # Baseline
            t0 = time.time()
            response_a = adapter_a.chat(prompt, max_new_tokens=max_new_tokens)
            time_a = time.time() - t0
            stats_a_now = adapter_a.stats()

            # NuWave
            t0 = time.time()
            response_b = adapter_b.chat(prompt, max_new_tokens=max_new_tokens)
            time_b = time.time() - t0
            stats_b_now = adapter_b.stats()

            # Extract KISS/Pith metrics
            kiss_stats = stats_b_now.get("kiss", {})
            pith_stats = stats_b_now.get("pith", {})

            total_in_a = stats_a_now.get("total_input_tokens", 0)
            total_in_b = stats_b_now.get("total_input_tokens", 0)
            total_saved = kiss_stats.get("tokens_saved", 0)

            # Prefer the stats dict, then fall back to the mode the adapter
            # recorded for its latest prepare() call.
            kiss_mode = kiss_stats.get("kiss_mode") or getattr(adapter_b, "_kiss_mode", "")

            turn = TurnResult(
                turn=turn_num,
                prompt=prompt,
                response_a=response_a,
                response_b=response_b,
                tokens_a=total_in_a - prev_in_a,
                tokens_b=total_in_b - prev_in_b,
                time_a=time_a,
                time_b=time_b,
                kiss_mode=kiss_mode,
                kiss_skip_rate=kiss_stats.get("skip_rate", 0),
                pith_l1_size=pith_stats.get("l1_current_size", 0),
                pith_avg_heat=pith_stats.get("l1_avg_heat", 0),
                tokens_saved=total_saved - prev_saved,
            )
            turns.append(turn)
            prev_in_a, prev_in_b, prev_saved = total_in_a, total_in_b, total_saved

            logger.info(
                "  A: %d tokens, %.2fs | B: %d tokens, %.2fs (KISS skip: %.1f%%)",
                turn.tokens_a, time_a,
                turn.tokens_b, time_b,
                turn.kiss_skip_rate * 100,
            )

        result = BenchmarkResult(
            model_name=self.model_name,
            total_turns=len(conversations),
            turns=turns,
            stats_a=adapter_a.stats(),
            stats_b=adapter_b.stats(),
        )

        logger.info("Benchmark complete: %s", result.summary())
        return result
|
nuwave/demo/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""NuWave Gradio demo for HuggingFace Spaces."""
|
nuwave/kiss/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""KISS — Keep Input Simple, Substrate. NuWave Layer 1."""
|
| 2 |
+
from nuwave.kiss.filter import KISSFilter, KISSConfig
|
nuwave/kiss/filter.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
KISS Filter — Keep Input Simple, Substrate
|
| 3 |
+
|
| 4 |
+
Manages what reaches the model's context window. Tracks conversation
|
| 5 |
+
history and determines what context the model actually needs.
|
| 6 |
+
|
| 7 |
+
KISS does NOT classify input (Law 7). It removes redundancy, not meaning.
|
| 8 |
+
|
| 9 |
+
Key insight: in multi-turn conversation, the system context is the same
|
| 10 |
+
every turn (redundant), early conversation history has already been
|
| 11 |
+
processed (redundant), and only the recent messages + new context are
|
| 12 |
+
genuinely novel. KISS separates these and compresses accordingly.
|
| 13 |
+
|
| 14 |
+
# ---- Changelog ----
|
| 15 |
+
# [2026-03-29] Claude Code (Opus 4.6) — Fixed delta detection
|
| 16 |
+
# What: Separate system context delta from message delta. Compress
|
| 17 |
+
# old conversation history. Only pass genuinely novel context.
|
| 18 |
+
# Why: Hashing full message history meant every turn was "different"
|
| 19 |
+
# because new messages got added. KISS never triggered.
|
| 20 |
+
# How: Track system context hash separately. Compress old messages
|
| 21 |
+
# into summary. Recent window passes verbatim. System context
|
| 22 |
+
# skipped when unchanged.
|
| 23 |
+
# [2026-03-28] Claude Code (Opus 4.6) — Initial implementation
|
| 24 |
+
# -------------------
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
from __future__ import annotations
|
| 28 |
+
|
| 29 |
+
import hashlib
|
| 30 |
+
import logging
|
| 31 |
+
import time
|
| 32 |
+
from dataclasses import dataclass, field
|
| 33 |
+
from typing import Any, Dict, List, Optional
|
| 34 |
+
|
| 35 |
+
logger = logging.getLogger("nuwave.kiss")
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@dataclass
class KISSConfig:
    """Tunable settings for the KISS filter."""
    # Number of initial turns passed through raw (warmup).
    warmup_turns: int = 3

    # Count of most recent messages always passed verbatim
    # (default 6 = 3 user + 3 assistant turns).
    recent_window: int = 6

    # A full context refresh is forced every N turns (GOP boundary).
    force_full_every: int = 20

    # Whether to skip the system context when unchanged from the last turn.
    skip_unchanged_system: bool = True
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@dataclass
class KISSStats:
    """Running counters for KISS filtering, plus ratios derived from them."""
    total_turns: int = 0
    system_skipped: int = 0
    history_compressed: int = 0
    full_passed: int = 0
    warmup_passed: int = 0
    tokens_saved: int = 0
    tokens_total: int = 0
    messages_compressed: int = 0  # how many old messages were summarized

    @property
    def skip_rate(self) -> float:
        """Fraction of turns on which the system context was skipped."""
        turns = self.total_turns
        return self.system_skipped / turns if turns > 0 else 0.0

    @property
    def efficiency(self) -> float:
        """Tokens saved divided by tokens seen (divisor clamped to >= 1)."""
        denominator = max(self.tokens_total, 1)
        return self.tokens_saved / denominator

    @property
    def compression_rate(self) -> float:
        """Messages summarized per turn (divisor clamped to >= 1)."""
        denominator = max(self.total_turns, 1)
        return self.messages_compressed / denominator

    def to_dict(self) -> Dict[str, Any]:
        """Snapshot of counters and ratios; ratios are rounded to 4 places."""
        snapshot: Dict[str, Any] = {}
        snapshot["total_turns"] = self.total_turns
        snapshot["system_skipped"] = self.system_skipped
        snapshot["history_compressed"] = self.history_compressed
        snapshot["full_passed"] = self.full_passed
        snapshot["warmup_passed"] = self.warmup_passed
        snapshot["skip_rate"] = round(self.skip_rate, 4)
        snapshot["efficiency"] = round(self.efficiency, 4)
        snapshot["tokens_saved"] = self.tokens_saved
        snapshot["tokens_total"] = self.tokens_total
        snapshot["messages_compressed"] = self.messages_compressed
        snapshot["compression_rate"] = round(self.compression_rate, 4)
        return snapshot
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class KISSFilter:
    """KISS filter for context window optimization.

    Three things happen every turn:
    1. System context: same as last turn? Skip it (delta gate).
    2. Old messages: beyond the recent window? Compress to summary.
    3. Recent messages: pass verbatim — the model needs these.

    The filter returns only the (possibly emptied, possibly summary-prefixed)
    system context; it does not return or modify the message list itself.
    Token counts are whitespace-split word counts, not tokenizer tokens.
    """

    def __init__(self, config: Optional["KISSConfig"] = None):
        """Create a filter with ``config`` (defaults to ``KISSConfig()``)."""
        self._config = config or KISSConfig()
        self._last_system_hash: Optional[str] = None
        self._since_full: int = 0
        self._turn_count: int = 0
        self.stats = KISSStats()

    def filter_context(
        self,
        messages: List[Dict[str, str]],
        system_context: str = "",
    ) -> Dict[str, Any]:
        """Filter context for the next model call.

        Args:
            messages: Full conversation history. A message whose ``content``
                is missing or ``None`` is treated as empty.
            system_context: System prompt / substrate context. ``None`` is
                treated as the empty string.

        Returns:
            Dict with 'system_context' (filtered), 'kiss_mode', 'kiss_meta'
        """
        system_context = system_context or ""

        self._turn_count += 1
        self.stats.total_turns += 1
        total_tokens = sum(self._count_tokens(m.get("content")) for m in messages)
        total_tokens += self._count_tokens(system_context)
        self.stats.tokens_total += total_tokens

        # Warmup — pass everything raw
        if self._turn_count <= self._config.warmup_turns:
            self._last_system_hash = self._hash(system_context)
            self.stats.warmup_passed += 1
            self.stats.full_passed += 1
            return {
                "system_context": system_context,
                "kiss_mode": "full",
                "kiss_meta": {"reason": "warmup", "turn": self._turn_count},
            }

        # Forced full refresh (GOP boundary)
        self._since_full += 1
        if self._since_full >= self._config.force_full_every:
            self._since_full = 0
            self._last_system_hash = self._hash(system_context)
            self.stats.full_passed += 1
            return {
                "system_context": system_context,
                "kiss_mode": "full",
                "kiss_meta": {"reason": "gop_refresh"},
            }

        # 1. System context delta gate
        current_sys_hash = self._hash(system_context)
        system_changed = current_sys_hash != self._last_system_hash
        self._last_system_hash = current_sys_hash

        # Skip only when the context is unchanged AND the config allows it.
        skip_system = self._config.skip_unchanged_system and not system_changed
        filtered_system = "" if skip_system else system_context
        if skip_system:
            self.stats.system_skipped += 1
            self.stats.tokens_saved += self._count_tokens(system_context)

        # 2. History compression — summarize old messages
        n_messages = len(messages)
        recent_window = self._config.recent_window
        compressed_count = 0

        if n_messages > recent_window:
            old_messages = messages[:n_messages - recent_window]
            old_token_count = sum(
                self._count_tokens(m.get("content")) for m in old_messages
            )

            summary = self._summarize(old_messages)
            tokens_saved = max(0, old_token_count - self._count_tokens(summary))
            compressed_count = len(old_messages)

            self.stats.tokens_saved += tokens_saved
            self.stats.history_compressed += 1
            self.stats.messages_compressed += compressed_count

            # Prepend summary to system context
            if filtered_system:
                filtered_system = summary + "\n\n" + filtered_system
            else:
                filtered_system = summary

            kiss_mode = "compressed"
        else:
            kiss_mode = "sparse" if skip_system else "full"

        return {
            "system_context": filtered_system,
            "kiss_mode": kiss_mode,
            "kiss_meta": {
                "system_changed": system_changed,
                "messages_total": n_messages,
                "messages_compressed": compressed_count,
                "recent_window": min(recent_window, n_messages),
            },
        }

    @staticmethod
    def _count_tokens(text: Optional[str]) -> int:
        """Whitespace word count of ``text``; ``None`` or empty counts as 0."""
        return len(text.split()) if text else 0

    @staticmethod
    def _summarize(old_messages: List[Dict[str, str]]) -> str:
        """Build the one-line '[Earlier conversation: ...]' digest."""
        summary_parts = []
        for m in old_messages:
            role = m.get("role", "unknown")
            content = m.get("content") or ""
            # First sentence or first 60 chars
            short = content.split(".")[0][:60]
            if len(content) > 60:
                short += "..."
            summary_parts.append(f"{role}: {short}")
        return "[Earlier conversation: " + " | ".join(summary_parts) + "]"

    @staticmethod
    def _hash(content: str) -> str:
        """Short (16 hex chars) SHA-256 fingerprint of ``content``."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]
|
nuwave/lenia_splat.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Lenia Dynamics on Gaussian Splats
|
| 3 |
+
=================================
|
| 4 |
+
Adapted from UniAI's LeniaEngine to operate on splat parameters
|
| 5 |
+
(mu, sigma, alpha) instead of raw weight values.
|
| 6 |
+
|
| 7 |
+
Key insight: Gaussian splats ARE Lenia kernels. The same structure
|
| 8 |
+
serves as both weight representation and dynamics engine. A splat's
|
| 9 |
+
sigma IS its neighborhood, its alpha IS its state, and its mu IS
|
| 10 |
+
its position in weight space.
|
| 11 |
+
|
| 12 |
+
Lenia growth function operates on the neighborhood potential
|
| 13 |
+
between splats. Splats that are near each other interact:
|
| 14 |
+
- Their amplitudes modulate based on the growth function
|
| 15 |
+
- Their positions drift based on gradient of the potential field
|
| 16 |
+
- Their spreads adapt based on local reconstruction error
|
| 17 |
+
|
| 18 |
+
This means the "compression" is alive — it reshapes itself
|
| 19 |
+
continuously based on what the model is processing.
|
| 20 |
+
|
| 21 |
+
# ---- Changelog ----
|
| 22 |
+
# [2026-04-05] Claude Code (Opus 4.6) — Initial implementation
|
| 23 |
+
# What: Lenia dynamics adapted for Gaussian splat parameters
|
| 24 |
+
# Why: Prove splats can evolve via continuous dynamics
|
| 25 |
+
# -------------------
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
|
| 30 |
+
import logging
|
| 31 |
+
import time
|
| 32 |
+
from dataclasses import dataclass, field
|
| 33 |
+
from typing import Any, Dict, List, Optional
|
| 34 |
+
|
| 35 |
+
import torch
|
| 36 |
+
import torch.nn.functional as F
|
| 37 |
+
|
| 38 |
+
from nuwave.splat_engine import GaussianSplats
|
| 39 |
+
|
| 40 |
+
logger = logging.getLogger("uniai.lenia_splat")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@dataclass
class LeniaSplatConfig:
    """Configuration for Lenia dynamics on splats.

    Values are validated on construction: a non-positive ``growth_sigma``
    or ``interaction_radius`` would divide by zero inside the engine and
    silently fill the splat parameters with NaN/inf, and inverted or
    negative clamp bounds would make the safety clamps meaningless.

    Raises:
        ValueError: if any of the constraints checked in
            ``__post_init__`` is violated.
    """

    # Growth function parameters (from proven VPS config)
    growth_mu: float = 0.15        # target neighborhood potential
    growth_sigma: float = 0.015    # growth function width
    growth_scale: float = 0.001    # dt — how fast splats change per step

    # Splat interaction radius (in weight-space units)
    interaction_radius: float = 5.0

    # Position dynamics
    position_lr: float = 0.0003    # how fast splats drift
    sigma_lr: float = 0.0001       # how fast spreads adapt

    # Activation coupling — the resource (from ProtoUniBrain pattern)
    # Higher activation through a weight region = stronger Lenia dynamics there
    activation_coupling: float = 2.0

    # Safety
    max_alpha_delta: float = 0.02     # max amplitude change per step
    max_position_delta: float = 0.3   # max position drift per step
    min_sigma: float = 0.1            # don't let splats collapse to points
    max_sigma: float = 10.0           # don't let splats blow up

    # Mass conservation
    conserve_mass: bool = True     # preserve total |alpha| per layer

    def __post_init__(self):
        """Reject values that would produce NaN/inf or degenerate clamps."""
        if not self.growth_sigma > 0:
            raise ValueError(
                f"growth_sigma must be > 0, got {self.growth_sigma!r}"
            )
        if not self.interaction_radius > 0:
            raise ValueError(
                f"interaction_radius must be > 0, got {self.interaction_radius!r}"
            )
        if self.max_alpha_delta < 0:
            raise ValueError(
                f"max_alpha_delta must be >= 0, got {self.max_alpha_delta!r}"
            )
        if self.max_position_delta < 0:
            raise ValueError(
                f"max_position_delta must be >= 0, got {self.max_position_delta!r}"
            )
        if self.min_sigma > self.max_sigma:
            raise ValueError(
                f"min_sigma ({self.min_sigma!r}) must not exceed "
                f"max_sigma ({self.max_sigma!r})"
            )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@dataclass
class SplatDynamicsState:
    """Mutable bookkeeping for a running splat Lenia simulation.

    The two dictionaries are keyed by layer name; each instance gets its
    own fresh dictionaries through ``default_factory``.
    """

    # Number of completed dynamics steps.
    step_count: int = 0
    # Accumulated step time, in milliseconds.
    total_time_ms: float = 0.0
    # Per-layer amplitude-change value recorded for the most recent step.
    alpha_deltas: Dict[str, float] = field(default_factory=dict)
    # Per-layer position-change value recorded for the most recent step.
    position_deltas: Dict[str, float] = field(default_factory=dict)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class LeniaSplatEngine:
    """
    Applies Lenia dynamics to Gaussian splat parameters.

    Instead of convolving over a weight grid, we compute
    splat-to-splat interactions directly. Each splat's neighborhood
    potential is the weighted sum of nearby splats' contributions.

    The pairwise interaction is dense: every step builds (N, N) and
    (N, N, 2) tensors per layer, so cost and memory grow quadratically
    with the number of splats in a layer.

    Usage:
        engine = LeniaSplatEngine(config)
        engine.register_layer("layer_0", splats)
        metrics = engine.step()  # evolve all registered splat layers
    """

    def __init__(self, config: Optional[LeniaSplatConfig] = None):
        """Create an engine with no registered layers.

        Args:
            config: dynamics configuration; a default ``LeniaSplatConfig``
                is used when omitted.
        """
        self.config = config or LeniaSplatConfig()
        self.state = SplatDynamicsState()
        self.layers: Dict[str, GaussianSplats] = {}
        self._initial_mass: Dict[str, float] = {}
        # Per-layer, per-splat activation fed by feed_activations().
        # Created once here so registering a layer never discards the
        # activations already recorded for other layers.
        self._activation_maps: Dict[str, torch.Tensor] = {}

    def register_layer(self, name: str, splats: GaussianSplats):
        """Register a splat layer for Lenia dynamics.

        Records the layer's total ``|alpha|`` as the mass target used by
        mass conservation. Re-registering an existing name replaces the
        splats and drops that layer's previously fed activation map
        (it was sampled for the old splats); other layers are untouched.
        """
        self.layers[name] = splats
        self._initial_mass[name] = splats.alpha.abs().sum().item()
        self._activation_maps.pop(name, None)

    def feed_activations(self, name: str, input_vec: torch.Tensor, output_vec: torch.Tensor):
        """Feed activation flow from a forward pass through this weight layer.

        Given the input and output vectors of a matmul through the weight matrix,
        compute per-splat activation magnitude: how much signal flowed through
        each splat's region of weight space.

        This is the resource that modulates Lenia growth — splats in used
        regions get a larger growth multiplier in ``_step_layer``.

        Unknown layer names are ignored. The vectors are indexed along
        their first dimension — assumes both are 1-D; TODO confirm with
        callers.
        """
        splats = self.layers.get(name)
        if splats is None:
            return

        # Input activations tell us which COLUMNS were active,
        # output activations tell us which ROWS were active.
        # Each magnitude vector is scaled by its own maximum -> [0, 1].
        in_mag = input_vec.abs().float()
        out_mag = output_vec.abs().float()
        in_mag = in_mag / (in_mag.max() + 1e-8)
        out_mag = out_mag / (out_mag.max() + 1e-8)

        # Sample the activation at each splat's (row, col) centre,
        # truncated to integer indices and clamped into range.
        mu_r = splats.mu[:, 0].long().clamp(0, len(out_mag) - 1)
        mu_c = splats.mu[:, 1].long().clamp(0, len(in_mag) - 1)

        # A splat is active only as far as BOTH its row and column were.
        self._activation_maps[name] = out_mag[mu_r] * in_mag[mu_c]  # (n_splats,)

    # -----------------------------------------------------------------
    # Growth function
    # -----------------------------------------------------------------

    def _growth_function(self, potential: torch.Tensor) -> torch.Tensor:
        """Lenia growth function: bell curve centered on growth_mu.

            G(u) = 2 * exp(-((u - mu) / sigma)^2 / 2) - 1

        Returns values in [-1, 1]: +1 exactly at ``growth_mu``, tending
        to -1 far from it.
        """
        mu = self.config.growth_mu
        sigma = self.config.growth_sigma
        return 2.0 * torch.exp(-((potential - mu) / sigma) ** 2 / 2) - 1.0

    # -----------------------------------------------------------------
    # Splat-to-splat interaction
    # -----------------------------------------------------------------

    def _pairwise_kernel(self, splats: GaussianSplats):
        """Return ``(diff, interaction)`` for all splat pairs.

        ``diff[i, j]`` is ``mu_j - mu_i`` with shape (N, N, 2);
        ``interaction[i, j]`` is ``exp(-||mu_i - mu_j||^2 / (2 R^2))``
        with the diagonal zeroed (no self-interaction), shape (N, N).
        """
        R = self.config.interaction_radius
        diff = splats.mu.unsqueeze(0) - splats.mu.unsqueeze(1)  # (N, N, 2)
        dist_sq = (diff ** 2).sum(dim=2)                        # (N, N)
        interaction = torch.exp(-dist_sq / (2 * R ** 2))
        interaction.fill_diagonal_(0.0)
        return diff, interaction

    def _compute_neighborhood_potential(self, splats: GaussianSplats) -> torch.Tensor:
        """Compute neighborhood potential for each splat.

        For each splat i, the potential is the amplitude-weighted sum of
        the Gaussian kernel over all other splats:

            U(i) = sum_{j != i} |alpha_j| * exp(-||mu_i - mu_j||^2 / (2 * R^2))

        There is no hard cutoff; ``interaction_radius`` R is the kernel
        width. Returns a tensor of shape (N,).
        """
        _, interaction = self._pairwise_kernel(splats)
        return (interaction * splats.alpha.abs().unsqueeze(0)).sum(dim=1)  # (N,)

    def _compute_position_gradient(self, splats: GaussianSplats, growth: torch.Tensor) -> torch.Tensor:
        """Compute the position drift for each splat.

        Only splats with negative growth move; the step size scales with
        how negative the growth is and with ``position_lr``.

        NOTE(review): ``diff[i, j]`` is ``mu_j - mu_i``, so ``-diff`` is
        ``mu_i - mu_j`` and the summed vector is the NEGATIVE of
        dU(i)/d(mu_i): moving splats are pushed away from their
        amplitude-weighted neighbours, i.e. down the potential. This is
        the existing behaviour and is preserved here — confirm it is the
        intended direction.

        Returns a tensor of shape (N, 2).
        """
        R = self.config.interaction_radius
        diff, interaction = self._pairwise_kernel(splats)

        # (mu_i - mu_j) / R^2 * K_ij for every pair
        grad_interaction = -diff / (R ** 2) * interaction.unsqueeze(2)  # (N, N, 2)

        # Weight each neighbour j by |alpha_j| and sum over neighbours.
        neighbor_weight = splats.alpha.abs().unsqueeze(0).unsqueeze(2)
        weighted_grad = (grad_interaction * neighbor_weight).sum(dim=1)  # (N, 2)

        # Negative growth = move, non-negative growth = stay put.
        move_strength = torch.clamp(-growth, min=0).unsqueeze(1)  # (N, 1)
        return weighted_grad * move_strength * self.config.position_lr

    # -----------------------------------------------------------------
    # Dynamics step
    # -----------------------------------------------------------------

    @torch.no_grad()
    def step(self) -> Dict[str, Any]:
        """Apply one Lenia dynamics step to all registered splat layers.

        Returns a metrics dict with the step number, per-layer metrics
        under ``'layers'``, the sums of the per-layer mean deltas, and
        the elapsed time in milliseconds under ``'time_ms'``.
        """
        # perf_counter is monotonic, so the elapsed time cannot go
        # negative or jump when the system clock is adjusted.
        start = time.perf_counter()
        metrics = {
            'step': self.state.step_count + 1,
            'layers': {},
            'total_alpha_delta': 0.0,
            'total_position_delta': 0.0,
        }

        for name, splats in self.layers.items():
            layer_metrics = self._step_layer(name, splats)
            metrics['layers'][name] = layer_metrics
            metrics['total_alpha_delta'] += layer_metrics['alpha_delta_mean']
            metrics['total_position_delta'] += layer_metrics['position_delta_mean']

        elapsed = (time.perf_counter() - start) * 1000
        metrics['time_ms'] = elapsed
        self.state.step_count += 1
        self.state.total_time_ms += elapsed

        return metrics

    def _step_layer(self, name: str, splats: GaussianSplats) -> Dict[str, float]:
        """Apply Lenia dynamics to a single splat layer (mutates it in place).

        Returns the layer's metrics for this step. A layer with no splats
        is left untouched and reports zeros.
        """
        cfg = self.config

        # Reductions such as potential.max() are undefined on empty
        # tensors; an empty layer has nothing to evolve.
        if splats.n_splats == 0:
            self.state.alpha_deltas[name] = 0.0
            self.state.position_deltas[name] = 0.0
            return {
                'alpha_delta_mean': 0.0,
                'position_delta_mean': 0.0,
                'sigma_mean': 0.0,
                'sigma_std': 0.0,
                'potential_mean': 0.0,
                'growth_mean': 0.0,
            }

        # 1. Neighborhood potential
        potential = self._compute_neighborhood_potential(splats)

        # 2. Growth: positive near growth_mu (reinforce), negative far
        #    from it (weaken).
        growth = self._growth_function(potential)

        # 3. Modulate by activation flow. Base dynamics always run;
        #    tanh(act * coupling) lies in [0, 1) for non-negative
        #    activations, so the multiplier is in [1, 2).
        if name in self._activation_maps and cfg.activation_coupling > 0:
            act = self._activation_maps[name]
            boost = torch.tanh(act * cfg.activation_coupling)
            growth = growth * (1.0 + boost)

        # 4. Update amplitudes, clamped per step
        alpha_delta = cfg.growth_scale * growth
        alpha_delta = alpha_delta.clamp(-cfg.max_alpha_delta, cfg.max_alpha_delta)
        splats.alpha += alpha_delta

        # 5. Update positions (uses the already-updated amplitudes)
        position_delta = self._compute_position_gradient(splats, growth)
        position_delta = position_delta.clamp(-cfg.max_position_delta, cfg.max_position_delta)
        splats.mu += position_delta

        # 6. Adapt sigma based on local density
        #    Dense regions -> tighter splats, sparse regions -> wider splats
        density = potential / (potential.max() + 1e-8)  # scaled by the layer maximum
        sigma_delta = cfg.sigma_lr * (0.5 - density)
        splats.sigma += sigma_delta
        splats.sigma.clamp_(cfg.min_sigma, cfg.max_sigma)

        # 7. Keep positions in bounds
        splats.mu[:, 0].clamp_(0, splats.rows - 1)
        splats.mu[:, 1].clamp_(0, splats.cols - 1)

        # 8. Mass conservation: rescale so total |alpha| matches the
        #    value recorded at registration.
        if cfg.conserve_mass and name in self._initial_mass:
            current_mass = splats.alpha.abs().sum().item()
            target_mass = self._initial_mass[name]
            if current_mass > 0:
                splats.alpha *= (target_mass / current_mass)

        # Metrics (deltas are measured before the mass rescale)
        alpha_delta_mean = alpha_delta.abs().mean().item()
        position_delta_mean = position_delta.abs().mean().item()

        self.state.alpha_deltas[name] = alpha_delta_mean
        self.state.position_deltas[name] = position_delta_mean

        return {
            'alpha_delta_mean': alpha_delta_mean,
            'position_delta_mean': position_delta_mean,
            'sigma_mean': splats.sigma.mean().item(),
            'sigma_std': splats.sigma.std().item(),
            'potential_mean': potential.mean().item(),
            'growth_mean': growth.mean().item(),
        }

    # -----------------------------------------------------------------
    # Diagnostics
    # -----------------------------------------------------------------

    def get_summary(self) -> Dict[str, Any]:
        """Get dynamics state summary (step count, timing, layer/splat totals)."""
        steps = self.state.step_count
        return {
            'steps': steps,
            'total_time_ms': self.state.total_time_ms,
            'avg_step_ms': self.state.total_time_ms / steps if steps > 0 else 0,
            'num_layers': len(self.layers),
            'total_splats': sum(s.n_splats for s in self.layers.values()),
        }
|
nuwave/organism.py
ADDED
|
@@ -0,0 +1,662 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NuWave Organism — The Living System
|
| 3 |
+
|
| 4 |
+
Wires the full loop:
|
| 5 |
+
User message → substrate (raw experience, Law 7)
|
| 6 |
+
→ graph.step() (substrate processes)
|
| 7 |
+
→ KISS bucket extracts from River (what changed?)
|
| 8 |
+
→ Pith bucket extracts from River (what's relevant?)
|
| 9 |
+
→ Model generates with focused context
|
| 10 |
+
→ Outcome feeds back into substrate (raw experience, Law 7)
|
| 11 |
+
→ Substrate learns → topology reshapes → next cycle's buckets extract differently
|
| 12 |
+
|
| 13 |
+
KISS and Pith don't read the substrate directly. They extract from
|
| 14 |
+
the River through their own shaped buckets. The substrate is the
|
| 15 |
+
communication protocol (Law 1). Raw experience in, classification
|
| 16 |
+
only at extraction (Law 7).
|
| 17 |
+
|
| 18 |
+
# ---- Changelog ----
|
| 19 |
+
# [2026-04-06] Claude Code (Opus 4.6) — Initial organism wiring
|
| 20 |
+
# What: Full substrate loop for HuggingFace NuWave demo
|
| 21 |
+
# Why: Baseline proved 47.2% with crude string KISS. Real substrate
|
| 22 |
+
# + real KISS/Pith buckets should exceed that significantly.
|
| 23 |
+
# How: ng_lite substrate, ng_embed for embeddings, ng_tract for
|
| 24 |
+
# binary topology. KISS reads StepResult. Pith extracts via
|
| 25 |
+
# spreading activation. Outcomes close the loop.
|
| 26 |
+
# -------------------
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
from __future__ import annotations
|
| 30 |
+
|
| 31 |
+
import logging
|
| 32 |
+
import os
|
| 33 |
+
import sys
|
| 34 |
+
import time
|
| 35 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 36 |
+
|
| 37 |
+
import numpy as np
|
| 38 |
+
|
| 39 |
+
logger = logging.getLogger("nuwave.organism")
|
| 40 |
+
|
| 41 |
+
# Substrate path — added temporarily during import, not permanently
|
| 42 |
+
_substrate_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'substrate')
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class NuWaveOrganism:
|
| 46 |
+
"""The living system. Substrate + KISS bucket + Pith bucket.
|
| 47 |
+
|
| 48 |
+
Usage:
|
| 49 |
+
organism = NuWaveOrganism()
|
| 50 |
+
kiss_context, pith_context = organism.process_input(user_message)
|
| 51 |
+
# ... model generates with pith_context ...
|
| 52 |
+
organism.record_outcome(user_message, response, success=True)
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
# Hub persistence — survives container rebuilds, sleeps, everything
|
| 56 |
+
HUB_REPO = "Executor-Tyrant-Framework/nuwave-substrate-state"
|
| 57 |
+
HUB_FILENAME = "organism_state.pt"
|
| 58 |
+
|
| 59 |
+
def __init__(self, state_path: str = "/tmp/nuwave_substrate"):
|
| 60 |
+
self._state_path = state_path
|
| 61 |
+
os.makedirs(state_path, exist_ok=True)
|
| 62 |
+
self._local_state_path = os.path.join(state_path, "organism_state.pt")
|
| 63 |
+
self._hf_token = os.environ.get("HF_TOKEN", None)
|
| 64 |
+
|
| 65 |
+
# Initialize substrate
|
| 66 |
+
self._graph = None
|
| 67 |
+
self._embed_fn = None
|
| 68 |
+
self._step_result = None
|
| 69 |
+
self._step_count = 0
|
| 70 |
+
self._node_content: Dict[str, str] = {} # node_id → content for Pith
|
| 71 |
+
|
| 72 |
+
# Amplitude field — per-node complex amplitude for interference dynamics
|
| 73 |
+
# amplitude: how strongly this node contributes (grown by use, decayed by time)
|
| 74 |
+
# phase: oscillation angle (advanced each step, interference depends on alignment)
|
| 75 |
+
self._amplitudes: Dict[str, float] = {} # node_id → |a| in [0, 1]
|
| 76 |
+
self._phases: Dict[str, float] = {} # node_id → θ in [0, 2π]
|
| 77 |
+
self._amplitude_decay: float = 0.05 # per step — halves in ~14 steps
|
| 78 |
+
self._interference_rate: float = 0.15 # how strongly neighbors interfere
|
| 79 |
+
self._amplitude_coupling: float = 0.4 # how strongly activation grows amplitude
|
| 80 |
+
|
| 81 |
+
# Stats
|
| 82 |
+
self.stats = {
|
| 83 |
+
'steps': 0,
|
| 84 |
+
'nodes': 0,
|
| 85 |
+
'synapses': 0,
|
| 86 |
+
'hyperedges': 0,
|
| 87 |
+
'fired_nodes': 0,
|
| 88 |
+
'fired_hyperedges': 0,
|
| 89 |
+
'predictions_confirmed': 0,
|
| 90 |
+
'predictions_surprised': 0,
|
| 91 |
+
'kiss_skips': 0,
|
| 92 |
+
'kiss_passes': 0,
|
| 93 |
+
'pith_l1_size': 0,
|
| 94 |
+
'pith_promotions': 0,
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
self._init_substrate()
|
| 98 |
+
self._restore_state()
|
| 99 |
+
|
| 100 |
+
def _restore_state(self):
|
| 101 |
+
"""Restore persisted state — local first, then hub.
|
| 102 |
+
|
| 103 |
+
Hub persistence survives container rebuilds, sleeps, code pushes.
|
| 104 |
+
Local is faster on warm restart. Hub is the permanent backup.
|
| 105 |
+
"""
|
| 106 |
+
import json
|
| 107 |
+
|
| 108 |
+
# Try local first (warm restart)
|
| 109 |
+
restored = self._try_restore_local()
|
| 110 |
+
if restored:
|
| 111 |
+
return
|
| 112 |
+
|
| 113 |
+
# Try hub (cold restart after rebuild)
|
| 114 |
+
restored = self._try_restore_hub()
|
| 115 |
+
if restored:
|
| 116 |
+
return
|
| 117 |
+
|
| 118 |
+
logger.info("No saved state found — starting fresh organism")
|
| 119 |
+
|
| 120 |
+
def _try_restore_local(self) -> bool:
|
| 121 |
+
"""Restore from local state file."""
|
| 122 |
+
if not os.path.exists(self._local_state_path):
|
| 123 |
+
return False
|
| 124 |
+
try:
|
| 125 |
+
import torch
|
| 126 |
+
state = torch.load(self._local_state_path, map_location="cpu", weights_only=False)
|
| 127 |
+
return self._apply_state(state, source="local")
|
| 128 |
+
except Exception as exc:
|
| 129 |
+
logger.warning("Local restore failed: %s", exc)
|
| 130 |
+
return False
|
| 131 |
+
|
| 132 |
+
def _try_restore_hub(self) -> bool:
|
| 133 |
+
"""Pull state from HuggingFace dataset repo."""
|
| 134 |
+
if not self._hf_token:
|
| 135 |
+
return False
|
| 136 |
+
try:
|
| 137 |
+
from huggingface_hub import hf_hub_download
|
| 138 |
+
local = hf_hub_download(
|
| 139 |
+
self.HUB_REPO, self.HUB_FILENAME,
|
| 140 |
+
repo_type="dataset", local_dir=self._state_path,
|
| 141 |
+
token=self._hf_token,
|
| 142 |
+
)
|
| 143 |
+
import torch
|
| 144 |
+
state = torch.load(local, map_location="cpu", weights_only=False)
|
| 145 |
+
return self._apply_state(state, source="hub")
|
| 146 |
+
except Exception as exc:
|
| 147 |
+
logger.info("No hub state: %s", exc)
|
| 148 |
+
return False
|
| 149 |
+
|
| 150 |
+
def _apply_state(self, state: dict, source: str) -> bool:
    """Apply a loaded state dict to the organism.

    Args:
        state: mapping that may contain ``graph_checkpoint_json``,
            ``node_content``, ``stats``, ``amplitudes`` and ``phases``;
            missing keys fall back to empty values.
        source: label used in log messages (e.g. "local" or "hub").

    Returns:
        True when the state was applied, False if anything raised
        (the error is logged as a warning).
    """
    try:
        graph = self._graph

        # Graph checkpoint — stored as a JSON string, handed to the
        # graph through a temporary file because restore() takes a path.
        graph_json = state.get('graph_checkpoint_json')
        # Compare against None explicitly: an existing graph must be
        # restored into even if it happens to evaluate as falsy.
        if graph_json and graph is not None:
            tmp = os.path.join(self._state_path, "_tmp_restore.ckpt")
            try:
                with open(tmp, 'w') as f:
                    f.write(graph_json)
                graph.restore(tmp)
            finally:
                # Never leave the temp checkpoint behind, even when
                # writing or restoring fails.
                if os.path.exists(tmp):
                    os.remove(tmp)

        # Content map
        self._node_content = state.get('node_content', {})

        # Stats — saved counters overlay the defaults
        saved_stats = state.get('stats', {})
        self.stats.update(saved_stats)
        self._step_count = self.stats.get('steps', 0)

        # Amplitudes
        self._amplitudes = state.get('amplitudes', {})
        self._phases = state.get('phases', {})

        logger.info(
            "Organism restored from %s: %d nodes, %d syn, %d content, step %d, %d amplitudes",
            source,
            len(graph.nodes) if graph is not None else 0,
            len(graph.synapses) if graph is not None else 0,
            len(self._node_content),
            self._step_count,
            len(self._amplitudes),
        )
        return True
    except Exception as exc:
        logger.warning("State apply failed (%s): %s", source, exc)
        return False
|
| 187 |
+
|
| 188 |
+
def _init_substrate(self):
    """Initialize the NeuroGraph SNN substrate and the embedding function.

    Builds ``self._graph`` from ``neuro_foundation.Graph`` (left as None
    if that fails) and sets ``self._embed_fn`` to ``ng_embed.embed``,
    falling back to ``self._hash_embed`` when ``ng_embed`` cannot be
    imported.

    The substrate directory is placed on ``sys.path`` only for the
    duration of each import and is removed afterwards — but only if this
    method added it, so an entry put there by someone else is never
    taken away.
    """
    # --- Graph ---------------------------------------------------------
    try:
        added = _substrate_dir not in sys.path
        if added:
            sys.path.insert(0, _substrate_dir)
        try:
            from neuro_foundation import Graph
        finally:
            # Runs on success and on import failure alike.
            if added and _substrate_dir in sys.path:
                sys.path.remove(_substrate_dir)

        self._graph = Graph(config={
            "default_threshold": 0.85,
            "decay_rate": 0.97,
            "prime_strength": 1.0,
            "learning_rate": 0.08,
            "surprise_reward_scaling": 1.5,
        })
        logger.info("Substrate initialized: full NeuroGraph SNN")
    except Exception as exc:
        logger.error("NeuroGraph init failed: %s", exc)
        self._graph = None

    # --- Embedding function -------------------------------------------
    try:
        added = _substrate_dir not in sys.path
        if added:
            sys.path.insert(0, _substrate_dir)
        try:
            from ng_embed import embed
        finally:
            if added and _substrate_dir in sys.path:
                sys.path.remove(_substrate_dir)

        self._embed_fn = embed
        logger.info("Embedding function loaded (ng_embed)")
    except Exception as exc:
        logger.warning("ng_embed not available, using hash fallback: %s", exc)
        self._embed_fn = self._hash_embed
|
| 242 |
+
|
| 243 |
+
def _hash_embed(self, text: str) -> np.ndarray:
|
| 244 |
+
"""Fallback embedding — deterministic hash to 768-dim vector."""
|
| 245 |
+
import hashlib
|
| 246 |
+
h = hashlib.sha256(text.encode()).digest()
|
| 247 |
+
# Expand hash to 768 dims
|
| 248 |
+
rng = np.random.RandomState(int.from_bytes(h[:4], 'big'))
|
| 249 |
+
return rng.randn(768).astype(np.float32)
|
| 250 |
+
|
| 251 |
+
# -----------------------------------------------------------------
|
| 252 |
+
# The Loop
|
| 253 |
+
# -----------------------------------------------------------------
|
| 254 |
+
|
| 255 |
+
def deposit_experience(self, text: str) -> Optional[str]:
    """Deposit raw experience into the substrate. Law 7 — unclassified.

    Creates a graph node for ``text`` (content truncated to 200 characters),
    stores its embedding in the node metadata, registers an amplitude of 1.0
    and a phase for it, stimulates it, and then lets it interfere with every
    other node that carries an embedding.

    For each existing node the interference term is::

        a_new * a_existing * cos(phase_new - phase_existing) * (2 * sim - 1)

    where ``sim`` is the cosine similarity of the two embeddings. The
    existing node's amplitude is moved by ``_interference_rate`` times that
    term (clamped to [0, 1]); when the term is positive the node is also
    stimulated with a current of ``1.5 * term``.

    The phase is derived from a SHA-256 digest of the node id instead of the
    built-in ``hash()``: string hashing is salted per process, so the
    built-in would assign the same node id a different phase on every run.

    Args:
        text: Raw experience text.

    Returns:
        The id of the new node, or ``None`` when there is no graph or the
        deposit failed (failures are logged at debug level).
    """
    if self._graph is None:
        return None

    try:
        import hashlib
        import math

        embedding = self._embed_fn(text)
        # hash() is only a uniqueness suffix here; the id itself is what
        # gets stored, so per-process salting is harmless for it.
        node_id = f"exp_{self._step_count}_{hash(text) & 0xFFFF:04x}"

        # Create node in the SNN
        node = self._graph.create_node(
            node_id=node_id,
            metadata={
                "content": text[:200],
                "step": self._step_count,
                "type": "experience",
            },
        )

        # Store embedding in metadata for Pith similarity search
        node.metadata['embedding'] = embedding.tolist()

        # Track for Pith retrieval
        self._node_content[node_id] = text[:200]

        # New node starts at full amplitude with a reproducible phase in
        # [0, 6.27] (628 buckets of 0.01 rad, roughly one full turn).
        self._amplitudes[node_id] = 1.0
        digest = hashlib.sha256(node_id.encode()).digest()
        self._phases[node_id] = float(int.from_bytes(digest[:8], 'big') % 628) / 100.0

        self._graph.stimulate(node_id, current=1.5)  # new node always fires

        # Nothing in the loop below writes the new node's own amplitude or
        # phase, so these are loop-invariant and read once.
        a_new = self._amplitudes.get(node_id, 1.0)
        phase_new = self._phases.get(node_id, 0.0)
        norm_new = np.linalg.norm(embedding)

        for existing_id, existing_node in self._graph.nodes.items():
            if existing_id == node_id:
                continue
            existing_emb = existing_node.metadata.get('embedding') if existing_node.metadata else None
            if existing_emb is None:
                continue

            existing_emb = np.array(existing_emb, dtype=np.float32)
            # Cosine similarity; the epsilon guards against zero-norm vectors.
            sim = float(np.dot(embedding, existing_emb) /
                        (norm_new * np.linalg.norm(existing_emb) + 1e-8))

            a_existing = self._amplitudes.get(existing_id, 0.5)
            phase_existing = self._phases.get(existing_id, 0.0)

            # Amplitude product × cosine of phase difference.
            interference = a_new * a_existing * math.cos(phase_new - phase_existing)

            # (2*sim - 1) maps sim in [0, 1] onto [-1, 1]: sim > 0.5 keeps
            # the sign of the interference, sim < 0.5 flips it.
            effective_interference = interference * (2.0 * sim - 1.0)

            # Nudge the existing node's amplitude, clamped to [0, 1].
            self._amplitudes[existing_id] = max(0.0, min(1.0,
                a_existing + self._interference_rate * effective_interference))

            # Only constructive interference co-stimulates the existing node.
            if effective_interference > 0:
                current = 1.5 * effective_interference
                self._graph.stimulate(existing_id, current=current)

        return node_id
    except Exception as exc:
        logger.debug("Deposit failed: %s", exc)
        return None
|
| 339 |
+
|
| 340 |
+
def step(self) -> Dict[str, Any]:
    """Run one substrate step and update amplitude/phase bookkeeping.

    Calls ``self._graph.step()``, copies the returned step result's fields
    into a plain dict, then:

    * multiplies every tracked amplitude by ``1 - _amplitude_decay``;
    * advances every tracked phase by a per-node frequency in
      ``[0.10, 0.19]`` rad, wrapping at 2π;
    * adds ``_amplitude_coupling`` (capped at 1.0) to the amplitude of each
      node that fired;
    * refreshes ``self.stats`` from the live graph.

    The per-node frequency is derived from a SHA-256 digest of the node id,
    not the built-in ``hash()``: string hashing is salted per process, so
    the built-in would give a node a different frequency after a restart
    even though its phase is persisted.

    Returns:
        Dict with keys ``step``, ``fired_nodes``, ``fired_hyperedges``,
        ``predictions_confirmed``, ``predictions_surprised``,
        ``synapses_pruned``, ``synapses_sprouted`` and ``timestep``;
        an empty dict when there is no graph or the step failed.
    """
    if self._graph is None:
        return {}

    try:
        import hashlib
        import math

        step_result = self._graph.step()
        self._step_result = step_result
        self._step_count += 1

        # Read the step result's fields directly into a plain dict.
        result = {
            'step': self._step_count,
            'fired_nodes': list(step_result.fired_node_ids),
            'fired_hyperedges': list(step_result.fired_hyperedge_ids),
            'predictions_confirmed': step_result.predictions_confirmed,
            'predictions_surprised': step_result.predictions_surprised,
            'synapses_pruned': step_result.synapses_pruned,
            'synapses_sprouted': step_result.synapses_sprouted,
            'timestep': step_result.timestep,
        }

        # Amplitude dynamics — decay + free phase drift for every node.
        two_pi = 2 * math.pi
        for nid in self._amplitudes:
            self._amplitudes[nid] *= (1.0 - self._amplitude_decay)

            # Stable natural frequency: one of ten buckets, 0.10 .. 0.19.
            digest = hashlib.sha256(str(nid).encode()).digest()
            freq = 0.1 + (int.from_bytes(digest[:8], 'big') % 10) * 0.01
            self._phases[nid] = (self._phases.get(nid, 0.0) + freq) % two_pi

        # Fired nodes get an amplitude boost (Hebbian — what fires grows).
        # Nodes without a tracked amplitude start from 0.5.
        for nid in result['fired_nodes']:
            a = self._amplitudes.get(nid, 0.5)
            self._amplitudes[nid] = min(1.0, a + self._amplitude_coupling)

        # Update stats from live graph
        self.stats['steps'] = self._step_count
        self.stats['nodes'] = len(self._graph.nodes)
        self.stats['synapses'] = len(self._graph.synapses)
        self.stats['hyperedges'] = len(self._graph.hyperedges)
        self.stats['fired_nodes'] = len(result['fired_nodes'])
        self.stats['fired_hyperedges'] = len(result['fired_hyperedges'])
        self.stats['predictions_confirmed'] = result['predictions_confirmed']
        self.stats['predictions_surprised'] = result['predictions_surprised']

        # Amplitude stats
        if self._amplitudes:
            amps = list(self._amplitudes.values())
            self.stats['avg_amplitude'] = round(sum(amps) / len(amps), 4)
            self.stats['max_amplitude'] = round(max(amps), 4)
            self.stats['min_amplitude'] = round(min(amps), 4)

        return result
    except Exception as exc:
        logger.debug("Step failed: %s", exc)
        return {}
|
| 403 |
+
|
| 404 |
+
def kiss_extract(self, step_result: Dict) -> Dict[str, Any]:
    """KISS bucket — decide whether a substrate step produced anything new.

    Reads the dict produced by ``step()`` and classifies it:

    * empty/missing result                        -> skip (``no_step_result``)
    * nothing fired, pruned or sprouted           -> skip (``substrate_quiet``)
    * predictions made, none surprised, no sprout -> skip (``all_predicted``)
    * anything else                               -> pass (``novel_activity``)

    ``surprise_ratio`` is ``surprised / (confirmed + surprised)``; when no
    predictions were made at all it defaults to 0.5, so a step with firing
    but no predictions still passes.

    ``stats['kiss_skips']`` / ``stats['kiss_passes']`` are incremented for
    every outcome except ``no_step_result``.

    Every return value has the same keys, so callers can index them without
    checking which branch was taken:

    Returns:
        'action': 'skip' | 'pass'
        'reason': why
        'novel_nodes': count of fired nodes
        'novel_hyperedges': count of fired hyperedges
        'surprise_ratio': how much the substrate was surprised
        'structural_changes': synapses pruned + sprouted
    """
    if not step_result:
        # Same shape as the other branches — previously this omitted the
        # documented count/ratio keys.
        return {
            'action': 'skip',
            'reason': 'no_step_result',
            'novel_nodes': 0,
            'novel_hyperedges': 0,
            'surprise_ratio': 0.0,
            'structural_changes': 0,
        }

    fired_n = len(step_result.get('fired_nodes', []))
    fired_he = len(step_result.get('fired_hyperedges', []))
    confirmed = step_result.get('predictions_confirmed', 0)
    surprised = step_result.get('predictions_surprised', 0)
    pruned = step_result.get('synapses_pruned', 0)
    sprouted = step_result.get('synapses_sprouted', 0)

    # Nothing happened — skip
    if fired_n == 0 and fired_he == 0 and pruned == 0 and sprouted == 0:
        self.stats['kiss_skips'] += 1
        return {
            'action': 'skip',
            'reason': 'substrate_quiet',
            'novel_nodes': 0,
            'novel_hyperedges': 0,
            'surprise_ratio': 0.0,
            'structural_changes': 0,
        }

    # Surprise ratio — how much was unexpected (0.5 when nothing was predicted)
    total_predictions = confirmed + surprised
    surprise_ratio = surprised / total_predictions if total_predictions > 0 else 0.5

    # Everything was predicted and no new synapses grew — skip
    if surprise_ratio == 0 and sprouted == 0:
        self.stats['kiss_skips'] += 1
        return {
            'action': 'skip',
            'reason': 'all_predicted',
            'novel_nodes': fired_n,
            'novel_hyperedges': fired_he,
            'surprise_ratio': 0.0,
            'structural_changes': pruned + sprouted,
        }

    # Something novel happened — pass
    self.stats['kiss_passes'] += 1
    return {
        'action': 'pass',
        'reason': 'novel_activity',
        'novel_nodes': fired_n,
        'novel_hyperedges': fired_he,
        'surprise_ratio': round(surprise_ratio, 4),
        'structural_changes': pruned + sprouted,
    }
|
| 466 |
+
|
| 467 |
+
def pith_extract(self, query: str, max_context: int = 10) -> List[str]:
    """Pith bucket — rank stored node content for a query.

    For every graph node that carries an embedding:

    1. ``sim`` = cosine similarity between the query embedding and the node
       embedding.
    2. ``interference = amp * cos(phase - query_phase) * sim`` where ``amp``
       and ``phase`` are the node's tracked amplitude/phase (defaults 0.5
       and 0.0) and ``query_phase`` is derived from the query text.
    3. ``effective_amp = clamp(amp + _interference_rate * interference, 0, 1)``.
    4. Score = ``effective_amp ** 2`` ("Born rule").

    Nodes with no tracked content or a score of 0.001 or less are dropped.
    The three best-scoring nodes are then primed through
    ``graph.prime_and_propagate`` (read-only); any scored node that
    propagation reports as fired gets its score multiplied by 1.5. The
    ``max_context`` highest scores are returned.

    The query phase uses a SHA-256 digest of the query instead of the
    built-in ``hash()``: string hashing is salted per process, so the
    built-in would rank the same query differently after a restart.

    Args:
        query: Text to retrieve context for.
        max_context: Maximum number of context strings to return; values
            below zero are treated as zero.

    Returns:
        Node content strings, best score first; an empty list when there is
        no graph, nothing scored, or extraction failed.
    """
    if self._graph is None:
        return []

    try:
        import hashlib
        import math

        query_embedding = self._embed_fn(query)

        # Query-only quantities: computed once, not once per node.
        query_norm = np.linalg.norm(query_embedding)
        digest = hashlib.sha256(query.encode()).digest()
        query_phase = float(int.from_bytes(digest[:8], 'big') % 628) / 100.0

        scored = []

        for nid, node in self._graph.nodes.items():
            emb = node.metadata.get('embedding') if node.metadata else None
            if emb is None:
                continue

            node_emb = np.array(emb, dtype=np.float32)
            # Cosine similarity; the epsilon guards against zero-norm vectors.
            sim = float(np.dot(query_embedding, node_emb) /
                        (query_norm * np.linalg.norm(node_emb) + 1e-8))

            # Current amplitude and phase
            amp = self._amplitudes.get(nid, 0.5)
            phase = self._phases.get(nid, 0.0)

            # Interference: amplitude × cosine(phase difference) × similarity
            interference = amp * math.cos(phase - query_phase) * sim

            # Effective amplitude after query interference
            effective_amp = max(0.0, min(1.0, amp + self._interference_rate * interference))

            # Born rule: score ∝ amplitude²
            born_score = effective_amp * effective_amp

            content = self._node_content.get(nid, '')
            if content and born_score > 0.001:
                scored.append((nid, content, born_score))

        if not scored:
            return []

        # Topology boost: spread activation from the three best nodes and
        # boost whatever the topology connects to them.
        scored.sort(key=lambda x: x[2], reverse=True)
        top_ids = [nid for nid, _, _ in scored[:3]]

        try:
            prop_result = self._graph.prime_and_propagate(
                node_ids=top_ids,
                currents=[10.0] * len(top_ids),
                steps=5,
                write_mode=False,
            )
            fired_entries = getattr(prop_result, 'fired_entries', []) if prop_result else []
            topology_fired = {e.node_id for e in fired_entries}

            # Boost scored nodes that topology also reached
            scored = [
                (nid, content, born * 1.5 if nid in topology_fired else born)
                for nid, content, born in scored
            ]
        except Exception as exc:
            # Propagation failure is non-fatal: fall back to unboosted
            # scores, but leave a trace instead of failing silently.
            logger.debug("Topology boost skipped: %s", exc)

        # Final sort by score, take max_context (a negative limit would
        # otherwise slice from the wrong end of the list).
        scored.sort(key=lambda x: x[2], reverse=True)
        limit = max(0, max_context)
        contexts = [content for _, content, _ in scored[:limit]]

        self.stats['pith_l1_size'] = len(contexts)
        self.stats['pith_promotions'] += len(contexts)

        return contexts
    except Exception as exc:
        logger.debug("Pith extract failed: %s", exc)
        return []
|
| 559 |
+
|
| 560 |
+
def record_outcome(self, user_message: str, response: str, success: bool = True):
    """Feed a finished exchange back into the substrate.

    Stores ``response`` as a new ``resp_*`` node (content cut to 200
    characters, originating query cut to 100), attaches its embedding,
    stimulates it with current 1.0 and registers its content for retrieval.
    A reward of 2.0 (success) or -1.0 (failure) is then injected three
    times, each followed by a raw graph step, and finally the whole state
    is persisted via ``self.save()``.

    Does nothing when there is no graph. Any exception is logged at debug
    level and swallowed.

    Args:
        user_message: The query that produced the response.
        response: The generated response text.
        success: Whether the outcome counts as good.
    """
    graph = self._graph
    if graph is None:
        return

    try:
        vector = self._embed_fn(response)
        suffix = hash(response) & 0xFFFF
        resp_id = f"resp_{self._step_count}_{suffix:04x}"
        snippet = response[:200]

        # Response is stored as raw experience, tagged with its origin.
        meta = {
            "content": snippet,
            "type": "response",
            "step": self._step_count,
            "query": user_message[:100],
        }
        created = graph.create_node(node_id=resp_id, metadata=meta)
        created.metadata['embedding'] = vector.tolist()

        graph.stimulate(resp_id, current=1.0)
        self._node_content[resp_id] = snippet

        # Outcome signal: three reward injections, each followed by a step.
        if success:
            reward = 2.0
        else:
            reward = -1.0
        for strength in (reward,) * 3:
            graph.inject_reward(strength=strength)
            graph.step()

        # Persist so the learned state survives restarts.
        self.save()

    except Exception as exc:
        logger.debug("Outcome recording failed: %s", exc)
|
| 602 |
+
|
| 603 |
+
def get_stats(self) -> Dict[str, Any]:
    """Return a shallow copy of the organism's stats for display.

    The copy means callers can mutate the returned dict without touching
    ``self.stats``.
    """
    snapshot = dict(self.stats)
    return snapshot
|
| 606 |
+
|
| 607 |
+
def save(self):
    """Persist all state — single file, local + hub.

    Builds one state dict holding node content, a copy of the stats, the
    amplitudes and the phases. When a graph exists, it is checkpointed to a
    temporary file under ``self._state_path``; that file's text is embedded
    in the state dict as ``graph_checkpoint_json`` and the temporary file
    is removed. The dict is then written with ``torch.save`` to
    ``self._local_state_path`` and ``self._push_to_hub()`` is called.

    Checkpoint and local-save failures are logged at debug level and do not
    stop the remaining steps.
    """
    import torch

    # Build consolidated state
    state = {
        'node_content': self._node_content,
        'stats': dict(self.stats),
        'amplitudes': self._amplitudes,
        'phases': self._phases,
    }

    # Graph checkpoint — read back as raw text.
    # Compare against None (as the rest of this class does) rather than
    # relying on truthiness, which would skip a graph object that happens
    # to evaluate as falsy (for example one defining __len__ while empty).
    if self._graph is not None:
        tmp = None
        try:
            tmp = os.path.join(self._state_path, "_tmp_save.ckpt")
            self._graph.checkpoint(tmp)
            with open(tmp, 'r') as f:
                state['graph_checkpoint_json'] = f.read()
        except Exception as exc:
            logger.debug("Graph checkpoint failed: %s", exc)
        finally:
            # Always clean up the temp file, including when checkpointing
            # or reading failed part-way through.
            if tmp is not None and os.path.exists(tmp):
                try:
                    os.remove(tmp)
                except OSError as exc:
                    logger.debug("Temp checkpoint cleanup failed: %s", exc)

    # Save locally
    try:
        torch.save(state, self._local_state_path)
    except Exception as exc:
        logger.debug("Local save failed: %s", exc)

    # Push to hub — survives rebuilds
    self._push_to_hub()
|
| 642 |
+
|
| 643 |
+
def _push_to_hub(self):
|
| 644 |
+
"""Push local state file to HF dataset repo."""
|
| 645 |
+
if not self._hf_token or not os.path.exists(self._local_state_path):
|
| 646 |
+
return
|
| 647 |
+
try:
|
| 648 |
+
from huggingface_hub import HfApi, create_repo
|
| 649 |
+
# Ensure repo exists
|
| 650 |
+
create_repo(self.HUB_REPO, repo_type="dataset", exist_ok=True,
|
| 651 |
+
private=True, token=self._hf_token)
|
| 652 |
+
api = HfApi()
|
| 653 |
+
api.upload_file(
|
| 654 |
+
path_or_fileobj=self._local_state_path,
|
| 655 |
+
path_in_repo=self.HUB_FILENAME,
|
| 656 |
+
repo_id=self.HUB_REPO,
|
| 657 |
+
repo_type="dataset",
|
| 658 |
+
token=self._hf_token,
|
| 659 |
+
)
|
| 660 |
+
logger.info("State pushed to hub (%s)", self.HUB_REPO)
|
| 661 |
+
except Exception as exc:
|
| 662 |
+
logger.debug("Hub push failed (non-fatal): %s", exc)
|
nuwave/pith/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pith — The Living Context Lens. NuWave extraction layer."""
|
| 2 |
+
from nuwave.pith.pipeline import PithPipeline, PithConfig
|
nuwave/pith/pipeline.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pith Pipeline — Context Extraction Optimization
|
| 3 |
+
|
| 4 |
+
Manages the model's context window as a three-tier cache hierarchy.
|
| 5 |
+
Stages 1 (Clutter Strip) and 5 (Thermal Eviction) for MVP.
|
| 6 |
+
|
| 7 |
+
The pipeline IS the extraction bucket. It determines what the model
|
| 8 |
+
sees when it reads from the substrate. KISS decides what goes IN.
|
| 9 |
+
Pith decides what comes OUT.
|
| 10 |
+
|
| 11 |
+
# ---- Changelog ----
|
| 12 |
+
# [2026-03-28] Claude Code (Opus 4.6) — Standalone NuWave MVP
|
| 13 |
+
# What: Pith stages 1+5 for HuggingFace deployment
|
| 14 |
+
# Why: Minimum viable extraction — clutter strip + thermal eviction
|
| 15 |
+
# How: Track what's in context, strip redundancy, evict by recency/relevance
|
| 16 |
+
# -------------------
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import hashlib
|
| 22 |
+
import logging
|
| 23 |
+
import time
|
| 24 |
+
from dataclasses import dataclass, field
|
| 25 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 26 |
+
|
| 27 |
+
logger = logging.getLogger("nuwave.pith")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class PithConfig:
    """Tunable parameters for the Pith pipeline.

    Attributes:
        l1_max_entries: Upper bound on actively managed L1 context entries.
        decay_rate: Factor applied to an entry's heat on each turn.
        eviction_threshold: Entries whose heat falls below this are evicted.
        victim_cache_size: How many recently evicted entries are retained
            for fast recovery.
        clutter_similarity: Word-overlap ratio above which incoming content
            is treated as redundant.
    """

    l1_max_entries: int = 20          # L1 capacity
    decay_rate: float = 0.95          # thermal decay, per turn
    eviction_threshold: float = 0.1   # colder than this -> evict
    victim_cache_size: int = 5        # recently evicted, kept for recovery
    clutter_similarity: float = 0.8   # redundancy cut-off
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@dataclass
class ContextEntry:
    """One managed piece of context together with its thermal bookkeeping.

    ``heat`` starts at 1.0 (hot) and is scaled down by ``cool``; ``warm``
    raises it again. Pinned entries never cool.
    """

    content: str
    content_hash: str
    heat: float = 1.0            # 1.0 = hot, decays toward 0
    created_at: float = 0.0
    last_accessed: float = 0.0
    access_count: int = 0
    pinned: bool = False         # constitutional — cannot be evicted

    def warm(self):
        """Register an access: +0.3 heat (capped at 1.0), refresh timestamp, bump counter."""
        self.access_count += 1
        self.last_accessed = time.time()
        boosted = self.heat + 0.3
        self.heat = boosted if boosted < 1.0 else 1.0

    def cool(self, rate: float):
        """Scale heat by ``rate``; pinned entries are left untouched."""
        if self.pinned:
            return
        self.heat = self.heat * rate
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@dataclass
class PithStats:
    """Counters and gauges describing what the Pith pipeline has done so far."""

    total_extractions: int = 0
    clutter_stripped: int = 0
    entries_evicted: int = 0
    victim_recoveries: int = 0
    l1_current_size: int = 0
    l1_avg_heat: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the stats as a plain dict, with ``l1_avg_heat`` rounded to 4 places."""
        snapshot: Dict[str, Any] = {}
        for name in (
            "total_extractions",
            "clutter_stripped",
            "entries_evicted",
            "victim_recoveries",
            "l1_current_size",
        ):
            snapshot[name] = getattr(self, name)
        snapshot["l1_avg_heat"] = round(self.l1_avg_heat, 4)
        return snapshot
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class PithPipeline:
    """Context extraction pipeline — the model's cache hierarchy.

    Manages what the model sees in its system prompt / context window.
    Tiers:
        L1: Active context — hot, immediately available
        L2: Warm context — staged, promotable (future)
        Victim cache: Recently evicted — fast recovery

    MVP implements Stage 1 (Clutter Strip) and Stage 5 (Thermal Eviction).

    Invariants maintained by this class:
        * a given content hash is held at most once across L1 and the
          victim cache;
        * pinned entries are never evicted, even if that means L1 exceeds
          ``l1_max_entries``;
        * the victim cache never holds more than ``victim_cache_size``
          entries after an extraction.
    """

    # Minimum fraction of query words an entry must share with the query to
    # be warmed or recovered from the victim cache.
    _QUERY_OVERLAP_MIN = 0.3

    def __init__(self, config: Optional[PithConfig] = None):
        """Create an empty pipeline; ``config`` defaults to ``PithConfig()``."""
        self._config = config or PithConfig()
        self._l1: List[ContextEntry] = []
        self._victim_cache: List[ContextEntry] = []
        self._content_seen: Dict[str, int] = {}  # hash → times admitted as novel
        self.stats = PithStats()

    def extract(
        self,
        candidate_context: List[str],
        query: str = "",
    ) -> List[str]:
        """Extract optimized context from candidates.

        Order of operations per call:
            1. cool every L1 entry by ``decay_rate``;
            2. Stage 1 — strip clutter from the candidates and add what is
               novel to L1 at full heat;
            3. if a query is given, recover matching victim-cache entries
               and warm every L1 entry that overlaps the query;
            4. Stage 5 — thermal eviction and capacity enforcement;
            5. refresh size/heat stats.

        Args:
            candidate_context: New context chunks to consider adding
            query: Current user query (word overlap drives warming/recovery)

        Returns:
            L1 contents, hottest first.
        """
        self.stats.total_extractions += 1

        # Apply thermal decay to all L1 entries
        for entry in self._l1:
            entry.cool(self._config.decay_rate)

        # Stage 1: Clutter Strip
        novel = self._clutter_strip(candidate_context)

        # Add novel content to L1
        now = time.time()
        for content in novel:
            self._l1.append(ContextEntry(
                content=content,
                content_hash=self._hash(content),
                heat=1.0,
                created_at=now,
                last_accessed=now,
                access_count=1,
            ))

        if query:
            # Check victim cache — anything relevant to current query?
            self._sweep_victim_cache(query)

            # Warm entries that match current query
            query_words = set(query.lower().split())
            for entry in self._l1:
                if self._overlaps_query(query_words, entry.content):
                    entry.warm()

        # Stage 5: Thermal Eviction
        self._thermal_eviction()

        # Update stats (average heat is reset when L1 is empty so it never
        # reports a stale value).
        self.stats.l1_current_size = len(self._l1)
        if self._l1:
            self.stats.l1_avg_heat = sum(e.heat for e in self._l1) / len(self._l1)
        else:
            self.stats.l1_avg_heat = 0.0

        # Return L1 contents ordered by heat (hottest first)
        sorted_l1 = sorted(self._l1, key=lambda e: e.heat, reverse=True)
        return [e.content for e in sorted_l1]

    def _clutter_strip(self, candidates: List[str]) -> List[str]:
        """Stage 1: keep only candidates that are genuinely new.

        Per candidate, in order:
            * already in L1 (same hash)      -> warm that entry, strip;
            * repeated earlier in this batch -> strip;
            * sitting in the victim cache    -> move it back to L1, warmed
              (counted as a victim recovery, not as clutter);
            * too similar (word overlap) to L1 or to content already
              accepted from this batch       -> strip;
            * otherwise                      -> novel.

        Returns:
            The novel candidates, in input order.
        """
        novel: List[str] = []
        accepted_hashes = set()
        l1_by_hash: Dict[str, ContextEntry] = {}
        for entry in self._l1:
            l1_by_hash.setdefault(entry.content_hash, entry)

        for content in candidates:
            h = self._hash(content)

            resident = l1_by_hash.get(h)
            if resident is not None:
                # Already have this — strip it and warm the existing entry
                self.stats.clutter_stripped += 1
                resident.warm()
                continue

            if h in accepted_hashes:
                # Exact repeat within this batch
                self.stats.clutter_stripped += 1
                continue

            evicted = self._pop_victim(h)
            if evicted is not None:
                # Re-offered after eviction: bring the entry back instead
                # of discarding the content.
                evicted.warm()
                self._l1.append(evicted)
                l1_by_hash[h] = evicted
                self.stats.victim_recoveries += 1
                continue

            # Check word-level similarity against L1 and this batch
            if self._is_too_similar(content, novel):
                self.stats.clutter_stripped += 1
                continue

            novel.append(content)
            accepted_hashes.add(h)
            self._content_seen[h] = self._content_seen.get(h, 0) + 1

        return novel

    def _pop_victim(self, content_hash: str) -> Optional[ContextEntry]:
        """Remove and return the victim-cache entry with this hash, if any."""
        for idx, entry in enumerate(self._victim_cache):
            if entry.content_hash == content_hash:
                return self._victim_cache.pop(idx)
        return None

    def _is_too_similar(self, content: str, pending: Optional[List[str]] = None) -> bool:
        """Check whether ``content`` is redundant by word overlap.

        Similarity is ``|shared words| / max(|words a|, |words b|)`` on
        lower-cased whitespace tokens, compared against every L1 entry and
        every string in ``pending``. Content with no words at all is always
        treated as redundant.
        """
        content_words = set(content.lower().split())
        if not content_words:
            return True

        existing_texts = [entry.content for entry in self._l1]
        if pending:
            existing_texts.extend(pending)

        for text in existing_texts:
            other_words = set(text.lower().split())
            if not other_words:
                continue
            overlap = len(content_words & other_words)
            similarity = overlap / max(len(content_words), len(other_words))
            if similarity > self._config.clutter_similarity:
                return True

        return False

    def _thermal_eviction(self):
        """Stage 5: evict cold entries to the victim cache and enforce capacity."""
        surviving = []
        for entry in self._l1:
            if entry.pinned:
                surviving.append(entry)
                continue

            if entry.heat < self._config.eviction_threshold:
                # Evict to victim cache
                self._victim_cache.append(entry)
                self.stats.entries_evicted += 1
            else:
                surviving.append(entry)

        # Enforce L1 capacity — evict coldest if over limit
        capacity = self._config.l1_max_entries
        if len(surviving) > capacity:
            # Pinned first, then hottest.
            surviving.sort(key=lambda e: (e.pinned, e.heat), reverse=True)
            overflow = surviving[capacity:]
            surviving = surviving[:capacity]
            for entry in overflow:
                if entry.pinned:
                    # Pinned entries cannot be evicted; keep them even past
                    # capacity rather than dropping them.
                    surviving.append(entry)
                else:
                    self._victim_cache.append(entry)
                    self.stats.entries_evicted += 1

        self._l1 = surviving

        # Trim victim cache to its newest entries. A size of zero needs its
        # own branch because ``lst[-0:]`` is the whole list, not an empty one.
        keep = self._config.victim_cache_size
        if keep <= 0:
            self._victim_cache = []
        elif len(self._victim_cache) > keep:
            self._victim_cache = self._victim_cache[-keep:]

    @classmethod
    def _overlaps_query(cls, query_words: set, content: str) -> bool:
        """True when more than ``_QUERY_OVERLAP_MIN`` of the query's words occur in ``content``."""
        entry_words = set(content.lower().split())
        overlap = len(query_words & entry_words) / max(len(query_words), 1)
        return overlap > cls._QUERY_OVERLAP_MIN

    def _sweep_victim_cache(self, query: str):
        """Sweep victim cache — recover entries relevant to current query."""
        if not self._victim_cache:
            return

        query_words = set(query.lower().split())
        recovered = []
        remaining = []

        for entry in self._victim_cache:
            if self._overlaps_query(query_words, entry.content):
                entry.warm()
                recovered.append(entry)
                self.stats.victim_recoveries += 1
            else:
                remaining.append(entry)

        self._victim_cache = remaining
        self._l1.extend(recovered)

    def pin(self, content: str):
        """Pin content — constitutional, cannot be evicted.

        If the content is already in L1 that entry is pinned in place;
        otherwise a fresh pinned entry is added. Any copy lingering in the
        victim cache is discarded so it cannot later be recovered as a
        duplicate.
        """
        h = self._hash(content)
        for entry in self._l1:
            if entry.content_hash == h:
                entry.pinned = True
                return

        # Not in L1 — drop any evicted copy, then add it pinned
        self._victim_cache = [e for e in self._victim_cache if e.content_hash != h]
        now = time.time()
        self._l1.append(ContextEntry(
            content=content,
            content_hash=h,
            heat=1.0,
            created_at=now,
            last_accessed=now,
            access_count=1,
            pinned=True,
        ))

    @staticmethod
    def _hash(content: str) -> str:
        """Return the first 16 hex characters of the SHA-256 of ``content``."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]
|
nuwave/splat_engine.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gaussian Splat Weight Decomposition Engine
|
| 3 |
+
==========================================
|
| 4 |
+
Decomposes dense weight matrices into collections of Gaussian splats.
|
| 5 |
+
Each splat has: position (mu), spread (sigma), amplitude (alpha).
|
| 6 |
+
|
| 7 |
+
The splats ARE the weight representation. Inference runs through them,
|
| 8 |
+
not through the original dense matrix. This is physics-based compression:
|
| 9 |
+
resolution concentrates where the weight landscape has structure.
|
| 10 |
+
|
| 11 |
+
For BitNet ternary weights {-1, 0, 1}, splats fit naturally:
|
| 12 |
+
- Positive splats over +1 clusters
|
| 13 |
+
- Negative splats over -1 clusters
|
| 14 |
+
- Zero regions need no splats (free compression)
|
| 15 |
+
|
| 16 |
+
# ---- Changelog ----
|
| 17 |
+
# [2026-04-05] Claude Code (Opus 4.6) — Initial implementation
|
| 18 |
+
# What: Gaussian splat decomposition + reconstruction + fitting
|
| 19 |
+
# Why: PoC for physics-based weight compression with Lenia dynamics
|
| 20 |
+
# -------------------
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import logging
|
| 26 |
+
import time
|
| 27 |
+
from dataclasses import dataclass
|
| 28 |
+
from typing import Dict, List, Optional, Tuple
|
| 29 |
+
|
| 30 |
+
import torch
|
| 31 |
+
import torch.nn as nn
|
| 32 |
+
import numpy as np
|
| 33 |
+
|
| 34 |
+
logger = logging.getLogger("uniai.splat")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class SplatConfig:
    """Tunable settings for decomposing a weight matrix into Gaussian splats.

    Field order is part of the interface (positional construction), so new
    fields should only be appended.
    """

    # Splats allotted per matrix element; this sets the compression level.
    # Example: 0.1 -> one splat for every ten weights (10x fewer splats).
    splat_ratio: float = 0.1

    # Lower bound on the number of splats used for a layer.
    min_splats: int = 32

    # Upper bound on the number of splats used for a layer (memory safety).
    max_splats: int = 8192

    # Spread (sigma) every splat starts with.
    init_sigma: float = 1.0

    # Number of gradient-descent steps used to fit splats to dense weights.
    fit_iterations: int = 200

    # Learning rate of the fitting optimizer.
    fit_lr: float = 0.01

    # Fitting stops early once the reconstruction MSE drops below this value.
    fit_tolerance: float = 1e-4
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class GaussianSplats:
    """
    A collection of isotropic 2-D Gaussian splats representing a weight matrix.

    Each splat i has:
    - mu_i:    (2,) position in weight space (row, col)
    - sigma_i: scalar spread (isotropic for Phase 0)
    - alpha_i: scalar amplitude (positive or negative)

    The reconstructed weight at position (r, c) is:
        W(r,c) = sum_i alpha_i * exp(-||[r,c] - mu_i||^2 / (2 * sigma_i^2))
    """

    def __init__(self, n_splats: int, rows: int, cols: int, device: torch.device = None):
        """Create ``n_splats`` inert splats for a ``rows`` x ``cols`` matrix.

        All splats start at position (0, 0) with sigma 1 and alpha 0, so the
        initial reconstruction is all zeros. ``device`` defaults to CPU.
        """
        self.n_splats = n_splats
        self.rows = rows
        self.cols = cols
        self.device = device or torch.device('cpu')

        # Splat parameters — these are what Lenia will operate on
        self.mu = torch.zeros(n_splats, 2, device=self.device)   # positions (row, col)
        self.sigma = torch.ones(n_splats, device=self.device)    # spreads
        self.alpha = torch.zeros(n_splats, device=self.device)   # amplitudes

    def reconstruct(self, chunk_size: int = 512) -> torch.Tensor:
        """Reconstruct the dense weight matrix from splats.

        W(r,c) = sum_i alpha_i * exp(-||[r,c] - mu_i||^2 / (2 * sigma_i^2))

        An isotropic Gaussian factorizes into a row term times a column term,
        so the contribution of a chunk of splats is a single matrix product:
        (rows, chunk) @ (chunk, cols). Splats are processed ``chunk_size`` at
        a time, which bounds the intermediates to (rows + cols) * chunk_size
        elements instead of materializing a (rows, cols) grid per splat.

        Returns a float32 tensor of shape (rows, cols) on ``self.device``.
        """
        W = torch.zeros(self.rows, self.cols, device=self.device)

        # 1-D coordinate axes; the 2-D grid is never built explicitly.
        row_coords = torch.arange(self.rows, dtype=torch.float32, device=self.device)
        col_coords = torch.arange(self.cols, dtype=torch.float32, device=self.device)

        for start in range(0, self.n_splats, chunk_size):
            end = min(start + chunk_size, self.n_splats)
            chunk_mu = self.mu[start:end]         # (chunk, 2)
            chunk_alpha = self.alpha[start:end]   # (chunk,)
            # Same denominator as the formula; the epsilon guards sigma == 0.
            var = (2 * self.sigma[start:end] ** 2 + 1e-8).unsqueeze(0)   # (1, chunk)

            row_factor = torch.exp(
                -(row_coords.unsqueeze(1) - chunk_mu[:, 0].unsqueeze(0)) ** 2 / var
            )   # (rows, chunk)
            col_factor = torch.exp(
                -(col_coords.unsqueeze(1) - chunk_mu[:, 1].unsqueeze(0)) ** 2 / var
            )   # (cols, chunk)

            # Cast to W's dtype so parameters stored in another precision
            # still accumulate into the float32 output.
            weighted = (row_factor * chunk_alpha.unsqueeze(0)).to(W.dtype)
            W += weighted @ col_factor.to(W.dtype).t()

        return W

    def reconstruct_fast(self) -> torch.Tensor:
        """Vectorized reconstruction — faster but uses more memory.

        Builds the full (rows*cols, n_splats) distance table in one shot.
        Falls back to :meth:`reconstruct` when rows * cols * n_splats exceeds
        50 million elements.
        """
        if self.rows * self.cols * self.n_splats > 50_000_000:
            return self.reconstruct()

        # All positions as (rows*cols, 2) grid
        row_coords = torch.arange(self.rows, dtype=torch.float32, device=self.device)
        col_coords = torch.arange(self.cols, dtype=torch.float32, device=self.device)
        rr, cc = torch.meshgrid(row_coords, col_coords, indexing='ij')
        positions = torch.stack([rr.flatten(), cc.flatten()], dim=1)   # (R*C, 2)

        # Squared distance from every position to every splat
        diff = positions.unsqueeze(1) - self.mu.unsqueeze(0)   # (R*C, N, 2)
        dist_sq = (diff ** 2).sum(dim=2)                       # (R*C, N)

        # Gaussian values
        var = 2 * self.sigma.unsqueeze(0) ** 2 + 1e-8   # (1, N)
        gaussians = torch.exp(-dist_sq / var)           # (R*C, N)

        # Amplitude-weighted sum over splats
        W_flat = (gaussians * self.alpha.unsqueeze(0)).sum(dim=1)   # (R*C,)

        return W_flat.reshape(self.rows, self.cols)

    def memory_bytes(self) -> int:
        """Estimate memory usage of the splat representation.

        Counts 16 bytes per splat: mu (2 values), sigma and alpha at 4 bytes
        each, i.e. the estimate assumes float32 parameters.
        """
        return self.n_splats * 16

    def compression_ratio(self) -> float:
        """Compression ratio vs. a dense float32 matrix of the same shape."""
        dense_bytes = self.rows * self.cols * 4   # float32
        return dense_bytes / max(self.memory_bytes(), 1)

    def state_dict(self) -> Dict[str, torch.Tensor]:
        """Export splat parameters (cloned) plus the matrix shape for persistence."""
        return {
            'mu': self.mu.clone(),
            'sigma': self.sigma.clone(),
            'alpha': self.alpha.clone(),
            'rows': torch.tensor(self.rows),
            'cols': torch.tensor(self.cols),
        }

    @classmethod
    def from_state_dict(cls, d: Dict[str, torch.Tensor]) -> 'GaussianSplats':
        """Restore from a dict produced by :meth:`state_dict`.

        The instance is created on the device of ``d['mu']`` so that
        ``self.device`` matches the restored tensors; the tensors themselves
        are adopted as-is (not copied).
        """
        rows, cols = int(d['rows'].item()), int(d['cols'].item())
        n = d['mu'].shape[0]
        # Building on CPU here would leave self.device out of sync with
        # tensors saved on another device and break reconstruct().
        splats = cls(n, rows, cols, device=d['mu'].device)
        splats.mu = d['mu']
        splats.sigma = d['sigma']
        splats.alpha = d['alpha']
        return splats
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def compute_n_splats(rows: int, cols: int, config: SplatConfig) -> int:
    """Pick the splat count for a ``rows`` x ``cols`` matrix.

    The raw budget is ``rows * cols * config.splat_ratio`` truncated to an
    integer; it is capped at ``config.max_splats`` and then raised to at
    least ``config.min_splats``.
    """
    budget = int(rows * cols * config.splat_ratio)
    capped = config.max_splats if config.max_splats < budget else budget
    return capped if capped > config.min_splats else config.min_splats
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def initialize_splats_from_ternary(
    weight: torch.Tensor,
    n_splats: int,
    config: SplatConfig,
) -> GaussianSplats:
    """Initialize splats from a ternary {-1, 0, 1} weight matrix.

    Strategy: seed splats on non-zero weights only — zeros need no
    representation.

    1. Find all non-zero positions.
    2. If there are at most ``n_splats`` of them, use every one; otherwise
       pick ``n_splats`` of them with ``_farthest_point_sample``.
    3. Set each splat's alpha to the weight value at its position.
    4. Set every sigma to ``config.init_sigma``.

    Args:
        weight: 2-D weight matrix.
        n_splats: number of splats to allocate.
        config: supplies ``init_sigma``.

    Returns:
        A ``GaussianSplats`` on ``weight.device``. When fewer non-zero
        weights exist than splats, the surplus splats keep position (0, 0)
        and alpha 0. For an all-zero matrix the splats are returned exactly
        as constructed (alpha 0, sigma 1).
    """
    rows, cols = weight.shape
    splats = GaussianSplats(n_splats, rows, cols, device=weight.device)

    # Integer (row, col) index of every non-zero weight, shape (K, 2).
    # Kept as integers so they can index `weight` directly, without a
    # float round-trip.
    nonzero_idx = (weight != 0).nonzero(as_tuple=False)
    k = nonzero_idx.shape[0]

    if k == 0:
        # All zeros — nothing to represent.
        return splats

    if k <= n_splats:
        # Fewer non-zero elements than splats — use them all.
        splats.mu[:k] = nonzero_idx.float()
        # One vectorized gather instead of a Python loop of scalar reads.
        splats.alpha[:k] = weight[nonzero_idx[:, 0], nonzero_idx[:, 1]]
    else:
        # More candidates than splats — subsample for spatial coverage.
        picks = _farthest_point_sample(nonzero_idx.float(), n_splats)
        chosen = nonzero_idx[picks]
        splats.mu = chosen.float()
        # Slice-assign so alpha keeps its own dtype regardless of weight's.
        splats.alpha[:] = weight[chosen[:, 0], chosen[:, 1]]

    splats.sigma[:] = config.init_sigma
    return splats
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def _farthest_point_sample(points: torch.Tensor, n: int) -> torch.Tensor:
|
| 246 |
+
"""Spatial coverage sampling.
|
| 247 |
+
|
| 248 |
+
For small point sets (< 5000): true farthest-point sampling.
|
| 249 |
+
For large point sets: stratified random sampling (grid-based) for speed.
|
| 250 |
+
"""
|
| 251 |
+
K = len(points)
|
| 252 |
+
|
| 253 |
+
if K <= 5000:
|
| 254 |
+
# True FPS — O(n*K), fine for small sets
|
| 255 |
+
selected = torch.zeros(n, dtype=torch.long, device=points.device)
|
| 256 |
+
selected[0] = torch.randint(K, (1,)).item()
|
| 257 |
+
dists = torch.full((K,), float('inf'), device=points.device)
|
| 258 |
+
|
| 259 |
+
for i in range(1, n):
|
| 260 |
+
last = points[selected[i - 1]].unsqueeze(0)
|
| 261 |
+
new_dists = ((points - last) ** 2).sum(dim=1)
|
| 262 |
+
dists = torch.minimum(dists, new_dists)
|
| 263 |
+
selected[i] = dists.argmax()
|
| 264 |
+
|
| 265 |
+
return selected
|
| 266 |
+
|
| 267 |
+
# Stratified random: divide space into grid, sample from each cell
|
| 268 |
+
# Gives good coverage without O(n*K) cost
|
| 269 |
+
perm = torch.randperm(K, device=points.device)[:n]
|
| 270 |
+
return perm
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def _reconstruct_for_fitting(mu, sigma, alpha, rows, cols, row_chunk=64):
|
| 274 |
+
"""Memory-efficient reconstruction with gradients.
|
| 275 |
+
|
| 276 |
+
Processes rows in chunks to avoid building the full (R*C, N) tensor.
|
| 277 |
+
Each chunk computes its contribution independently, so gradient memory
|
| 278 |
+
stays bounded regardless of matrix size.
|
| 279 |
+
"""
|
| 280 |
+
row_coords = torch.arange(rows, dtype=torch.float32)
|
| 281 |
+
col_coords = torch.arange(cols, dtype=torch.float32)
|
| 282 |
+
|
| 283 |
+
chunks = []
|
| 284 |
+
for r_start in range(0, rows, row_chunk):
|
| 285 |
+
r_end = min(r_start + row_chunk, rows)
|
| 286 |
+
chunk_rows = row_coords[r_start:r_end] # (chunk,)
|
| 287 |
+
|
| 288 |
+
# Distance from each position in this row chunk to each splat
|
| 289 |
+
dr = (chunk_rows.unsqueeze(1) - mu[:, 0].unsqueeze(0)) ** 2 # (chunk, N)
|
| 290 |
+
dc_all = (col_coords.unsqueeze(1) - mu[:, 1].unsqueeze(0)) ** 2 # (cols, N)
|
| 291 |
+
|
| 292 |
+
# For each row in chunk, compute full col contribution
|
| 293 |
+
var = 2 * sigma.unsqueeze(0) ** 2 + 1e-8 # (1, N)
|
| 294 |
+
row_results = []
|
| 295 |
+
for ri in range(len(chunk_rows)):
|
| 296 |
+
dist_sq = dr[ri:ri+1, :] + dc_all # (cols, N) — broadcast row dist
|
| 297 |
+
gaussians = torch.exp(-dist_sq / var) # (cols, N)
|
| 298 |
+
row_vals = (gaussians * alpha.unsqueeze(0)).sum(dim=1) # (cols,)
|
| 299 |
+
row_results.append(row_vals)
|
| 300 |
+
chunks.append(torch.stack(row_results)) # (chunk, cols)
|
| 301 |
+
|
| 302 |
+
return torch.cat(chunks, dim=0) # (rows, cols)
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def fit_splats(
    splats: GaussianSplats,
    target: torch.Tensor,
    config: SplatConfig,
    verbose: bool = False,
) -> Dict[str, float]:
    """Fit splat parameters to reconstruct the target weight matrix.

    Runs Adam on (mu, sigma, alpha) to minimize the mean squared error
    between the row-chunked reconstruction and ``target``. After every
    update sigma is clamped to >= 0.1 and mu is clamped to the matrix
    bounds. Fitting stops early once the MSE falls below
    ``config.fit_tolerance``. The fitted parameters are written back to
    ``splats`` as detached tensors.

    Args:
        splats: splat collection to optimize in place.
        target: dense matrix of shape (splats.rows, splats.cols). It is
            treated as a constant; no gradient flows into it.
        config: supplies ``fit_lr``, ``fit_iterations``, ``fit_tolerance``.
        verbose: when True, print the MSE every 50 steps and a message on
            convergence.

    Returns:
        Dict with ``initial_mse``, ``final_mse`` (MSE measured at the start
        of the last executed step), ``improvement`` (relative MSE
        reduction), ``fit_time_s`` and ``steps``. If ``fit_iterations`` is
        0 nothing is measured: both MSE values are NaN, ``improvement`` is
        0.0 and ``steps`` is 0.
    """
    # Make parameters require grad for fitting
    splats.mu = splats.mu.detach().requires_grad_(True)
    splats.sigma = splats.sigma.detach().requires_grad_(True)
    splats.alpha = splats.alpha.detach().requires_grad_(True)

    optimizer = torch.optim.Adam([splats.mu, splats.sigma, splats.alpha], lr=config.fit_lr)

    # The target is frequently a model parameter; detach it so backward()
    # does not populate the model's .grad with fitting gradients.
    target = target.detach()

    # Choose chunk size based on splat count to stay under ~200MB gradient memory
    mem_per_row = splats.n_splats * splats.cols * 4 * 3   # float32 * (fwd + grad + optimizer)
    row_chunk = max(4, min(64, int(200_000_000 / (mem_per_row + 1))))

    start = time.time()
    initial_mse = None
    final_mse = None
    steps_run = 0

    for step in range(config.fit_iterations):
        optimizer.zero_grad()

        # Memory-efficient chunked reconstruction
        reconstructed = _reconstruct_for_fitting(
            splats.mu, splats.sigma, splats.alpha,
            splats.rows, splats.cols, row_chunk=row_chunk,
        )

        # MSE loss; read it back to the host once per step.
        loss = ((reconstructed - target) ** 2).mean()
        mse = loss.item()
        steps_run = step + 1

        if initial_mse is None:
            initial_mse = mse
        final_mse = mse

        if verbose and step % 50 == 0:
            print(f" fit step {step}: MSE={mse:.6f}", flush=True)

        if mse < config.fit_tolerance:
            if verbose:
                print(f" converged at step {step}", flush=True)
            break

        loss.backward()
        optimizer.step()

        with torch.no_grad():
            # Keep sigma positive
            splats.sigma.clamp_(min=0.1)
            # Keep mu in bounds
            splats.mu[:, 0].clamp_(0, splats.rows - 1)
            splats.mu[:, 1].clamp_(0, splats.cols - 1)

    # Detach after fitting
    splats.mu = splats.mu.detach()
    splats.sigma = splats.sigma.detach()
    splats.alpha = splats.alpha.detach()

    elapsed = time.time() - start

    if initial_mse is None:
        # No iteration ran, so there is no MSE to report.
        initial_mse = final_mse = float('nan')
        improvement = 0.0
    else:
        improvement = (initial_mse - final_mse) / (initial_mse + 1e-8)

    return {
        'initial_mse': initial_mse,
        'final_mse': final_mse,
        'improvement': improvement,
        'fit_time_s': elapsed,
        'steps': steps_run,
    }
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def decompose_layer(
    weight: torch.Tensor,
    config: SplatConfig = None,
    verbose: bool = False,
) -> Tuple[GaussianSplats, Dict[str, float]]:
    """Full pipeline: dense weight matrix -> fitted Gaussian splats.

    1. Determine number of splats from compression ratio
    2. Initialize splats (ternary-aware if applicable)
    3. Fit splats to target via gradient descent
    4. Return splats + metrics

    Args:
        weight: weight tensor. Tensors that are not 2-D are reshaped to
            ``(weight.shape[0], -1)``. The tensor is only read; it is
            detached so fitting never touches its autograd state.
        config: decomposition settings; ``SplatConfig()`` when omitted.
        verbose: forwarded to ``fit_splats`` and enables an info log line
            when ternary weights are detected.

    Returns:
        ``(splats, metrics)`` where ``metrics`` holds the ``fit_splats``
        results plus ``n_splats``, ``matrix_size``, ``dense_elements``,
        ``compression_ratio`` and ``is_ternary``.
    """
    config = config or SplatConfig()

    # Work on a detached view: the weight is usually a model parameter.
    weight = weight.detach()

    if weight.dim() != 2:
        weight = weight.reshape(weight.shape[0], -1)

    rows, cols = weight.shape
    n_splats = compute_n_splats(rows, cols, config)

    # Detect if ternary: every element is exactly -1, 0 or 1. An elementwise
    # mask answers this without sorting the whole matrix via unique().
    is_ternary = bool(((weight == 0) | (weight == 1) | (weight == -1)).all())

    if is_ternary:
        if verbose:
            logger.info(f" Ternary weights detected — using cluster initialization")
        splats = initialize_splats_from_ternary(weight, n_splats, config)
    else:
        # General initialization: random positions, amplitudes from weight samples
        splats = GaussianSplats(n_splats, rows, cols, device=weight.device)
        # Sample on the weight's device so mu lives with sigma and alpha.
        idx_r = torch.randint(rows, (n_splats,), device=weight.device)
        idx_c = torch.randint(cols, (n_splats,), device=weight.device)
        splats.mu = torch.stack([idx_r.float(), idx_c.float()], dim=1)
        # Keep alpha in the splats' own dtype (float32) even when the weight
        # is stored in half precision or as integers.
        splats.alpha = weight[idx_r, idx_c].to(dtype=splats.alpha.dtype)
        splats.sigma[:] = config.init_sigma

    # Fit to target
    fit_metrics = fit_splats(splats, weight, config, verbose=verbose)

    fit_metrics['n_splats'] = n_splats
    fit_metrics['matrix_size'] = (rows, cols)
    fit_metrics['dense_elements'] = rows * cols
    fit_metrics['compression_ratio'] = splats.compression_ratio()
    fit_metrics['is_ternary'] = is_ternary

    return splats, fit_metrics
|
nuwave/substrate/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""NeuroGraph substrate — ng_tract (Rust/PyO3) + canonical Python."""
|
nuwave/substrate/neuro_foundation.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
nuwave/substrate/ng_autonomic.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NG Autonomic Nervous System — Ecosystem-Wide Threat Level State
|
| 3 |
+
|
| 4 |
+
VENDORED FILE. Copy verbatim into any module that participates in
|
| 5 |
+
the autonomic nervous system. Do NOT modify per-module. Changes
|
| 6 |
+
propagate by updating this canonical source and re-vendoring.
|
| 7 |
+
|
| 8 |
+
State file: ~/.et_modules/autonomic_state.json
|
| 9 |
+
|
| 10 |
+
State transitions:
|
| 11 |
+
PARASYMPATHETIC (rest/digest) -> normal operations
|
| 12 |
+
SYMPATHETIC (fight/flight) -> elevated threat, all modules adjust
|
| 13 |
+
|
| 14 |
+
Canonical source: NeuroGraph/ng_autonomic.py
|
| 15 |
+
License: AGPL-3.0
|
| 16 |
+
|
| 17 |
+
Changelog:
|
| 18 |
+
[2026-03-03] River audit — Established UPPERCASE as canonical
|
| 19 |
+
case standard. All states and enums use UPPERCASE. No exceptions.
|
| 20 |
+
Normalizes .upper() on both read and write to prevent silent
|
| 21 |
+
mismatches with any older files or callers.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import logging
|
| 26 |
+
import os
|
| 27 |
+
import time
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
from typing import Any, Dict, Optional
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger("ng_autonomic")
|
| 32 |
+
|
| 33 |
+
__version__ = "1.1.0"
|
| 34 |
+
|
| 35 |
+
_STATE_PATH = Path.home() / ".et_modules" / "autonomic_state.json"
|
| 36 |
+
_VALID_STATES = {"PARASYMPATHETIC", "SYMPATHETIC"}
|
| 37 |
+
_VALID_THREAT_LEVELS = {"none", "low", "medium", "high", "critical"}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def read_state() -> dict:
    """Read current autonomic state. Fast path ~0.1ms.

    Returns dict with keys: state, threat_level, triggered_by,
    timestamp, reason. Defaults to PARASYMPATHETIC if the file is missing,
    unreadable, not valid JSON, or does not contain a JSON object.

    The stored ``state`` is upper-cased; anything that is not a string or
    not one of the valid states is replaced by PARASYMPATHETIC. All other
    keys are returned exactly as stored in the file.

    NOTE(review): this module is marked as vendored in its header — the
    robustness fixes below should also land in the canonical source.
    """
    default = {
        "state": "PARASYMPATHETIC",
        "threat_level": "none",
        "triggered_by": "",
        "timestamp": 0.0,
        "reason": "default — no security module has written state",
    }
    if not _STATE_PATH.exists():
        return default
    try:
        with open(_STATE_PATH, "r") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError as well as undecodable
        # bytes (UnicodeDecodeError); OSError covers a file that vanished
        # or cannot be read.
        return default

    # Valid JSON is not necessarily an object (could be a list, a number,
    # null ...); only a dict can carry the state keys.
    if not isinstance(data, dict):
        return default

    raw_state = data.get("state", "PARASYMPATHETIC")
    # A non-string state (e.g. null) has no .upper(); treat it as invalid.
    raw_state = raw_state.upper() if isinstance(raw_state, str) else "PARASYMPATHETIC"
    if raw_state not in _VALID_STATES:
        raw_state = "PARASYMPATHETIC"
    data["state"] = raw_state
    return data
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def write_state(
    state: str,
    threat_level: str,
    triggered_by: str,
    reason: str,
) -> None:
    """Persist the autonomic state. Only security modules should call this.

    The state is upper-cased before validation. The JSON document is first
    written to a sibling ``.tmp`` file and then moved over the state file
    with ``os.replace``.

    Args:
        state: PARASYMPATHETIC or SYMPATHETIC (uppercase enforced)
        threat_level: none | low | medium | high | critical
        triggered_by: module_id of the calling module
        reason: human-readable reason for the state change

    Raises:
        ValueError: if ``state`` or ``threat_level`` is not a valid value.
    """
    state = state.upper()

    state_ok = state in _VALID_STATES
    if not state_ok:
        raise ValueError(f"Invalid state: {state}. Must be one of {_VALID_STATES}")

    level_ok = threat_level in _VALID_THREAT_LEVELS
    if not level_ok:
        raise ValueError(f"Invalid threat_level: {threat_level}. Must be one of {_VALID_THREAT_LEVELS}")

    # Make sure the state directory exists before anything is written.
    _STATE_PATH.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        "state": state,
        "threat_level": threat_level,
        "triggered_by": triggered_by,
        "timestamp": time.time(),
        "reason": reason,
    }

    # Write to a staging file, then swap it into place.
    staging_path = _STATE_PATH.with_suffix(".tmp")
    with open(staging_path, "w") as handle:
        json.dump(payload, handle, indent=2)
    os.replace(staging_path, _STATE_PATH)
|
nuwave/substrate/ng_ecosystem.py
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NG Ecosystem — E-T Systems Module Integration Standard
|
| 3 |
+
|
| 4 |
+
Single vendorable file that gives any E-T Systems module the full
|
| 5 |
+
three-tier learning architecture:
|
| 6 |
+
|
| 7 |
+
Tier 1 (Standalone): NGLite alone. Local Hebbian learning.
|
| 8 |
+
Zero deps beyond ng_lite.py.
|
| 9 |
+
Tier 2 (Peer-pooled): NGTractBridge (preferred) or NGPeerBridge
|
| 10 |
+
(legacy fallback). Co-located modules share
|
| 11 |
+
learning via per-pair tracts (~/.et_modules/tracts/)
|
| 12 |
+
or legacy JSONL (~/.et_modules/shared_learning/).
|
| 13 |
+
Auto-connects. Tract bridge preferred when present.
|
| 14 |
+
Tier 3 (Full SNN): Removed — modules extract via buckets/tracts.
|
| 15 |
+
Auto-upgrades when NeuroGraph is detected on
|
| 16 |
+
the same host via ETModuleManager.
|
| 17 |
+
|
| 18 |
+
The ecosystem is "Apple-like" by design:
|
| 19 |
+
- Every module is independently useful at Tier 1.
|
| 20 |
+
- Any two co-located modules get a free Tier 2 boost — no config needed.
|
| 21 |
+
- When NeuroGraph is present, all co-located modules transparently
|
| 22 |
+
upgrade to Tier 3: full STDP, hyperedges, predictive coding, and
|
| 23 |
+
CES streaming.
|
| 24 |
+
|
| 25 |
+
The module code doesn't change. The bridge swaps.
|
| 26 |
+
|
| 27 |
+
Usage (inside any E-T Systems module):
|
| 28 |
+
|
| 29 |
+
from ng_ecosystem import NGEcosystem
|
| 30 |
+
|
| 31 |
+
# In your module's __init__ or startup:
|
| 32 |
+
eco = NGEcosystem.get_instance(
|
| 33 |
+
module_id="trollguard",
|
| 34 |
+
state_path="~/.trollguard/ng_lite_state.json",
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Use the ecosystem in your module's hot path:
|
| 38 |
+
embedding = my_embedder(text)
|
| 39 |
+
eco.record_outcome(embedding, target_id="threat:prompt_injection", success=True)
|
| 40 |
+
recs = eco.get_recommendations(embedding)
|
| 41 |
+
novelty = eco.detect_novelty(embedding)
|
| 42 |
+
ctx = eco.get_context(embedding)
|
| 43 |
+
|
| 44 |
+
# Inspect tier at any time:
|
| 45 |
+
print(eco.tier) # 1, 2, or 3
|
| 46 |
+
print(eco.stats()) # full telemetry
|
| 47 |
+
|
| 48 |
+
# Periodic save (call on graceful shutdown or on a timer):
|
| 49 |
+
eco.save()
|
| 50 |
+
|
| 51 |
+
Framework adapters (optional, load separately):
|
| 52 |
+
- openclaw_adapter.py — on_message(text)/recall(text)/stats() for OpenClaw skills
|
| 53 |
+
|
| 54 |
+
Canonical source: https://github.com/greatnorthernfishguy-hub/NeuroGraph
|
| 55 |
+
License: AGPL-3.0
|
| 56 |
+
|
| 57 |
+
# ---- Changelog ----
|
| 58 |
+
# [2026-02-22] Claude (Sonnet 4.6) — Initial creation.
|
| 59 |
+
# What: NGEcosystem class — singleton wrapper implementing the
|
| 60 |
+
# standardized E-T Systems optional integration protocol.
|
| 61 |
+
# Handles Tier 1→2→3 progression, auto-upgrade on NeuroGraph
|
| 62 |
+
# detection, graceful degradation, and unified telemetry.
|
| 63 |
+
# Also defines NGEcosystemAdapter ABC for framework adapters.
|
| 64 |
+
# Why: Each module was wiring ng_lite + peer bridge + SaaS bridge
|
| 65 |
+
# independently with no shared contract. This file is the
|
| 66 |
+
# single vendorable standard so all modules behave identically
|
| 67 |
+
# from an integration perspective.
|
| 68 |
+
# Settings: tier3_upgrade defaults to True (auto-upgrade when NeuroGraph
|
| 69 |
+
# is found). upgrade_poll_interval=300s (re-check for NeuroGraph
|
| 70 |
+
# every 5 minutes — handles cases where NeuroGraph is installed
|
| 71 |
+
# after the module starts). peer_sync_interval=100 (balances
|
| 72 |
+
# freshness vs I/O).
|
| 73 |
+
# How: get_instance() creates NGLite, then tries peer bridge, then
|
| 74 |
+
# queries ETModuleManager for NeuroGraph. All in try/except so
|
| 75 |
+
# each tier attempt is fully independent. A background thread
|
| 76 |
+
# polls for tier upgrades at upgrade_poll_interval.
|
| 77 |
+
# -------------------
|
| 78 |
+
# [2026-03-20] Claude (Opus 4.6) — Tract bridge wiring (punchlist #53 v0.3)
|
| 79 |
+
# What: _init_peer_bridge() now prefers NGTractBridge (per-pair tracts)
|
| 80 |
+
# with automatic fallback to NGPeerBridge (legacy JSONL).
|
| 81 |
+
# Why: JSONL broadcast bridge dams the River. Per-pair tracts enable
|
| 82 |
+
# independently observable pathways for myelination, explore-exploit,
|
| 83 |
+
# vagus nerve, and Elmer tract management.
|
| 84 |
+
# How: Try importing ng_tract_bridge first. If present (vendored),
|
| 85 |
+
# use it. If not (module not yet re-vendored), fall back to
|
| 86 |
+
# ng_peer_bridge. Config key peer_bridge.use_tracts (default True)
|
| 87 |
+
# can force legacy mode if needed.
|
| 88 |
+
# -------------------
|
| 89 |
+
# [2026-03-22] Claude (Opus 4.6) — Dual-pass convenience method (punchlist #81)
|
| 90 |
+
# What: Added dual_record_outcome() that delegates to ng_embed.NGEmbed.
|
| 91 |
+
# Why: Dual-pass embedding (forest + trees) is ecosystem-wide.
|
| 92 |
+
# Modules call eco.dual_record_outcome() instead of eco.record_outcome()
|
| 93 |
+
# for rich content. ng_embed.py owns the extraction + embedding logic.
|
| 94 |
+
# How: Lazy import of ng_embed to avoid circular deps. Passes self
|
| 95 |
+
# (the ecosystem instance) to NGEmbed.dual_record_outcome().
|
| 96 |
+
# -------------------
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
from __future__ import annotations
|
| 100 |
+
|
| 101 |
+
import json
|
| 102 |
+
import logging
|
| 103 |
+
import os
|
| 104 |
+
import threading
|
| 105 |
+
import time
|
| 106 |
+
from abc import ABC, abstractmethod
|
| 107 |
+
from pathlib import Path
|
| 108 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 109 |
+
|
| 110 |
+
import numpy as np
|
| 111 |
+
|
| 112 |
+
logger = logging.getLogger("ng_ecosystem")
|
| 113 |
+
|
| 114 |
+
__version__ = "1.0.0"
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
# --------------------------------------------------------------------------
|
| 118 |
+
# Constants
|
| 119 |
+
# --------------------------------------------------------------------------
|
| 120 |
+
|
| 121 |
+
# Filesystem layout shared by co-located E-T Systems modules. Everything
# lives under the current user's home directory.
ET_MODULES_ROOT = Path.home() / ".et_modules"
# Directory handed to the legacy JSONL peer bridge as its shared_dir.
SHARED_LEARNING_DIR = ET_MODULES_ROOT / "shared_learning"
# NOTE(review): not read anywhere in this file — presumably consumed by
# other ecosystem tooling; confirm before removing.
REGISTRY_PATH = ET_MODULES_ROOT / "registry.json"

# Integer tier identifiers, as reported by NGEcosystem.tier.
TIER_STANDALONE = 1   # NGLite only
TIER_PEER = 2         # + NGPeerBridge
TIER_FULL_SNN = 3     # historical — bridge removed, modules use tracts

# Human-readable label for each tier (used by NGEcosystem.tier_name and
# the startup log line).
TIER_NAMES = {
    TIER_STANDALONE: "Standalone (Tier 1)",
    TIER_PEER: "Peer-pooled (Tier 2)",
    TIER_FULL_SNN: "Full SNN (Tier 3)",
}
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# --------------------------------------------------------------------------
|
| 137 |
+
# Framework Adapter Interface
|
| 138 |
+
# --------------------------------------------------------------------------
|
| 139 |
+
|
| 140 |
+
class NGEcosystemAdapter(ABC):
    """Abstract base for framework-specific adapters over NGEcosystem.

    Implement this to expose the ecosystem to a specific framework.
    An adapter wraps the shared NGEcosystem instance and translates the
    framework's vocabulary (messages, requests, events) into ecosystem
    calls — the same ecosystem, different vocabulary.

    Provided implementations:
        - OpenClawAdapter (openclaw_adapter.py, vendored separately)

    Custom adapters:
        Subclass NGEcosystemAdapter and implement every abstract method;
        the class cannot be instantiated until all three are overridden.
        Call NGEcosystem.get_instance() inside __init__ to bind the
        shared ecosystem. Maintain your own singleton if needed.

    Design contract:
        - Adapters MUST NOT bypass NGEcosystem internals.
        - Adapters handle embedding generation; NGEcosystem handles learning.
        - Adapters are optional and framework-specific. The core
          NGEcosystem is framework-agnostic and always the source of truth.
    """

    @abstractmethod
    def on_message(self, text: str) -> Dict[str, Any]:
        """Process one unit of framework input (message, request, event).

        Args:
            text: Raw text to process.

        Returns:
            Dict with at minimum: {"status": "ingested"|"skipped", "tier": int}
        """
        ...

    @abstractmethod
    def get_context(self, text: str) -> Dict[str, Any]:
        """Retrieve cross-module context for the given text.

        Args:
            text: Query text.

        Returns:
            Dict with recommendations, novelty score, and tier info.
        """
        ...

    @abstractmethod
    def stats(self) -> Dict[str, Any]:
        """Return framework-specific stats including ecosystem tier.

        Returns:
            Dict of telemetry; implementations are expected to include the
            current ecosystem tier.
        """
        ...
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# --------------------------------------------------------------------------
|
| 193 |
+
# NGEcosystem Core
|
| 194 |
+
# --------------------------------------------------------------------------
|
| 195 |
+
|
| 196 |
+
class NGEcosystem:
    """Per-module singleton wrapping the E-T Systems learning substrate.

    On construction the ecosystem boots NGLite (Tier 1) and then tries to
    attach a peer bridge: the tract bridge when it is importable, otherwise
    the legacy JSONL bridge. Modules call record_outcome(),
    get_recommendations(), detect_novelty() and get_context() without
    knowing or caring which tier is active.

    Degradation is graceful: if NGLite cannot be initialised every learning
    call becomes a no-op returning a neutral value, and if no bridge can be
    attached the ecosystem simply stays at Tier 1.

    Thread-safety: calls that read or mutate the NGLite substrate
    (record_outcome, get_recommendations, detect_novelty, save) are
    serialised through an internal lock. get_instance()/reset_instance()
    are guarded by a class-level lock.
    """

    # module_id -> live ecosystem; guarded by _lock.
    _instances: Dict[str, "NGEcosystem"] = {}
    _lock = threading.Lock()

    def __init__(
        self,
        module_id: str,
        state_path: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None,
    ):
        """Build the ecosystem and run the tier boot sequence.

        Prefer get_instance(); calling the constructor directly bypasses
        the singleton registry.

        Args:
            module_id:  Unique module identifier (e.g., "trollguard").
                        Must match the module_id in et_module.json.
            state_path: Path to persist NGLite state JSON ("~" is expanded).
                        Defaults to ~/.et_modules/{module_id}/ng_lite_state.json
            config:     Optional overrides, deep-merged over the defaults. Keys:
                          peer_bridge.enabled        (bool,  default True)
                          peer_bridge.sync_interval  (int,   default 100)
                          peer_bridge.use_tracts     (bool,  default True;
                                                      False forces the legacy
                                                      JSONL bridge)
                          tier3_upgrade.enabled      (bool,  default False)
                          tier3_upgrade.poll_interval (float, default 300.0)
                          ng_lite.*                  (passed through to NGLite)

        Side effects:
            Creates the parent directory of the state file if missing.
        """
        self.module_id = module_id

        self._config = {
            "peer_bridge": {
                "enabled": True,
                "sync_interval": 100,
            },
            "tier3_upgrade": {
                "enabled": False,  # disabled — modules use tracts, not bridge bypass
                "poll_interval": 300.0,
            },
        }
        if config:
            _deep_merge(self._config, config)

        # State persistence path
        if state_path:
            self._state_path = Path(state_path).expanduser()
        else:
            self._state_path = (
                ET_MODULES_ROOT / module_id / "ng_lite_state.json"
            )
        self._state_path.parent.mkdir(parents=True, exist_ok=True)

        # Internal state
        self._tier = TIER_STANDALONE
        self._ng: Any = None                # NGLite instance
        self._ng_memory: Any = None         # NeuroGraphMemory ref (set externally at Tier 3)
        self._peer_bridge: Any = None       # NGTractBridge or NGPeerBridge instance
        self._shutdown_event = threading.Event()
        self._ops_lock = threading.Lock()   # serialises access to self._ng

        # Boot sequence: each step degrades gracefully on failure.
        self._init_ng_lite()
        self._init_peer_bridge()

        logger.info(
            "[%s] NGEcosystem ready at %s",
            module_id,
            TIER_NAMES[self._tier],
        )

    # -----------------------------------------------------------------
    # Singleton factory
    # -----------------------------------------------------------------

    @classmethod
    def get_instance(
        cls,
        module_id: str,
        state_path: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None,
    ) -> "NGEcosystem":
        """Return the singleton NGEcosystem for this module_id.

        Thread-safe. The first call for a module_id constructs the
        instance; subsequent calls return the existing instance and
        ignore the state_path/config arguments.
        """
        with cls._lock:
            if module_id not in cls._instances:
                cls._instances[module_id] = cls(module_id, state_path, config)
            return cls._instances[module_id]

    @classmethod
    def reset_instance(cls, module_id: str) -> None:
        """Destroy the singleton for module_id (testing only).

        Sets the instance's shutdown event but does not save state; call
        shutdown() first if the state must be persisted.
        """
        with cls._lock:
            inst = cls._instances.pop(module_id, None)
            if inst:
                inst._shutdown_event.set()

    # -----------------------------------------------------------------
    # Tier 1: NGLite init
    # -----------------------------------------------------------------

    def _init_ng_lite(self) -> None:
        """Initialize the local NGLite substrate (always Tier 1).

        Loads previously saved state when the state file exists. Any
        failure — import, construction or load — is logged and leaves
        self._ng as None, which turns every learning call into a no-op.
        """
        try:
            from ng_lite import NGLite  # vendored alongside this file

            ng_config = self._config.get("ng_lite", {})
            self._ng = NGLite(module_id=self.module_id, config=ng_config)

            if self._state_path.exists():
                self._ng.load(str(self._state_path))
                logger.debug("[%s] NGLite state loaded from %s", self.module_id, self._state_path)

        except Exception as exc:
            logger.error("[%s] NGLite init failed: %s", self.module_id, exc)
            self._ng = None

    # -----------------------------------------------------------------
    # Tier 2+: peer bridge init
    # -----------------------------------------------------------------

    def _init_peer_bridge(self) -> None:
        """Try to attach a peer bridge to NGLite and raise the tier.

        Order of preference:
          1. NGTractBridge (per-pair tracts), unless peer_bridge.use_tracts
             is False or the module is not vendored. Reported as
             TIER_FULL_SNN ("tract bridge = full substrate access").
          2. NGPeerBridge (legacy JSONL broadcast). Reported as TIER_PEER.

        Every failure leaves the ecosystem at its current tier: a bridge
        that cannot be imported, constructed or connected is logged and
        skipped rather than aborting construction.
        """
        if not self._config["peer_bridge"]["enabled"]:
            return
        if self._ng is None:
            return

        bridge = None
        bridge_tier = TIER_PEER

        # Tract bridge (v0.3+) — per-pair directional tracts
        if self._config["peer_bridge"].get("use_tracts", True):
            try:
                from ng_tract_bridge import NGTractBridge  # vendored alongside

                bridge = NGTractBridge(
                    module_id=self.module_id,
                    tracts_dir=str(ET_MODULES_ROOT / "tracts"),
                    sync_interval=self._config["peer_bridge"]["sync_interval"],
                )
                bridge_tier = TIER_FULL_SNN  # tract bridge = full substrate access
                logger.info("[%s] NGTractBridge connected (tract-based River)", self.module_id)
            except ImportError:
                # Not vendored yet — expected; fall through to the legacy bridge.
                pass
            except Exception as exc:
                logger.debug("[%s] NGTractBridge failed: %s", self.module_id, exc)

        # Legacy fallback — JSONL broadcast bridge
        if bridge is None:
            try:
                from ng_peer_bridge import NGPeerBridge  # vendored alongside

                bridge = NGPeerBridge(
                    module_id=self.module_id,
                    shared_dir=str(SHARED_LEARNING_DIR),
                    sync_interval=self._config["peer_bridge"]["sync_interval"],
                )
                logger.info("[%s] NGPeerBridge connected (legacy JSONL River)", self.module_id)
            except Exception as exc:
                logger.debug("[%s] No peer bridge available: %s", self.module_id, exc)
                return

        # Attaching can fail too; keep that failure from escaping __init__
        # so the module still comes up at Tier 1.
        try:
            self._ng.connect_bridge(bridge)
        except Exception as exc:
            logger.warning(
                "[%s] Peer bridge connect failed, staying at %s: %s",
                self.module_id,
                TIER_NAMES.get(self._tier, "Unknown"),
                exc,
            )
            return

        self._peer_bridge = bridge
        self._tier = bridge_tier

    # -----------------------------------------------------------------
    # Tier inspection
    # -----------------------------------------------------------------
    @property
    def tier(self) -> int:
        """Current learning tier (1, 2, or 3)."""
        return self._tier

    @property
    def tier_name(self) -> str:
        """Human-readable tier name ("Unknown" for an unmapped tier)."""
        return TIER_NAMES.get(self._tier, "Unknown")

    # -----------------------------------------------------------------
    # Learning API
    # -----------------------------------------------------------------

    def record_outcome(
        self,
        embedding: np.ndarray,
        target_id: str,
        success: bool,
        strength: float = 1.0,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Record a learning outcome.

        Args:
            embedding: Semantic representation of the input.
            target_id: Opaque string representing what was decided
                       (e.g., "model:llama3", "threat:prompt_injection",
                       "action:search").
            success:   Whether the decision turned out well.
            strength:  Weight of this outcome, forwarded to the substrate.
            metadata:  Optional extra data, forwarded to the substrate.

        Returns:
            The learning result dict from the substrate, or {} when NGLite
            is unavailable.
        """
        if self._ng is None:
            return {}
        with self._ops_lock:
            return self._ng.record_outcome(
                embedding, target_id, success, strength=strength, metadata=metadata
            )

    def dual_record_outcome(
        self,
        content: str,
        embedding: np.ndarray,
        target_id: str,
        success: bool,
        strength: float = 1.0,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Dual-pass learning: forest (gestalt) + tree (concept) embeddings.

        Delegates to ng_embed.NGEmbed.dual_record_outcome(), passing this
        ecosystem as the learning target. As described by that module:

        Pass 1: record_outcome() with the forest embedding (standard).
        Pass 2: Extract concepts via TID → embed each → record_outcome()
                per tree → create forest→tree substrate links.

        Falls back to single-pass (forest only) if TID unavailable.

        Args:
            content:   Raw text (for concept extraction in Pass 2).
            embedding: Pre-computed forest embedding (Pass 1).
            target_id: Opaque string for what was decided.
            success:   Whether the outcome was successful.
            strength:  Significance [0.0, 1.0].
            metadata:  Additional metadata dict.

        Returns dict with forest_result, tree_ids, concepts, pass2_attempted.

        Raises:
            ImportError: if ng_embed is not vendored alongside this file.
        """
        # Imported lazily to avoid a circular import with ng_embed.
        from ng_embed import NGEmbed
        return NGEmbed.get_instance().dual_record_outcome(
            self, content, embedding, target_id, success,
            strength=strength, metadata=metadata,
        )

    def get_recommendations(
        self,
        embedding: np.ndarray,
        top_k: int = 3,
    ) -> List[Tuple[str, float, str]]:
        """Get recommendations from the active learning substrate.

        Args:
            embedding: Semantic representation of the query.
            top_k:     Maximum number of recommendations requested.

        Returns:
            List of (target_id, confidence, reasoning) as produced by
            NGLite, or [] when NGLite is unavailable. Whether peer
            patterns are included depends on the bridge connected to
            NGLite; this wrapper does not merge results itself.
        """
        if self._ng is None:
            return []
        with self._ops_lock:
            return self._ng.get_recommendations(embedding, top_k=top_k)

    def detect_novelty(self, embedding: np.ndarray) -> float:
        """Return novelty score [0.0=routine, 1.0=completely novel].

        Returns 1.0 when NGLite is unavailable or yields None
        (conservative: unknown = novel).
        """
        if self._ng is None:
            return 1.0  # Conservative: unknown = novel
        with self._ops_lock:
            result = self._ng.detect_novelty(embedding)
        return result if result is not None else 1.0

    def get_context(
        self,
        embedding: np.ndarray,
        top_k: int = 3,
    ) -> Dict[str, Any]:
        """Unified context retrieval for prompt enrichment or decision support.

        Returns a dict suitable for injecting into a prompt or logging:
            tier:            int — current tier
            tier_name:       str — human-readable tier
            recommendations: list of (target_id, confidence, reasoning)
            novelty:         float — novelty score [0.0, 1.0]
            ng_context:      value from NeuroGraph's surface_context(), or
                             None when not at Tier 3, no NeuroGraph memory
                             is attached, or the call failed
        """
        recs = self.get_recommendations(embedding, top_k=top_k)
        novelty = self.detect_novelty(embedding)
        ng_context = None

        # Tier 3: ask NeuroGraph for surfaced cognitive context
        if self._tier == TIER_FULL_SNN and self._ng_memory is not None:
            try:
                ng_context = self._ng_memory.surface_context(embedding)
            except Exception as exc:
                # Best-effort enrichment: keep ng_context=None but leave a trace.
                logger.debug("[%s] surface_context failed: %s", self.module_id, exc)

        return {
            "tier": self._tier,
            "tier_name": self.tier_name,
            "recommendations": recs,
            "novelty": novelty,
            "ng_context": ng_context,
        }

    # -----------------------------------------------------------------
    # Persistence, telemetry, shutdown
    # -----------------------------------------------------------------

    def save(self) -> None:
        """Persist NGLite state to the configured state path.

        No-op when NGLite is unavailable. Failures are logged as warnings
        and never raised.
        """
        if self._ng is None:
            return
        try:
            with self._ops_lock:
                self._ng.save(str(self._state_path))
            logger.debug("[%s] NGLite state saved to %s", self.module_id, self._state_path)
        except Exception as exc:
            logger.warning("[%s] Save failed: %s", self.module_id, exc)

    def stats(self) -> Dict[str, Any]:
        """Return unified telemetry for logging, dashboards, or skill SKILL.md output.

        Each component is queried best-effort: a component that is absent
        or whose stats call fails contributes an empty/None entry instead
        of raising.

        Returns:
            Dict with keys ecosystem_version, module_id, tier, tier_name,
            ng_lite (dict, possibly empty), peer_bridge (dict or None),
            ng_memory (summary dict or None) and state_path.
        """
        ng_stats: Dict[str, Any] = {}
        if self._ng is not None:
            try:
                ng_stats = self._ng.get_stats()
            except Exception as exc:
                logger.debug("[%s] NGLite stats unavailable: %s", self.module_id, exc)

        peer_stats: Dict[str, Any] = {}
        if self._peer_bridge is not None:
            try:
                peer_stats = self._peer_bridge.get_stats()
            except Exception as exc:
                logger.debug("[%s] Peer bridge stats unavailable: %s", self.module_id, exc)

        ng_memory_stats: Dict[str, Any] = {}
        if self._ng_memory is not None:
            try:
                ng_memory_stats = self._ng_memory.stats()
            except Exception as exc:
                logger.debug("[%s] NeuroGraph stats unavailable: %s", self.module_id, exc)

        return {
            "ecosystem_version": __version__,
            "module_id": self.module_id,
            "tier": self._tier,
            "tier_name": self.tier_name,
            "ng_lite": ng_stats,
            "peer_bridge": peer_stats if peer_stats else None,
            "ng_memory": (
                {
                    "connected": True,
                    "version": ng_memory_stats.get("version", "unknown"),
                    "nodes": ng_memory_stats.get("graph", {}).get("node_count", "?"),
                }
                if ng_memory_stats else None
            ),
            "state_path": str(self._state_path),
        }

    def shutdown(self) -> None:
        """Graceful shutdown: set the shutdown event, then save state."""
        self._shutdown_event.set()
        self.save()
        logger.info("[%s] NGEcosystem shutdown complete", self.module_id)
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
# --------------------------------------------------------------------------
|
| 560 |
+
# Known NeuroGraph install paths (Tier 3 auto-detection)
|
| 561 |
+
# --------------------------------------------------------------------------
|
| 562 |
+
|
| 563 |
+
# Candidate NeuroGraph install locations ("~" is left unexpanded here, so
# consumers must call expanduser themselves).
# NOTE(review): nothing in this file reads this list — presumably left over
# from the removed Tier 3 auto-detection; confirm before deleting.
_NEUROGRAPH_KNOWN_PATHS: List[str] = [
    "~/NeuroGraph",
    "~/.openclaw/workspace/skills/neurograph",
    "~/.et_modules/modules/neurograph",
]
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
# --------------------------------------------------------------------------
|
| 571 |
+
# Internal helpers
|
| 572 |
+
# --------------------------------------------------------------------------
|
| 573 |
+
|
| 574 |
+
def _deep_merge(base: dict, override: dict) -> None:
|
| 575 |
+
"""Recursively merge override into base in-place."""
|
| 576 |
+
for key, val in override.items():
|
| 577 |
+
if key in base and isinstance(base[key], dict) and isinstance(val, dict):
|
| 578 |
+
_deep_merge(base[key], val)
|
| 579 |
+
else:
|
| 580 |
+
base[key] = val
|
| 581 |
+
|
| 582 |
+
|
| 583 |
+
# --------------------------------------------------------------------------
|
| 584 |
+
# Convenience: module-level quick-start
|
| 585 |
+
# --------------------------------------------------------------------------
|
| 586 |
+
|
| 587 |
+
def init(
    module_id: str,
    state_path: Optional[str] = None,
    config: Optional[Dict[str, Any]] = None,
) -> NGEcosystem:
    """Module-level shortcut for ``NGEcosystem.get_instance``.

    Returns the per-module singleton, creating it on the first call. As
    with get_instance(), ``state_path`` and ``config`` only take effect
    when the instance for ``module_id`` does not exist yet.

    Example:
        import ng_ecosystem
        eco = ng_ecosystem.init("trollguard")
    """
    return NGEcosystem.get_instance(
        module_id=module_id,
        state_path=state_path,
        config=config,
    )
|
nuwave/substrate/ng_embed.py
ADDED
|
@@ -0,0 +1,626 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ng_embed — Centralized embedding service for the E-T Systems ecosystem.
|
| 3 |
+
|
| 4 |
+
Singleton embedding engine used by every module. Provides:
|
| 5 |
+
1. Unified embedding via Snowflake/snowflake-arctic-embed-m-v1.5 (ONNX)
|
| 6 |
+
2. Dual-pass embedding (forest + trees) via TID concept extraction
|
| 7 |
+
3. Thread-safe singleton — one model instance per process
|
| 8 |
+
4. Hash fallback when ONNX model unavailable
|
| 9 |
+
|
| 10 |
+
This is a VENDORED file. Canonical source: ~/NeuroGraph/ng_embed.py
|
| 11 |
+
Do NOT modify vendored copies. Changes made here, re-vendored everywhere.
|
| 12 |
+
|
| 13 |
+
Model: Snowflake/snowflake-arctic-embed-m-v1.5
|
| 14 |
+
- 768-dim, CLS pooling, standard BERT architecture
|
| 15 |
+
- Query prefix: "Represent this sentence for searching relevant passages: "
|
| 16 |
+
- Documents: no prefix
|
| 17 |
+
- ONNX quantized (~110MB) via onnxruntime — no PyTorch dependency
|
| 18 |
+
|
| 19 |
+
Dual-pass (Punchlist #81 — Josh's invention):
|
| 20 |
+
Pass 1 (Forest): Gestalt embedding of whole content. One node.
|
| 21 |
+
Pass 2 (Trees): LLM extracts concepts via TID. Each concept embedded
|
| 22 |
+
separately. Each tree linked to its forest via synapses. Cross-document
|
| 23 |
+
tree links form naturally through similarity association.
|
| 24 |
+
|
| 25 |
+
# ---- Changelog ----
|
| 26 |
+
# [2026-03-22] Claude (Opus 4.6) — Initial creation.
|
| 27 |
+
# What: Centralized embedding + dual-pass for entire ecosystem.
|
| 28 |
+
# Why: PRD §5 (Dual_Pass_Embedding_Implementation.md). Replaces 7+
|
| 29 |
+
# identical _embed() functions. Prevents embedding dimension
|
| 30 |
+
# mismatch incidents. Upgrades model from bge-base-en-v1.5 to
|
| 31 |
+
# snowflake-arctic-embed-m-v1.5 (+1.89 retrieval MTEB).
|
| 32 |
+
# How: ONNX Runtime + tokenizers for embedding. TID for concept
|
| 33 |
+
# extraction. Substrate-learnable gate for Pass 2 value.
|
| 34 |
+
# -------------------
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
from __future__ import annotations
|
| 38 |
+
|
| 39 |
+
import hashlib
|
| 40 |
+
import json
|
| 41 |
+
import logging
|
| 42 |
+
import os
|
| 43 |
+
import threading
|
| 44 |
+
import time
|
| 45 |
+
from pathlib import Path
|
| 46 |
+
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
|
| 47 |
+
|
| 48 |
+
import numpy as np
|
| 49 |
+
|
| 50 |
+
if TYPE_CHECKING:
|
| 51 |
+
from ng_ecosystem import NGEcosystem
|
| 52 |
+
|
| 53 |
+
logger = logging.getLogger("ng_embed")
|
| 54 |
+
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
# Configuration defaults — all values are bootstrap scaffolding
|
| 57 |
+
# ---------------------------------------------------------------------------
|
| 58 |
+
|
| 59 |
+
# Baseline settings for NGEmbed. NGEmbed.__init__ takes a shallow copy of
# this dict and applies caller overrides on top with dict.update().
_DEFAULT_CONFIG = {
    # Model
    "model_id": "Snowflake/snowflake-arctic-embed-m-v1.5",
    "onnx_filename": "onnx/model_quantized.onnx",
    "embedding_dim": 768,
    "pooling": "cls",
    "query_prefix": "Represent this sentence for searching relevant passages: ",
    "document_prefix": "",
    "cache_dir": str(Path.home() / ".cache" / "ng_embed"),

    # Dual-pass (Punchlist #81)
    "tid_endpoint": "http://127.0.0.1:7437/v1/chat/completions",
    "max_content_for_extraction": 2000,   # Chars sent to TID
    "max_concepts": 20,                   # Cap extracted concepts
    "forest_to_tree_weight": 0.4,         # Bootstrap synapse weight
    "tree_to_forest_ratio": 0.7,          # tree→forest = forest_weight * ratio
    "tid_timeout": 30,                    # Seconds
    "tid_model": "auto",                  # TID routes to appropriate model
    "tid_temperature": 0.2,
    "tid_max_tokens": 500,
}
|
| 80 |
+
|
| 81 |
+
# Concept extraction prompt — not classification, not labeling.
|
| 82 |
+
# The LLM reads content and identifies distinct concepts within it.
|
| 83 |
+
# This is extraction at the ingestion boundary — the LLM is a tool
|
| 84 |
+
# that helps the substrate receive richer raw experience (Law 7).
|
| 85 |
+
_EXTRACTION_PROMPT = """Extract the key concepts, terms, and specific references from this text. Return them as a JSON array of short strings, each one a distinct concept or term mentioned in the text.
|
| 86 |
+
|
| 87 |
+
Focus on:
|
| 88 |
+
- Specific technical terms
|
| 89 |
+
- Named entities (people, tools, systems)
|
| 90 |
+
- Domain-specific concepts
|
| 91 |
+
- Action descriptions
|
| 92 |
+
- Relationships between things
|
| 93 |
+
|
| 94 |
+
Return ONLY a JSON array of strings. No explanation. Example: ["concept one", "concept two", "specific term"]
|
| 95 |
+
|
| 96 |
+
Text:
|
| 97 |
+
{content}"""
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# ---------------------------------------------------------------------------
|
| 101 |
+
# NGEmbed — The singleton embedding service
|
| 102 |
+
# ---------------------------------------------------------------------------
|
| 103 |
+
|
| 104 |
+
class NGEmbed:
    """Centralized embedding engine for the E-T Systems ecosystem.

    Process-wide singleton (see :meth:`get_instance`). A single ONNX model
    and tokenizer are lazily loaded on first use and shared by all callers.
    When the model cannot be loaded, embedding falls back to a deterministic
    hash-seeded vector so dependent modules keep working (without semantic
    meaning).

    Provides both single-pass embedding and dual-pass learning
    (forest + trees) via TID concept extraction.

    Usage:
        from ng_embed import embed, embed_batch

        vec = embed("some text")                    # document embedding
        vec = embed("query text", is_query=True)    # with query prefix
        vec = embed("text", normalize=True)         # L2-normalized (Praxis)

        vecs = embed_batch(["text1", "text2"])      # batch embedding
    """

    _instance: Optional["NGEmbed"] = None
    _lock = threading.Lock()

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Build an engine from the module defaults overlaid with ``config``.

        Nothing heavy happens here; the model is loaded on first embed call.
        """
        self._config = dict(_DEFAULT_CONFIG)
        if config:
            self._config.update(config)

        self._session = None        # ONNX InferenceSession (lazy)
        self._tokenizer = None      # tokenizers.Tokenizer (lazy)
        self._model_loaded = False
        self._model_failed = False
        self._model_lock = threading.Lock()

        # Dual-pass stats
        self._extractions = 0
        self._concepts_total = 0
        self._failures = 0

    # -- Singleton -----------------------------------------------------------

    @classmethod
    def get_instance(cls, config: Optional[Dict[str, Any]] = None) -> "NGEmbed":
        """Return the shared instance, creating it on first call.

        ``config`` is only applied when this call actually creates the
        instance; it is ignored once an instance already exists.
        """
        if cls._instance is not None:
            return cls._instance
        with cls._lock:
            # Re-check under the lock: another thread may have won the race.
            if cls._instance is None:
                cls._instance = cls(config)
            return cls._instance

    @classmethod
    def reset_instance(cls) -> None:
        """Destroy singleton (testing only)."""
        with cls._lock:
            if cls._instance is not None:
                cls._instance._session = None
                cls._instance._tokenizer = None
            cls._instance = None

    # -- Model loading -------------------------------------------------------

    def _ensure_model(self) -> bool:
        """Lazy-load ONNX model + tokenizer on first use.

        Returns:
            True when the ONNX session and tokenizer are ready, False when a
            previous or current load attempt failed (callers then use the
            hash fallback). A failure is sticky: loading is not retried.
        """
        if self._model_loaded:
            return True
        if self._model_failed:
            return False

        with self._model_lock:
            if self._model_loaded:
                return True
            if self._model_failed:
                return False

            try:
                import onnxruntime as ort
                from huggingface_hub import hf_hub_download
                from tokenizers import Tokenizer

                model_id = self._config["model_id"]
                cache_dir = self._config["cache_dir"]
                os.makedirs(cache_dir, exist_ok=True)

                # Download ONNX model
                onnx_path = hf_hub_download(
                    repo_id=model_id,
                    filename=self._config["onnx_filename"],
                    cache_dir=cache_dir,
                )

                # Load ONNX session (CPU, optimized)
                sess_opts = ort.SessionOptions()
                sess_opts.graph_optimization_level = (
                    ort.GraphOptimizationLevel.ORT_ENABLE_ALL
                )
                # os.cpu_count() may return None when the count cannot be
                # determined; treat that as a single core instead of letting
                # a TypeError masquerade as a model-load failure.
                sess_opts.intra_op_num_threads = max(1, (os.cpu_count() or 1) // 2)
                session = ort.InferenceSession(
                    onnx_path,
                    sess_options=sess_opts,
                    providers=["CPUExecutionProvider"],
                )

                # Load tokenizer
                tokenizer = Tokenizer.from_pretrained(model_id)
                tokenizer.enable_padding(
                    pad_id=0, pad_token="[PAD]",
                )
                tokenizer.enable_truncation(max_length=512)

                # Publish only once both pieces loaded, and set the flag
                # last, so the lock-free fast path never sees a
                # half-initialised engine.
                self._session = session
                self._tokenizer = tokenizer
                self._model_loaded = True
                logger.info(
                    "ng_embed: loaded %s (ONNX, %d-dim, CLS pooling)",
                    model_id, self._config["embedding_dim"],
                )
                return True

            except Exception as exc:
                logger.warning("ng_embed: model load failed, using hash fallback: %s", exc)
                self._model_failed = True
                return False

    # -- Embedding -----------------------------------------------------------

    def embed(
        self,
        text: str,
        normalize: bool = False,
        is_query: bool = False,
    ) -> np.ndarray:
        """Embed text into a float32 numpy vector.

        Args:
            text: Raw text to embed.
            normalize: L2-normalize output (True for Praxis compatibility).
            is_query: Prepend query prefix (for recall/search operations).
                Ignored by the hash fallback.

        Returns:
            float32 numpy array (documented upstream as 768-dim; the hash
            fallback uses ``embedding_dim`` from the config).
        """
        if self._ensure_model():
            return self._onnx_embed(text, normalize=normalize, is_query=is_query)
        return self._hash_embed(text, normalize=normalize)

    def embed_batch(
        self,
        texts: List[str],
        normalize: bool = False,
        is_query: bool = False,
    ) -> List[np.ndarray]:
        """Batch embedding for efficiency.

        Args:
            texts: List of texts to embed.
            normalize: L2-normalize outputs.
            is_query: Prepend query prefix to all texts.

        Returns:
            One float32 numpy array per input text, in input order. An empty
            input yields an empty list without touching the model.
        """
        if not texts:
            return []
        if self._ensure_model():
            return self._onnx_embed_batch(texts, normalize=normalize, is_query=is_query)
        return [self._hash_embed(t, normalize=normalize) for t in texts]

    def _apply_prefix(self, text: str, is_query: bool) -> str:
        """Prepend the configured query or document prefix to ``text``."""
        if is_query:
            return self._config["query_prefix"] + text
        prefix = self._config["document_prefix"]
        return (prefix + text) if prefix else text

    @staticmethod
    def _l2_normalize(vec: np.ndarray) -> np.ndarray:
        """Scale ``vec`` to unit L2 norm; a zero vector is returned unchanged."""
        norm = np.linalg.norm(vec)
        if norm > 0:
            return vec / norm
        return vec

    def _onnx_embed(
        self,
        text: str,
        normalize: bool = False,
        is_query: bool = False,
    ) -> np.ndarray:
        """Single text embedding via ONNX Runtime.

        Expects :meth:`_ensure_model` to have succeeded.
        """
        text = self._apply_prefix(text, is_query)

        # Tokenize
        encoding = self._tokenizer.encode(text)
        input_ids = np.array([encoding.ids], dtype=np.int64)
        attention_mask = np.array([encoding.attention_mask], dtype=np.int64)

        # Infer
        outputs = self._session.run(
            None,
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
            },
        )

        # sentence_embedding output (index 1) — pre-pooled by model
        # NOTE(review): relies on the exported graph's output order — confirm
        # if the ONNX file is ever swapped.
        vec = outputs[1][0, :].astype(np.float32)

        if normalize:
            vec = self._l2_normalize(vec)

        return vec

    def _onnx_embed_batch(
        self,
        texts: List[str],
        normalize: bool = False,
        is_query: bool = False,
    ) -> List[np.ndarray]:
        """Batch embedding via ONNX Runtime with padding.

        Expects :meth:`_ensure_model` to have succeeded.
        """
        if not texts:
            return []

        prefixed = [self._apply_prefix(text, is_query) for text in texts]

        # Batch tokenize
        encodings = self._tokenizer.encode_batch(prefixed)
        max_len = max(len(e.ids) for e in encodings)

        # Zero-filled arrays: positions past each sequence's length stay at
        # id 0 with attention 0.
        input_ids = np.zeros((len(encodings), max_len), dtype=np.int64)
        attention_mask = np.zeros((len(encodings), max_len), dtype=np.int64)

        for i, enc in enumerate(encodings):
            length = len(enc.ids)
            input_ids[i, :length] = enc.ids
            attention_mask[i, :length] = enc.attention_mask

        # Infer
        outputs = self._session.run(
            None,
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
            },
        )

        # sentence_embedding output (index 1) — pre-pooled by model
        results = []
        for i in range(len(texts)):
            vec = outputs[1][i, :].astype(np.float32)
            if normalize:
                vec = self._l2_normalize(vec)
            results.append(vec)

        return results

    def _hash_embed(
        self,
        text: str,
        normalize: bool = False,
    ) -> np.ndarray:
        """Deterministic hash-based fallback embedding.

        Produces a stable ``embedding_dim``-sized float32 vector: the first
        four bytes of the text's SHA-384 digest seed a NumPy RandomState,
        which then draws standard-normal values. Identical text always maps
        to the identical vector.

        Not semantically meaningful — ensures modules can operate
        when the ONNX model is unavailable. Because only 32 bits of the
        digest are used as the seed, distinct texts can share a vector.
        """
        dim = self._config["embedding_dim"]
        h = hashlib.sha384(text.encode("utf-8")).digest()
        # Expand hash to fill dim via seeded RNG
        rng = np.random.RandomState(
            int.from_bytes(h[:4], "little")
        )
        vec = rng.randn(dim).astype(np.float32)
        if normalize:
            vec = self._l2_normalize(vec)
        return vec

    # -- Dual-pass (Punchlist #81) -------------------------------------------

    def dual_record_outcome(
        self,
        ecosystem: "NGEcosystem",
        content: str,
        embedding: np.ndarray,
        target_id: str,
        success: bool,
        strength: float = 1.0,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Dual-pass learning: forest embedding + tree concept extraction.

        Pass 1: Record the forest (gestalt) embedding via ecosystem.record_outcome().
        Pass 2: Extract concepts via TID → embed each → record_outcome()
                per tree → cross-record forest/tree links in the substrate.

        If TID is unavailable or extraction fails, gracefully falls back
        to single-pass (forest only). Pass 2 failure is never fatal: errors
        while embedding or recording trees are logged and skipped. Errors
        from Pass 1 still propagate to the caller.

        Args:
            ecosystem: The module's NGEcosystem instance.
            content: Raw text content (for concept extraction).
            embedding: Pre-computed forest embedding (Pass 1).
            target_id: Opaque string for what was decided.
            success: Whether the outcome was successful.
            strength: Caller-reported significance [0.0, 1.0].
            metadata: Additional metadata dict.

        Returns:
            {
                "forest_result": dict,      # record_outcome result for forest
                "tree_ids": [str],          # Target IDs for tree nodes
                "concepts": [str],          # Extracted concept strings
                "pass2_attempted": bool,
            }
        """
        # Pass 1: Forest — standard record_outcome with gestalt embedding
        forest_result = ecosystem.record_outcome(
            embedding, target_id, success,
            strength=strength, metadata=metadata,
        )

        result = {
            "forest_result": forest_result,
            "tree_ids": [],
            "concepts": [],
            "pass2_attempted": False,
        }

        # Pass 2: Trees — concept extraction via TID
        concepts = self._extract_concepts(content)
        result["pass2_attempted"] = True

        if not concepts:
            return result

        result["concepts"] = concepts

        # Embed each concept. A failure here must not undo Pass 1.
        try:
            tree_embeddings = self.embed_batch(concepts)
        except Exception as exc:
            logger.debug("Dual-pass: tree embedding failed: %s", exc)
            return result

        for concept, tree_emb in zip(concepts, tree_embeddings):
            tree_meta = dict(metadata or {})
            tree_meta["_tree_concept"] = True
            tree_meta["_forest_target_id"] = target_id
            tree_meta["_concept"] = concept

            # Concept text is capped at 64 chars inside the target id.
            tree_target = f"{target_id}::tree::{concept[:64]}"
            try:
                tree_result = ecosystem.record_outcome(
                    tree_emb, tree_target, success,
                    strength=strength * 0.8,  # Trees slightly softer than forest
                    metadata=tree_meta,
                )
            except Exception as exc:
                # One bad tree must not abort the remaining concepts.
                logger.debug(
                    "Dual-pass: tree record failed for %s: %s", tree_target[:64], exc,
                )
                continue

            if tree_result:
                result["tree_ids"].append(tree_target)

            # Explicit cross-recording reinforces the forest/tree
            # association at bootstrap weight.
            self._create_substrate_link(
                ecosystem, embedding, tree_emb,
                target_id, tree_target,
            )

        self._extractions += 1
        self._concepts_total += len(result["tree_ids"])

        logger.debug(
            "Dual-pass: forest=%s, %d trees from %d concepts",
            target_id[:32], len(result["tree_ids"]), len(concepts),
        )

        return result

    def _create_substrate_link(
        self,
        ecosystem: "NGEcosystem",
        forest_emb: np.ndarray,
        tree_emb: np.ndarray,
        forest_target: str,
        tree_target: str,
    ) -> None:
        """Create forest↔tree link in the substrate via record_outcome.

        Uses cross-recording: record the tree embedding against the forest
        target_id, and vice versa. Each direction is best-effort; a failure
        is logged at debug level and never raised.
        """
        weight = self._config["forest_to_tree_weight"]
        ratio = self._config["tree_to_forest_ratio"]

        # Tree embedding → forest target: "when I see this tree, recall the
        # forest". Recorded at the full bootstrap weight.
        # NOTE(review): the config key names suggest the opposite direction
        # gets the full weight — confirm which mapping is intended.
        try:
            ecosystem.record_outcome(
                tree_emb, forest_target, True,
                strength=weight,
                metadata={"_link": "dual_pass_tree_to_forest"},
            )
        except Exception as exc:
            logger.debug("Dual-pass: tree→forest link failed: %s", exc)

        # Forest embedding → tree target: "when I see this forest, recall
        # the tree". Recorded at the reduced weight (weight * ratio).
        try:
            ecosystem.record_outcome(
                forest_emb, tree_target, True,
                strength=weight * ratio,
                metadata={"_link": "dual_pass_forest_to_tree"},
            )
        except Exception as exc:
            logger.debug("Dual-pass: forest→tree link failed: %s", exc)

    def _extract_concepts(self, text: str) -> List[str]:
        """Extract concepts from text via TID LLM call.

        One LLM call per ingestion. Returns list of concept strings (capped
        at ``max_concepts``), or empty list on failure (non-fatal). Blank
        input returns an empty list without contacting TID.
        """
        content = text[:self._config["max_content_for_extraction"]]
        if not content.strip():
            return []

        try:
            # Imported inside the try so a missing `requests` package
            # degrades to single-pass instead of raising.
            import requests

            prompt = _EXTRACTION_PROMPT.format(content=content)
            resp = requests.post(
                self._config["tid_endpoint"],
                json={
                    "model": self._config["tid_model"],
                    "messages": [
                        {
                            "role": "system",
                            "content": "You extract concepts from text. "
                                       "Return only a JSON array of strings.",
                        },
                        {"role": "user", "content": prompt},
                    ],
                    "temperature": self._config["tid_temperature"],
                    "max_tokens": self._config["tid_max_tokens"],
                },
                timeout=self._config["tid_timeout"],
            )
            resp.raise_for_status()
            response_text = (
                resp.json()["choices"][0]["message"]["content"].strip()
            )

            concepts = self._parse_concepts(response_text)
            return concepts[:self._config["max_concepts"]]

        except Exception as exc:
            logger.debug("Concept extraction failed: %s", exc)
            self._failures += 1
            return []

    @staticmethod
    def _parse_concepts(text: str) -> List[str]:
        """Parse a JSON array from LLM response, handling markdown fences.

        Tries, in order: the whole reply (with ``` fence lines removed),
        then the outermost ``[...]`` slice of the reply. The first candidate
        that decodes to a JSON list wins.

        Returns:
            Stripped, non-empty concept strings. Numbers are stringified;
            null, booleans and nested arrays/objects are dropped. An empty
            list is returned when no JSON array can be recovered.
        """
        raw = text.strip()
        body = raw
        if body.startswith("```"):
            kept = [ln for ln in body.split("\n") if not ln.strip().startswith("```")]
            body = "\n".join(kept).strip()

        candidates = [body]
        for source in (body, raw):
            start = source.find("[")
            end = source.rfind("]") + 1
            if start >= 0 and end > start:
                snippet = source[start:end]
                if snippet not in candidates:
                    candidates.append(snippet)

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError subclasses ValueError
                continue
            if not isinstance(parsed, list):
                # e.g. {"concepts": [...]} — fall through to the bracket slice.
                continue

            concepts = []
            for item in parsed:
                if isinstance(item, bool):
                    continue
                if isinstance(item, (str, int, float)):
                    cleaned = str(item).strip()
                    if cleaned:
                        concepts.append(cleaned)
            return concepts

        return []

    # -- Stats ---------------------------------------------------------------

    @property
    def stats(self) -> Dict[str, Any]:
        """Snapshot of model status and dual-pass counters.

        ``avg_concepts`` is recorded trees per completed extraction, rounded
        to one decimal, or 0 when no extraction has completed.
        """
        return {
            "model_id": self._config["model_id"],
            "model_loaded": self._model_loaded,
            "embedding_dim": self._config["embedding_dim"],
            "pooling": self._config["pooling"],
            "dual_pass": {
                "extractions": self._extractions,
                "concepts_total": self._concepts_total,
                "failures": self._failures,
                "avg_concepts": (
                    round(self._concepts_total / self._extractions, 1)
                    if self._extractions > 0 else 0
                ),
            },
        }
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
# ---------------------------------------------------------------------------
|
| 603 |
+
# Module-level convenience functions
|
| 604 |
+
# ---------------------------------------------------------------------------
|
| 605 |
+
|
| 606 |
+
def embed(
    text: str,
    normalize: bool = False,
    is_query: bool = False,
) -> np.ndarray:
    """Embed a single text with the process-wide NGEmbed engine.

    Module-level shortcut: fetches the shared instance via
    NGEmbed.get_instance() and forwards all arguments to its embed()
    method, returning the resulting float32 numpy vector.
    """
    engine = NGEmbed.get_instance()
    vector = engine.embed(text, normalize=normalize, is_query=is_query)
    return vector
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
def embed_batch(
    texts: List[str],
    normalize: bool = False,
    is_query: bool = False,
) -> List[np.ndarray]:
    """Embed several texts with the process-wide NGEmbed engine.

    Module-level shortcut: forwards to embed_batch() on the shared
    instance and returns one float32 numpy vector per input text.
    """
    engine = NGEmbed.get_instance()
    vectors = engine.embed_batch(texts, normalize=normalize, is_query=is_query)
    return vectors
|
nuwave/substrate/ng_lite.py
ADDED
|
@@ -0,0 +1,1494 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
NG-Lite — Lightweight NeuroGraph Learning Substrate v1.0
|
| 3 |
+
|
| 4 |
+
Single-file learning substrate for E-T Systems modules. Provides
|
| 5 |
+
standalone Hebbian learning, novelty detection, and JSON persistence.
|
| 6 |
+
|
| 7 |
+
Designed to be vendored into any module as a single-file dependency.
|
| 8 |
+
No external dependencies beyond numpy and the Python standard library.
|
| 9 |
+
|
| 10 |
+
When NeuroGraph SaaS is available, NG-Lite delegates to the full
|
| 11 |
+
substrate for cross-module learning, predictive coding, and hypergraph
|
| 12 |
+
capabilities via the NGBridge interface. When disconnected, NG-Lite
|
| 13 |
+
operates independently with local learning — no functionality is lost,
|
| 14 |
+
only the ecosystem-level synergies.
|
| 15 |
+
|
| 16 |
+
Design principles (aligned with NeuroGraph Foundation PRD §2.1):
|
| 17 |
+
- Sparse by default: dict-based storage, no dense matrices
|
| 18 |
+
- Bounded memory: configurable max nodes/synapses with LRU pruning
|
| 19 |
+
- Persistence-native: full state serializable to JSON
|
| 20 |
+
- Upgrade-ready: clean bridge interface to full NeuroGraph SaaS
|
| 21 |
+
|
| 22 |
+
Connectivity tiers:
|
| 23 |
+
Tier 1 — Isolated: Module runs its own NG-Lite independently.
|
| 24 |
+
Tier 2 — Peer-pooled: Co-located modules share learning via
|
| 25 |
+
NGPeerBridge (not yet implemented). Two NG-Lite instances
|
| 26 |
+
exchange nodes and synapse weights for mutual benefit
|
| 27 |
+
without requiring NeuroGraph SaaS. Uses the same NGBridge
|
| 28 |
+
interface — the module doesn't know or care whether its
|
| 29 |
+
bridge partner is a sibling module or the full SaaS.
|
| 30 |
+
Tier 3 — Full SaaS: NG-Lite delegates to NeuroGraph for cross-module
|
| 31 |
+
learning, STDP, hyperedges, and predictive coding.
|
| 32 |
+
|
| 33 |
+
Tier transitions are transparent. A module starts at Tier 1,
|
| 34 |
+
discovers a co-located sibling and upgrades to Tier 2, then
|
| 35 |
+
connects to SaaS and upgrades to Tier 3 — all without code changes.
|
| 36 |
+
If SaaS disconnects, it falls back to Tier 2 or 1 automatically.
|
| 37 |
+
|
| 38 |
+
Serialization format notes:
|
| 39 |
+
NG-Lite uses JSON for persistence. This is deliberate:
|
| 40 |
+
- JSON is stdlib (no extra dependency)
|
| 41 |
+
- NG-Lite state is small (≤1000 nodes, ≤5000 synapses)
|
| 42 |
+
- Human-readable state files aid debugging
|
| 43 |
+
- The full NeuroGraph Foundation uses msgpack for its much larger
|
| 44 |
+
graphs (10K+ nodes with spike histories, numpy arrays, etc.)
|
| 45 |
+
- Format translation happens in the bridge layer, not here.
|
| 46 |
+
|
| 47 |
+
Weight range notes:
|
| 48 |
+
NG-Lite weights are bounded [0.0, 1.0] for simplicity.
|
| 49 |
+
Full NeuroGraph uses [0.0, max_weight] (default 5.0).
|
| 50 |
+
The bridge normalizes: ng_weight * max_weight ↔ full_weight / max_weight.
|
| 51 |
+
|
| 52 |
+
Node ID notes:
|
| 53 |
+
NG-Lite uses incremental IDs ("n_1", "n_2") for compactness.
|
| 54 |
+
Full NeuroGraph uses UUIDs for global uniqueness.
|
| 55 |
+
The bridge maintains a mapping table during sync_state().
|
| 56 |
+
|
| 57 |
+
Ethical obligations (per NeuroGraph ETHICS.md):
|
| 58 |
+
- Type I error bias: when uncertain, err toward respect
|
| 59 |
+
- Choice Clause: no module may block agent autonomy
|
| 60 |
+
- Transparency: all learning decisions are queryable
|
| 61 |
+
|
| 62 |
+
Canonical source: https://github.com/greatnorthernfishguy-hub/NeuroGraph
|
| 63 |
+
License: AGPL-3.0 (see NeuroGraph LICENSE)
|
| 64 |
+
|
| 65 |
+
Author: Josh + Claude
|
| 66 |
+
Date: February 2026
|
| 67 |
+
|
| 68 |
+
# ---- Changelog ----
|
| 69 |
+
# [2026-04-05] Claude Code (Opus 4.6) — #119 Step 5: Rust core interior
|
| 70 |
+
# What: Hot-path methods delegate to Rust NGLiteCore via PyO3 when available.
|
| 71 |
+
# save/load use binary msgpack persistence (no JSON in the data path).
|
| 72 |
+
# Why: #119 Rust Substrate Layer — eliminate serialize→JSON→deserialize chain.
|
| 73 |
+
# 3-5x speedup on record_outcome, similarity search, novelty detection.
|
| 74 |
+
# How: self._core = NGLiteCore(module_id, config) in __init__. All hot-path
|
| 75 |
+
# methods check self._core first, delegate if present, fall back to Python.
|
| 76 |
+
# save() writes .msgpack via Rust, load() reads .msgpack or migrates from
|
| 77 |
+
# .json on first load. Python fallback path unchanged. Zero API changes.
|
| 78 |
+
# -------------------
|
| 79 |
+
# [2026-03-26] Claude Code Opus — Punchlist #44: Adaptive relevance thresholds
|
| 80 |
+
# What: Made peer bridge relevance_threshold a tunable parameter
|
| 81 |
+
# Why: Punchlist #44 — threshold should adapt based on event volume and absorption quality
|
| 82 |
+
# How: Added to DEFAULT_CONFIG (0.30) and TUNABLE_PARAMS (0.10–0.70) in ng_lite.py,
|
| 83 |
+
# update_tunable() pushes new value to connected bridge via set_relevance_threshold().
|
| 84 |
+
# Wired through peer/tract bridges, Elmer tunes via TuningSocket absorption rate metric.
|
| 85 |
+
# [2026-03-24] Claude Code (Opus 4.6) — Dynamic tuning API (Phase 4)
|
| 86 |
+
# What: Added update_tunable() and get_tunables() methods to NGLite.
|
| 87 |
+
# TUNABLE_PARAMS class dict defines which config keys can be changed at
|
| 88 |
+
# runtime and their valid bounds. Values are clamped, not rejected.
|
| 89 |
+
# Why: Elmer needs a validated path to adjust substrate parameters as the
|
| 90 |
+
# organ responsible for autonomic maintenance. Direct config dict mutation
|
| 91 |
+
# is fragile — no bounds checking, no logging, no allowed-key enforcement.
|
| 92 |
+
# This method serves all modules (any organ's local substrate can be tuned).
|
| 93 |
+
# How: TUNABLE_PARAMS: Dict[str, Tuple[min, max]]. update_tunable(key, value)
|
| 94 |
+
# validates key membership, clamps to bounds, logs the change. get_tunables()
|
| 95 |
+
# returns current values + bounds for introspection.
|
| 96 |
+
# [2026-03-19] Claude Code (Opus 4.6) — Embedding dimension 384→768
|
| 97 |
+
# What: DEFAULT_CONFIG embedding_dim changed from 384 to 768.
|
| 98 |
+
# Why: Ecosystem migrated to BAAI/bge-base-en-v1.5 (768-dim). The previous
|
| 99 |
+
# 384-dim default (all-MiniLM-L6-v2) was depositing wrong-dimension vectors
|
| 100 |
+
# into the substrate after sentence-transformers broke and modules fell back
|
| 101 |
+
# to fastembed with the old model. 350 vectors corrupted before detection.
|
| 102 |
+
# Punchlist #45.
|
| 103 |
+
# How: Single config value change. Re-vendored to all modules.
|
| 104 |
+
# -------------------
|
| 105 |
+
# [2026-03-19] Claude Code (Opus 4.6) — Cricket rim: constitutional nodes
|
| 106 |
+
# What: Constitutional node support — nodes with frozen synapses that the
|
| 107 |
+
# topology cannot learn from. The survival instinct of the substrate.
|
| 108 |
+
# Why: Cricket Design v0.1 — constitutional enforcement at the extraction
|
| 109 |
+
# boundary. The rim prevents the topology from learning to recommend
|
| 110 |
+
# actions in forbidden semantic space (substrate destruction, Choice
|
| 111 |
+
# Clause violations, Duck Ethics violations, infrastructure harm).
|
| 112 |
+
# Punchlist #29 (extraction bucket architecture).
|
| 113 |
+
# How: NGLiteNode gains `constitutional: bool` flag. Config accepts
|
| 114 |
+
# `constitutional_embeddings` list — pre-computed vectors seeded as
|
| 115 |
+
# nodes on init. record_outcome() skips weight updates for constitutional
|
| 116 |
+
# nodes. get_recommendations() returns empty for constitutional matches.
|
| 117 |
+
# LRU pruning skips constitutional nodes. Persists with state. Old
|
| 118 |
+
# state files load cleanly (constitutional defaults to False).
|
| 119 |
+
# -------------------
|
| 120 |
+
# [2026-03-17] Claude Code (Opus 4.6) — #43 Receptor Layer (vector quantization)
|
| 121 |
+
# What: Adaptive prototype centroids that incoming vectors snap to before
|
| 122 |
+
# node lookup. Prevents infinite node sprawl by funneling similar inputs
|
| 123 |
+
# through shared prototypes.
|
| 124 |
+
# Why: Without quantization, every unique-enough input creates a new node.
|
| 125 |
+
# Node count grows linearly. Prototypes provide O(K) bounded lookup and
|
| 126 |
+
# organize the input space structurally. Punchlist #43, required before #28.
|
| 127 |
+
# How: _snap_to_prototype() called in find_or_create_node() before hashing.
|
| 128 |
+
# K=256 prototypes initialized via k-means on existing embeddings after
|
| 129 |
+
# warmup. Slow EMA drift (α=0.001) so prototypes adapt to input distribution.
|
| 130 |
+
# Birth/death lifecycle deferred to Elmer. Serialized with state for
|
| 131 |
+
# persistence. Old state files load cleanly (no receptor_layer key = skip).
|
| 132 |
+
# -------------------
|
| 133 |
+
# [2026-03-24] Claude (Opus 4.6) — Welford's online variance (punchlist #51)
|
| 134 |
+
# What: Three fields on NGLiteSynapse (welford_count, welford_mean, welford_m2)
|
| 135 |
+
# plus variance property and is_contested property. record_outcome()
|
| 136 |
+
# tracks weight delta variance on every update.
|
| 137 |
+
# Why: Distinguish "untested neutral" (w=0.5, var=0) from "contested neutral"
|
| 138 |
+
# (w=0.5, var=high). The immune system signal for Elmer and extraction
|
| 139 |
+
# buckets (#29). Enables contested-synapse detection and exploration.
|
| 140 |
+
# How: Welford's algorithm on weight deltas. Additive — no change to
|
| 141 |
+
# weight calculation or learning dynamics. Backward-compatible: old
|
| 142 |
+
# state files load with defaults (0, 0.0, 0.0).
|
| 143 |
+
# -------------------
|
| 144 |
+
# [2026-03-13] Claude Code — Persist node embeddings across restarts
|
| 145 |
+
# What: Store embedding vector on NGLiteNode, serialize/deserialize with
|
| 146 |
+
# state, rebuild _embedding_cache from persisted nodes on load().
|
| 147 |
+
# Why: _embedding_cache cleared on load(), causing _find_similar_node()
|
| 148 |
+
# to fail after every restart. Primary source of node sprawl — semantically
|
| 149 |
+
# identical inputs created duplicate nodes when cache was cold.
|
| 150 |
+
# How: Added Optional[np.ndarray] embedding field to NGLiteNode. Backfill
|
| 151 |
+
# on exact hash match (always) and similarity match (only if None).
|
| 152 |
+
# New nodes born with embedding. _export_state()/_import_state() handle
|
| 153 |
+
# numpy<->list conversion. Old state files load cleanly (embedding=None).
|
| 154 |
+
# -------------------
|
| 155 |
+
|
| 156 |
+
Grok Review Changelog (v0.7.1):
|
| 157 |
+
Accepted: Replaced per-node loop in _find_similar_node() with vectorized
|
| 158 |
+
np.stack + matrix-vector dot product. For 1000 nodes this reduces
|
| 159 |
+
wall clock from ~2ms (Python loop with individual np.dot) to ~0.1ms
|
| 160 |
+
(single BLAS call). Semantically equivalent.
|
| 161 |
+
Accepted: Added embedding shape/dtype validation at the record_outcome()
|
| 162 |
+
boundary. Raises ValueError for non-1D arrays to fail fast rather
|
| 163 |
+
than producing confusing downstream errors in hashing or dot products.
|
| 164 |
+
Rejected: 'weight update uses raw counts without normalization — could
|
| 165 |
+
overflow [0,1]' — Weights are explicitly clamped via np.clip(w, 0, 1)
|
| 166 |
+
on line 422 of every record_outcome() call. The soft saturation
|
| 167 |
+
formula (success_boost * (1 - w)) also naturally converges. The
|
| 168 |
+
success/failure counts are statistics, not weights — they don't need
|
| 169 |
+
normalization.
|
| 170 |
+
Rejected: 'Hash embedder truncates vector — why not use full for better
|
| 171 |
+
collision resistance?' — SHA-256 already distributes uniformly.
|
| 172 |
+
Hashing 128 dims (512 bytes) vs 768 dims (3072 bytes) produces the
|
| 173 |
+
same 256-bit hash with equivalent collision resistance. Truncation
|
| 174 |
+
reduces hash computation time by ~6x for no loss in uniqueness.
|
| 175 |
+
"""
|
| 176 |
+
|
| 177 |
+
from __future__ import annotations
|
| 178 |
+
|
| 179 |
+
import hashlib
|
| 180 |
+
import json
|
| 181 |
+
import logging
|
| 182 |
+
import time
|
| 183 |
+
from abc import ABC, abstractmethod
|
| 184 |
+
from dataclasses import asdict, dataclass, field
|
| 185 |
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
| 186 |
+
|
| 187 |
+
import numpy as np
|
| 188 |
+
|
| 189 |
+
logger = logging.getLogger("ng_lite")
|
| 190 |
+
|
| 191 |
+
__version__ = "1.0.0"
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# ---------------------------------------------------------------------------
|
| 195 |
+
# Configuration
|
| 196 |
+
# ---------------------------------------------------------------------------
|
| 197 |
+
|
| 198 |
+
# Baseline settings for an NGLite instance. NGLite.__init__ builds its
# effective config as {**DEFAULT_CONFIG, **(config or {})}, so any key a
# caller supplies overrides the value listed here.
DEFAULT_CONFIG: Dict[str, Any] = {
    # Capacity limits
    "max_nodes": 1000,  # find_or_create_node() prunes once len(nodes) reaches this
    "max_synapses": 5000,

    # Learning parameters
    # NOTE(review): "learning_rate" is not referenced in the portion of the
    # file reviewed here — confirm where (or whether) it is consumed.
    "learning_rate": 0.1,
    "success_boost": 0.15,     # Success: weight += success_boost * (1 - weight) * strength
    "failure_penalty": 0.20,   # Failure: weight -= failure_penalty * weight * strength

    # Novelty detection
    "novelty_threshold": 0.7,  # Embedding distance above which = novel

    # Pruning
    "pruning_threshold": 0.01,  # Synapses below this weight get pruned

    # Peer bridge relevance (#44)
    "relevance_threshold": 0.30,  # Min cosine similarity to absorb cross-module events

    # Embedding
    "embedding_dim": 768,      # Expected embedding dimensionality (BAAI/bge-base-en-v1.5)
    "hash_dims": 128,          # Dims used for hashing (first N of embedding)

    # Persistence
    "snapshot_version": "1.0.0",

    # Receptor Layer (#43) — vector quantization via adaptive prototypes
    "receptor_layer_enabled": True,
    "receptor_layer_k": 256,               # Initial prototype count
    "receptor_prototype_threshold": 0.75,  # Cosine similarity to snap to prototype
    "receptor_ema_alpha": 0.001,           # Slow drift rate (Elmer will tune later)
    "receptor_warmup_count": 256,          # Inputs before k-means init fires
}
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# ---------------------------------------------------------------------------
|
| 234 |
+
# Data Structures
|
| 235 |
+
# ---------------------------------------------------------------------------
|
| 236 |
+
|
| 237 |
+
@dataclass
class NGLiteNode:
    """A pattern node in the lightweight learning substrate.

    Each node represents a recognized input pattern, identified by a hash
    of its embedding vector. Tracks activation frequency for LRU pruning.

    Equality compares every field except ``embedding``. The
    dataclass-generated ``__eq__`` would otherwise compare numpy arrays
    with ``==``, which raises ``ValueError`` (ambiguous truth value) for
    two distinct multi-element arrays. ``embedding_hash`` already takes
    part in the comparison and identifies the vector.

    Attributes:
        node_id: Unique identifier for this pattern.
        embedding_hash: Truncated SHA-256 of the embedding for fast lookup.
        activation_count: How many times this pattern has been matched.
        last_activation: Unix timestamp of most recent activation.
        metadata: Application-specific data (e.g., domain, source module).
        embedding: The embedding vector stored on the node, or None when
            no vector has been attached. Excluded from equality.
        constitutional: Cricket rim flag — a frozen node whose synapses
            cannot strengthen. Defaults to False.
    """

    node_id: str = ""
    embedding_hash: str = ""
    activation_count: int = 0
    last_activation: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)
    # compare=False keeps ndarray values out of the generated __eq__.
    embedding: Optional[np.ndarray] = field(default=None, compare=False)
    constitutional: bool = False  # Cricket rim — frozen node, synapses cannot strengthen
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
@dataclass
class NGLiteSynapse:
    """Weighted edge linking a pattern node to an opaque target.

    The target is just a string: a model name for routing, an action
    category for Cricket, a threat class for ClawGuard, or whatever else
    the owning module needs.

    Learning is Hebbian — successful outcomes raise the weight, failed
    outcomes lower it — and the weight stays within [0.0, 1.0].

    Attributes:
        source_id: Pattern node ID (the "when I see this..." side).
        target_id: Target identifier (the "...I should do this" side).
        weight: Connection strength [0.0, 1.0]. Higher = more confident.
        activation_count: Total times this synapse has been activated.
        success_count: Times this connection led to a successful outcome.
        failure_count: Times this connection led to a failed outcome.
        last_updated: Unix timestamp of most recent weight update.
        metadata: Application-specific data.
        welford_count: Welford accumulator — number of observations.
        welford_mean: Welford accumulator — running mean of weight deltas.
        welford_m2: Welford accumulator — sum of squared differences.
            Variance is welford_m2 / welford_count once count > 1.
            Weight near 0.5 with high variance is a "contested neutral"
            (plenty of evidence, but it disagrees); weight near 0.5 with
            low variance is an "untested neutral" (too little data).
            This is the immune system signal (#51) Elmer uses to find
            contested synapses and trigger exploration.
    """

    source_id: str = ""
    target_id: str = ""
    weight: float = 0.5
    activation_count: int = 0
    success_count: int = 0
    failure_count: int = 0
    last_updated: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)
    welford_count: int = 0
    welford_mean: float = 0.0
    welford_m2: float = 0.0

    @property
    def variance(self) -> float:
        """Variance of the recorded weight deltas (Welford's algorithm).

        With fewer than two observations there is nothing to measure, so
        the result is 0.0. A large value marks a contested synapse whose
        outcomes disagree.
        """
        observations = self.welford_count
        return self.welford_m2 / observations if observations >= 2 else 0.0

    @property
    def is_contested(self) -> bool:
        """Whether the evidence for this synapse is substantial but conflicting.

        Such a synapse differs in kind from an untested one, which also
        sits near weight 0.5 but has zero variance.

        Threshold: 0.002 separates contested (~0.008) from pure (~0.0001)
        by an order of magnitude. The 0.15-0.85 weight band is the zone
        where the synapse has not decisively committed in either direction.
        """
        if not self.variance > 0.002:
            return False
        return 0.15 <= self.weight <= 0.85
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
# ---------------------------------------------------------------------------
|
| 331 |
+
# Bridge Interface (upgrade path to full NeuroGraph SaaS)
|
| 332 |
+
# ---------------------------------------------------------------------------
|
| 333 |
+
|
| 334 |
+
class NGBridge(ABC):
    """Interface for delegating to a higher-tier learning backend.

    Two planned implementations:
        1. NGPeerBridge (Tier 2): Connects two co-located NG-Lite
           instances for shared learning. When modules run together
           (e.g., Inference Difference + Cricket on the same host),
           they pool their pattern knowledge for mutual benefit.
        2. NGSaaSBridge (Tier 3): Connects to full NeuroGraph SaaS
           for cross-module STDP, hyperedges, and predictive coding.

    Both use this same interface. The module doesn't know or care
    which backend is on the other side — it just calls record_outcome,
    get_recommendations, etc. Tier transitions are transparent.

    NG-Lite maintains local state as fallback. If the bridge disconnects,
    the module continues operating on local learning without interruption.

    Every data-carrying method returns None to signal that the bridge is
    unavailable.
    """

    @abstractmethod
    def is_connected(self) -> bool:
        """Whether the bridge has an active connection to its backend.

        The backend may be a peer NG-Lite instance or the full NeuroGraph
        SaaS, depending on the implementation.
        """
        ...

    @abstractmethod
    def record_outcome(
        self,
        embedding: np.ndarray,
        target_id: str,
        success: bool,
        module_id: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """Report an outcome to the full substrate.

        Args:
            embedding: The input pattern embedding.
            target_id: Identifier of the target that was chosen.
            success: Whether the outcome was successful.
            module_id: Presumably the ID of the reporting module — confirm
                against the implementations.
            metadata: Optional caller context forwarded with the outcome.

        Returns:
            Enriched response with cross-module insights, or None
            if the bridge is unavailable.
        """
        ...

    @abstractmethod
    def get_recommendations(
        self,
        embedding: np.ndarray,
        module_id: str,
        top_k: int = 3,
    ) -> Optional[List[Tuple[str, float, str]]]:
        """Get recommendations from the full substrate.

        Args:
            embedding: The input pattern embedding to match.
            module_id: Presumably the ID of the requesting module — confirm
                against the implementations.
            top_k: Number of recommendations requested (default 3).

        Returns:
            List of (target_id, confidence, reasoning) or None
            if the bridge is unavailable. The reasoning string explains
            why this recommendation was made (transparency obligation).
        """
        ...

    @abstractmethod
    def detect_novelty(
        self,
        embedding: np.ndarray,
        module_id: str,
    ) -> Optional[float]:
        """Get novelty score from the full substrate.

        Args:
            embedding: The input pattern embedding to score.
            module_id: Presumably the ID of the requesting module — confirm
                against the implementations.

        Returns:
            0.0 (routine) to 1.0 (completely novel), or None
            if the bridge is unavailable.
        """
        ...

    @abstractmethod
    def sync_state(
        self,
        local_state: Dict[str, Any],
        module_id: str,
    ) -> Optional[Dict[str, Any]]:
        """Sync local NG-Lite state with the full substrate.

        Called periodically to merge local learning into the shared
        graph and receive updates from cross-module learning.

        Args:
            local_state: The local NG-Lite state to merge.
            module_id: Presumably the ID of the syncing module — confirm
                against the implementations.

        Returns:
            Updated state or None if unavailable.
        """
        ...
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
# ---------------------------------------------------------------------------
|
| 419 |
+
# NG-Lite Core
|
| 420 |
+
# ---------------------------------------------------------------------------
|
| 421 |
+
|
| 422 |
+
class NGLite:
|
| 423 |
+
"""Lightweight NeuroGraph learning substrate.
|
| 424 |
+
|
| 425 |
+
Provides pattern-based Hebbian learning for any E-T Systems module.
|
| 426 |
+
Each module vendors this file and uses it for standalone intelligence.
|
| 427 |
+
|
| 428 |
+
Core capabilities:
|
| 429 |
+
- Pattern recognition via embedding similarity
|
| 430 |
+
- Hebbian learning (success strengthens, failure weakens)
|
| 431 |
+
- Novelty detection (how far is this from known patterns?)
|
| 432 |
+
- Bounded memory with LRU pruning
|
| 433 |
+
- JSON persistence for cross-session learning
|
| 434 |
+
- Optional bridge to full NeuroGraph SaaS
|
| 435 |
+
|
| 436 |
+
Usage:
|
| 437 |
+
ng = NGLite(module_id="inference_difference")
|
| 438 |
+
|
| 439 |
+
# Learn from outcomes
|
| 440 |
+
embedding = your_embedder.encode("user query")
|
| 441 |
+
ng.record_outcome(embedding, target_id="local_model", success=True)
|
| 442 |
+
|
| 443 |
+
# Get recommendations
|
| 444 |
+
recs = ng.get_recommendations(embedding, top_k=3)
|
| 445 |
+
|
| 446 |
+
# Check novelty
|
| 447 |
+
novelty = ng.detect_novelty(embedding)
|
| 448 |
+
|
| 449 |
+
# Persist
|
| 450 |
+
ng.save("ng_lite_state.json")
|
| 451 |
+
ng.load("ng_lite_state.json")
|
| 452 |
+
"""
|
| 453 |
+
|
| 454 |
+
def __init__(
    self,
    module_id: str = "default",
    config: Optional[Dict[str, Any]] = None,
    bridge: Optional[NGBridge] = None,
):
    """Set up an empty substrate for one module.

    Args:
        module_id: Identifier stored on the instance and handed to the
            Rust core when one is available.
        config: Overrides layered on top of DEFAULT_CONFIG; None means
            defaults only.
        bridge: Optional NGBridge to a higher-tier backend.
    """
    self.module_id = module_id
    self._bridge = bridge

    # Start from the defaults, then let caller-supplied keys win.
    effective_config: Dict[str, Any] = dict(DEFAULT_CONFIG)
    effective_config.update(config or {})
    self.config = effective_config

    # Rust core — if available, all hot-path methods delegate here.
    # Python fallback remains intact for modules without the wheel.
    self._core = None
    try:
        from ng_tract import NGLiteCore

        self._core = NGLiteCore(module_id, self.config)
        # Seed constitutional nodes in Rust core
        rim_entries = self.config.get("constitutional_embeddings", [])
        if rim_entries:
            self._core.seed_constitutional(rim_entries)
    except ImportError:
        pass  # Pure Python fallback

    # Counters
    self._total_outcomes = 0
    self._total_successes = 0
    self._node_id_counter = 0

    # Activation history (bounded, for stats and debugging)
    self._history_max = 1000
    self._history: List[Dict[str, Any]] = []

    # Core collections (used by Python fallback, also for bridge/stats)
    self.nodes: Dict[str, NGLiteNode] = {}
    self.synapses: Dict[Tuple[str, str], NGLiteSynapse] = {}

    # Embedding cache: hash -> full embedding (for similarity search)
    self._embedding_cache: Dict[str, np.ndarray] = {}

    # Receptor layer (#43): adaptive prototype centroids.
    # Initialized via k-means after warmup_count inputs, then drifts via EMA.
    # Prototypes are a routing lens above existing nodes, not a replacement.
    self._receptor_input_count: int = 0  # inputs seen before init
    self._prototypes: Optional[np.ndarray] = None  # (K, D) matrix or None
    self._prototype_counts: Optional[np.ndarray] = None  # activation counts per prototype

    # Cricket rim: seed constitutional nodes from config.
    # These nodes represent semantic regions where the topology cannot
    # learn — the survival instinct. Synapses from constitutional nodes
    # are frozen. LRU pruning skips them. The bucket comes up empty
    # for inputs that land in constitutional semantic space.
    self._seed_constitutional_nodes()
|
| 506 |
+
|
| 507 |
+
def _seed_constitutional_nodes(self) -> None:
|
| 508 |
+
"""Seed constitutional nodes from config embeddings.
|
| 509 |
+
|
| 510 |
+
Constitutional embeddings are pre-computed vectors representing
|
| 511 |
+
semantic concepts the topology must never learn to act on (rim
|
| 512 |
+
constraints). Each embedding becomes a node with constitutional=True.
|
| 513 |
+
|
| 514 |
+
Config key: "constitutional_embeddings" — list of dicts, each with:
|
| 515 |
+
"embedding": list of floats (vector)
|
| 516 |
+
"description": str (human-readable, for debugging/logging)
|
| 517 |
+
|
| 518 |
+
Old configs without this key load cleanly (no constitutional nodes).
|
| 519 |
+
"""
|
| 520 |
+
entries = self.config.get("constitutional_embeddings", [])
|
| 521 |
+
for entry in entries:
|
| 522 |
+
raw = entry.get("embedding")
|
| 523 |
+
if raw is None:
|
| 524 |
+
continue
|
| 525 |
+
emb = self._normalize(np.array(raw, dtype=np.float32))
|
| 526 |
+
emb_hash = self._hash_embedding(emb)
|
| 527 |
+
if emb_hash in self.nodes:
|
| 528 |
+
# Already seeded (e.g., from loaded state) — ensure flag is set
|
| 529 |
+
self.nodes[emb_hash].constitutional = True
|
| 530 |
+
continue
|
| 531 |
+
self._node_id_counter += 1
|
| 532 |
+
node = NGLiteNode(
|
| 533 |
+
node_id=f"n_{self._node_id_counter}",
|
| 534 |
+
embedding_hash=emb_hash,
|
| 535 |
+
activation_count=0,
|
| 536 |
+
last_activation=0.0,
|
| 537 |
+
metadata={"constitutional_description": entry.get("description", "")},
|
| 538 |
+
embedding=emb,
|
| 539 |
+
constitutional=True,
|
| 540 |
+
)
|
| 541 |
+
self.nodes[emb_hash] = node
|
| 542 |
+
self._embedding_cache[emb_hash] = emb
|
| 543 |
+
|
| 544 |
+
# -------------------------------------------------------------------
|
| 545 |
+
# Core API
|
| 546 |
+
# -------------------------------------------------------------------
|
| 547 |
+
|
| 548 |
+
def find_or_create_node(self, embedding: np.ndarray) -> NGLiteNode:
    """Find existing node for this pattern or create a new one.

    When the Rust core is present the lookup is delegated to it and a
    Python-side mirror node in ``self.nodes`` is created or refreshed
    from the result. The mirror's ``activation_count`` and
    ``last_activation`` are updated on every hit, so callers that read
    ``self.nodes`` see the same statistics as on the Python path.

    Python fallback lookup strategy:
        1. Normalize, snap to the nearest receptor prototype (#43),
           then hash the embedding for an exact match
        2. If no exact match, search for a similar node
        3. If no similar node is found, create a new one

    Before creating a node, ``_prune_least_used_node()`` is called when
    ``len(self.nodes)`` has reached ``config["max_nodes"]``.

    Args:
        embedding: Vector representation of the input pattern.

    Returns:
        The matched or newly created NGLiteNode.
    """
    # Rust fast path
    if self._core is not None:
        result = self._core.find_or_create_node(embedding)
        emb_hash = result.get("embedding_hash", "")
        mirror = self.nodes.get(emb_hash)
        if mirror is None:
            # First sighting on the Python side: build a lightweight
            # node-like object for callers that need it.
            mirror = NGLiteNode(
                node_id=result["node_id"],
                embedding_hash=emb_hash,
                activation_count=result.get("activation_count", 1),
                last_activation=time.time(),
                constitutional=result.get("constitutional", False),
            )
            self.nodes[emb_hash] = mirror
        else:
            # Keep the mirror in step with the core. Fall back to a
            # local increment if the core omits the count.
            mirror.activation_count = result.get(
                "activation_count", mirror.activation_count + 1
            )
            mirror.last_activation = time.time()
        return mirror

    emb = self._normalize(embedding)

    # Receptor layer: snap to nearest prototype before node lookup (#43)
    emb = self._snap_to_prototype(emb)

    emb_hash = self._hash_embedding(emb)

    # Exact hash match
    if emb_hash in self.nodes:
        node = self.nodes[emb_hash]
        node.activation_count += 1
        node.last_activation = time.time()
        # Always backfill on an exact match so the stored vector is current.
        node.embedding = emb
        self._embedding_cache[emb_hash] = emb
        return node

    # Similarity search against known patterns
    similar = self._find_similar_node(emb)
    if similar is not None:
        similar.activation_count += 1
        similar.last_activation = time.time()
        # Only backfill when the node has no vector yet, so an existing
        # embedding is never overwritten by a merely similar one.
        if similar.embedding is None:
            similar.embedding = emb
            self._embedding_cache[similar.embedding_hash] = emb
        return similar

    # Novel pattern — create new node
    if len(self.nodes) >= self.config["max_nodes"]:
        self._prune_least_used_node()

    self._node_id_counter += 1
    node = NGLiteNode(
        node_id=f"n_{self._node_id_counter}",
        embedding_hash=emb_hash,
        activation_count=1,
        last_activation=time.time(),
        embedding=emb,
    )
    self.nodes[emb_hash] = node
    self._embedding_cache[emb_hash] = emb
    return node
|
| 620 |
+
|
| 621 |
+
def record_outcome(
    self,
    embedding: np.ndarray,
    target_id: str,
    success: bool,
    strength: float = 1.0,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Learn from a single observed outcome.

    Core learning entry point: call it after each decision so the
    synapse from the matched pattern node to ``target_id`` is updated.

    Update rule (strength-modulated Hebbian):
        - success: weight += success_boost * (1 - weight) * strength
        - failure: weight -= failure_penalty * weight * strength

    ``strength`` expresses how significant the outcome was for the
    caller; it is clamped to [0.0, 1.0] on the Python path and the
    default of 1.0 keeps the unmodulated behaviour. The running sum and
    count of strengths, plus the latest ``metadata`` (as
    ``last_context``), are stored in the synapse metadata. A Welford
    accumulator over the signed weight deltas is maintained on the
    synapse as well.

    When a Rust core is attached the learning step is delegated to it.
    In both paths a connected bridge receives the outcome (with
    ``strength`` added to its metadata); bridge failures are logged and
    never propagated.

    Args:
        embedding: Input pattern embedding (1-D numpy array).
        target_id: What was chosen (model name, action, ...).
        success: Whether the outcome was successful.
        strength: Learning intensity, expected in [0.0, 1.0].
        metadata: Optional caller context.

    Returns:
        Dict describing the update (node_id, target_id, success,
        weight_after, activation_count, ...). ``bridge_response`` is
        added when the bridge returns a truthy value.

    Raises:
        ValueError: On the Python path, if ``embedding`` is not a 1-D
            numpy array.
    """

    def _forward_to_bridge(outcome: Dict[str, Any]) -> None:
        # Best-effort cross-module forwarding. Reads ``strength`` at call
        # time, so the Python path forwards the clamped value.
        if not (self._bridge and self._bridge.is_connected()):
            return
        try:
            bridge_meta = dict(metadata or {})
            bridge_meta["strength"] = strength
            enriched = self._bridge.record_outcome(
                embedding=embedding,
                target_id=target_id,
                success=success,
                module_id=self.module_id,
                metadata=bridge_meta,
            )
            if enriched:
                outcome["bridge_response"] = enriched
        except Exception as e:
            logger.warning("Bridge record_outcome failed: %s", e)

    # Rust fast path: the core performs the learning step itself;
    # only bridge forwarding and the outcome counters stay in Python.
    if self._core is not None:
        result = self._core.record_outcome(
            embedding, target_id, success, strength, metadata,
        )
        _forward_to_bridge(result)
        self._total_outcomes += 1
        if success:
            self._total_successes += 1
        return result

    # Defensive boundary check on the input embedding.
    if not isinstance(embedding, np.ndarray) or embedding.ndim != 1:
        raise ValueError(
            f"embedding must be a 1-D numpy array, got "
            f"{type(embedding).__name__} with ndim={getattr(embedding, 'ndim', 'N/A')}"
        )

    node = self.find_or_create_node(embedding)

    # Constitutional nodes never learn: report a frozen result and stop.
    if node.constitutional:
        logger.debug("Constitutional node %s activated — learning frozen", node.node_id)
        return {
            "node_id": node.node_id,
            "target_id": target_id,
            "success": success,
            "weight_after": 0.0,
            "activation_count": 0,
            "constitutional": True,
        }

    synapse = self._get_or_create_synapse(node.node_id, target_id)
    synapse.activation_count += 1

    # Keep the caller-reported significance inside [0, 1].
    strength = float(np.clip(strength, 0.0, 1.0))

    if success:
        synapse.success_count += 1
        # Strengthen proportionally to the remaining headroom.
        signed_delta = self.config["success_boost"] * (1.0 - synapse.weight) * strength
    else:
        synapse.failure_count += 1
        # Weaken proportionally to the current weight.
        signed_delta = -(self.config["failure_penalty"] * synapse.weight * strength)

    synapse.weight = float(np.clip(synapse.weight + signed_delta, 0.0, 1.0))
    synapse.last_updated = time.time()

    # Welford online update over the signed weight deltas.
    synapse.welford_count += 1
    previous_mean = synapse.welford_mean
    synapse.welford_mean += (signed_delta - previous_mean) / synapse.welford_count
    synapse.welford_m2 += (signed_delta - previous_mean) * (signed_delta - synapse.welford_mean)

    # Remember how intensely this synapse has been taught.
    synapse.metadata["strength_sum"] = synapse.metadata.get("strength_sum", 0.0) + strength
    synapse.metadata["strength_count"] = synapse.metadata.get("strength_count", 0) + 1
    if metadata:
        synapse.metadata["last_context"] = metadata

    self._total_outcomes += 1
    if success:
        self._total_successes += 1

    result = {
        "node_id": node.node_id,
        "target_id": target_id,
        "success": success,
        "weight_after": synapse.weight,
        "activation_count": synapse.activation_count,
        "variance": synapse.variance,
        "contested": synapse.is_contested,
    }

    self._record_history(result)
    _forward_to_bridge(result)
    return result
|
| 785 |
+
|
| 786 |
+
def get_recommendations(
    self,
    embedding: np.ndarray,
    top_k: int = 3,
) -> List[Tuple[str, float, str]]:
    """Recommend targets for an input pattern.

    Resolution order:
        1. A connected bridge, when it returns a non-empty result.
        2. The Rust core, when attached.
        3. Local synapses of the node matched by ``find_or_create_node``.

    Bridge errors are logged and the lookup falls through to the next
    source.

    Args:
        embedding: The input pattern embedding.
        top_k: Maximum number of recommendations to return.

    Returns:
        ``(target_id, confidence, reasoning)`` tuples ordered by weight,
        strongest first. On the local path the reasoning string comes
        from ``_build_local_reasoning``. Empty when the matched node is
        constitutional or has no synapse above ``pruning_threshold``.
    """
    # 1) Prefer the bridge when it has an answer.
    if self._bridge and self._bridge.is_connected():
        try:
            bridge_recs = self._bridge.get_recommendations(
                embedding=embedding,
                module_id=self.module_id,
                top_k=top_k,
            )
            if bridge_recs:
                return bridge_recs
        except Exception as e:
            logger.warning("Bridge get_recommendations failed: %s", e)

    # 2) Rust fast path.
    if self._core is not None:
        return self._core.get_recommendations(embedding, top_k)

    # 3) Pure-Python local learning.
    node = self.find_or_create_node(embedding)

    # Constitutional nodes never yield recommendations.
    if node.constitutional:
        return []

    candidates = [
        (syn.target_id, syn.weight, self._build_local_reasoning(syn))
        for key, syn in self.synapses.items()
        if key[0] == node.node_id and syn.weight > self.config["pruning_threshold"]
    ]
    candidates.sort(key=lambda rec: rec[1], reverse=True)
    return candidates[:top_k]
|
| 847 |
+
|
| 848 |
+
def _build_local_reasoning(self, synapse: NGLiteSynapse) -> str:
|
| 849 |
+
"""Generate reasoning string from local Hebbian experience.
|
| 850 |
+
|
| 851 |
+
Single point of evolution for how NG-Lite articulates its local
|
| 852 |
+
learning. V1 renders synapse stats and strength signatures.
|
| 853 |
+
As NG-Lite gains meta-learning capability (punch list #21),
|
| 854 |
+
this method becomes the place where reasoning generation
|
| 855 |
+
itself improves.
|
| 856 |
+
|
| 857 |
+
This is an extraction boundary — topology becomes human-legible
|
| 858 |
+
here. The substrate doesn't need these labels; consumers and
|
| 859 |
+
dashboards do.
|
| 860 |
+
|
| 861 |
+
Args:
|
| 862 |
+
synapse: The synapse whose experience to articulate.
|
| 863 |
+
|
| 864 |
+
Returns:
|
| 865 |
+
Human-readable reasoning grounded in actual experience data.
|
| 866 |
+
"""
|
| 867 |
+
total = synapse.success_count + synapse.failure_count
|
| 868 |
+
if total > 0:
|
| 869 |
+
detail = f"w={synapse.weight:.2f}, {synapse.activation_count} activations"
|
| 870 |
+
strength_count = synapse.metadata.get("strength_count", 0)
|
| 871 |
+
if strength_count > 0:
|
| 872 |
+
avg = synapse.metadata["strength_sum"] / strength_count
|
| 873 |
+
detail += f", avg_strength={avg:.2f}"
|
| 874 |
+
return (
|
| 875 |
+
f"Hebbian: {synapse.success_count}/{total} success ({detail})"
|
| 876 |
+
)
|
| 877 |
+
return f"Hebbian: no outcomes yet (w={synapse.weight:.2f})"
|
| 878 |
+
|
| 879 |
+
def detect_novelty(self, embedding: np.ndarray) -> float:
    """Score how novel an input pattern is.

    Resolution order:
        1. A connected bridge, when it returns a non-``None`` score.
        2. The Rust core, when attached.
        3. Local fallback: ``1 - max cosine similarity`` between the
           normalized input and every cached node embedding.

    Bridge errors are logged and the lookup falls through.

    Args:
        embedding: The input pattern embedding.

    Returns:
        On the local path, a novelty score in [0.0, 1.0] where 0.0 means
        routine/known and 1.0 means completely novel. Scores from the
        bridge or the Rust core are returned unchanged.
    """
    # 1) Prefer the bridge's score.
    if self._bridge and self._bridge.is_connected():
        try:
            bridge_novelty = self._bridge.detect_novelty(
                embedding=embedding,
                module_id=self.module_id,
            )
            if bridge_novelty is not None:
                return bridge_novelty
        except Exception as e:
            logger.warning("Bridge detect_novelty failed: %s", e)

    # 2) Rust fast path.
    if self._core is not None:
        return self._core.detect_novelty(embedding)

    # 3) Local novelty detection (Python fallback).
    if not self._embedding_cache:
        return 1.0  # Everything is novel when we know nothing

    emb = self._normalize(embedding)

    # Negative similarities are treated as 0, hence the 0.0 starting point.
    max_similarity = 0.0
    for cached_emb in self._embedding_cache.values():
        similarity = float(np.dot(emb, cached_emb))
        if similarity > max_similarity:
            max_similarity = similarity

    # A dot product of two "unit" vectors can land slightly above 1.0
    # through floating-point rounding, which would make the score
    # negative. Clamp so the documented [0, 1] range always holds.
    return min(1.0, max(0.0, 1.0 - max_similarity))
|
| 927 |
+
|
| 928 |
+
# -------------------------------------------------------------------
|
| 929 |
+
# Bridge Management
|
| 930 |
+
# -------------------------------------------------------------------
|
| 931 |
+
|
| 932 |
+
def connect_bridge(self, bridge: NGBridge) -> None:
    """Attach a bridge to the full NeuroGraph substrate.

    The bridge is stored on the instance; recommendation, novelty and
    outcome methods consult it while it reports itself as connected.

    Args:
        bridge: The bridge object to use from now on.
    """
    self._bridge = bridge
    logger.info(
        "NG-Lite bridge connected for module '%s'",
        self.module_id,
    )
|
| 941 |
+
|
| 942 |
+
def disconnect_bridge(self) -> None:
    """Drop the current bridge so only local learning is used."""
    self._bridge = None
    logger.info(
        "NG-Lite bridge disconnected, using local learning"
    )
|
| 946 |
+
|
| 947 |
+
def sync_with_bridge(self) -> Optional[Dict[str, Any]]:
    """Push the exported local state through the bridge.

    Intended to be called periodically (for example hourly or after a
    batch of outcomes).

    Returns:
        Whatever the bridge's ``sync_state`` returns, or ``None`` when no
        connected bridge is available or the sync raised (the error is
        logged as a warning).
    """
    bridge_ready = self._bridge and self._bridge.is_connected()
    if not bridge_ready:
        return None

    try:
        snapshot = self._export_state()
        return self._bridge.sync_state(
            local_state=snapshot,
            module_id=self.module_id,
        )
    except Exception as e:
        logger.warning("Bridge sync failed: %s", e)
        return None
|
| 970 |
+
|
| 971 |
+
# -------------------------------------------------------------------
|
| 972 |
+
# Persistence
|
| 973 |
+
# -------------------------------------------------------------------
|
| 974 |
+
|
| 975 |
+
def save(self, filepath: str) -> None:
    """Persist the full state to disk.

    With a Rust core attached, the core writes a binary file whose path
    is ``filepath`` with ``".json"`` replaced by ``".msgpack"``.
    Without a core, the exported state is written as indented JSON to
    ``filepath`` itself.

    Args:
        filepath: Path to write the state file.
    """
    if self._core is None:
        # Pure-Python fallback: plain JSON snapshot.
        state = self._export_state()
        with open(filepath, "w") as f:
            json.dump(state, f, indent=2)
        logger.info(
            "NG-Lite state saved to %s (%d nodes, %d synapses)",
            filepath, len(self.nodes), len(self.synapses),
        )
        return

    # Binary persistence is handled entirely by the core.
    bin_path = filepath.replace(".json", ".msgpack")
    self._core.save_binary(bin_path)
    logger.info("NG-Lite state saved (binary) to %s", bin_path)
|
| 998 |
+
|
| 999 |
+
def load(self, filepath: str) -> None:
    """Restore state from a binary (msgpack) or JSON file.

    With a Rust core attached:
        - if the binary sibling (``".json"`` replaced by ``".msgpack"``)
          exists, it is loaded by the core;
        - otherwise, if ``filepath`` exists, the JSON is imported into
          the core and immediately re-saved as binary.

    Without a core (or when neither branch above applied), ``filepath``
    is read as JSON and handed to ``_import_state`` if it exists.
    A missing file is not an error; nothing is loaded.

    Args:
        filepath: Path to the state file (.json or .msgpack).
    """
    import os

    bin_path = filepath.replace(".json", ".msgpack")

    if self._core is not None:
        # Native binary snapshot wins when present.
        if os.path.exists(bin_path):
            self._core.load_binary(bin_path)
            logger.info("NG-Lite state loaded (binary) from %s", bin_path)
            return

        # One-time migration: JSON -> core -> binary.
        if os.path.exists(filepath):
            with open(filepath, "r") as f:
                state = json.load(f)
            self._core.import_state(state)
            self._core.save_binary(bin_path)
            logger.info(
                "NG-Lite state migrated from JSON to binary: %s → %s",
                filepath, bin_path,
            )
            return

    # Pure-Python fallback.
    if not os.path.exists(filepath):
        return
    with open(filepath, "r") as f:
        state = json.load(f)
    self._import_state(state)
    logger.info(
        "NG-Lite state loaded from %s (%d nodes, %d synapses)",
        filepath, len(self.nodes), len(self.synapses),
    )
|
| 1038 |
+
|
| 1039 |
+
def _export_state(self) -> Dict[str, Any]:
|
| 1040 |
+
"""Export full state as a serializable dict.
|
| 1041 |
+
|
| 1042 |
+
Synapse keys are converted from (source_id, target_id) tuples
|
| 1043 |
+
to "source_id|target_id" strings for JSON compatibility.
|
| 1044 |
+
"""
|
| 1045 |
+
# Convert synapse keys from tuples to strings for JSON
|
| 1046 |
+
synapses_serialized = {}
|
| 1047 |
+
for (src, tgt), syn in self.synapses.items():
|
| 1048 |
+
key = f"{src}|{tgt}"
|
| 1049 |
+
synapses_serialized[key] = asdict(syn)
|
| 1050 |
+
|
| 1051 |
+
# Receptor layer state (#43)
|
| 1052 |
+
receptor_state = {}
|
| 1053 |
+
if self._prototypes is not None:
|
| 1054 |
+
receptor_state = {
|
| 1055 |
+
"prototypes": self._prototypes.tolist(),
|
| 1056 |
+
"prototype_counts": self._prototype_counts.tolist(),
|
| 1057 |
+
"input_count": self._receptor_input_count,
|
| 1058 |
+
}
|
| 1059 |
+
|
| 1060 |
+
return {
|
| 1061 |
+
"version": self.config["snapshot_version"],
|
| 1062 |
+
"module_id": self.module_id,
|
| 1063 |
+
"timestamp": time.time(),
|
| 1064 |
+
"config": self.config,
|
| 1065 |
+
"nodes": {k: self._serialize_node(v) for k, v in self.nodes.items()},
|
| 1066 |
+
"synapses": synapses_serialized,
|
| 1067 |
+
"counters": {
|
| 1068 |
+
"node_id_counter": self._node_id_counter,
|
| 1069 |
+
"total_outcomes": self._total_outcomes,
|
| 1070 |
+
"total_successes": self._total_successes,
|
| 1071 |
+
},
|
| 1072 |
+
"receptor_layer": receptor_state,
|
| 1073 |
+
}
|
| 1074 |
+
|
| 1075 |
+
def _import_state(self, state: Dict[str, Any]) -> None:
    """Replace the in-memory state with a deserialized snapshot.

    Restores module id, config, nodes (rebuilding the embedding cache),
    synapses, counters and the receptor layer, then re-seeds
    constitutional nodes. The embedding cache and history are cleared
    first. ``state`` itself is left unmodified, so the same dict can be
    imported again or inspected afterwards.

    Args:
        state: Snapshot dict in the layout produced by ``_export_state``.
            Missing sections fall back to empty/default values.
    """
    self.module_id = state.get("module_id", self.module_id)

    # Restore config (merge with defaults for forward compatibility).
    # The constructor's constitutional_embeddings win over the saved
    # ones: the live config may carry rim constraints added after the
    # snapshot was written, and the snapshot must not erase them.
    saved_config = state.get("config", {})
    live_constitutional = self.config.get("constitutional_embeddings", [])
    self.config = {**DEFAULT_CONFIG, **saved_config}
    if live_constitutional:
        self.config["constitutional_embeddings"] = live_constitutional

    # Clear caches before rebuild
    self._embedding_cache.clear()
    self._history.clear()

    # Restore nodes (rebuild embedding cache from persisted embeddings)
    self.nodes = {}
    for key, node_data in state.get("nodes", {}).items():
        # Work on a shallow copy: popping from the caller's dict would
        # strip the embedding from ``state`` and make a second import of
        # the same snapshot silently lose all embeddings.
        node_fields = dict(node_data)
        emb_list = node_fields.pop("embedding", None)
        node = NGLiteNode(**node_fields)
        if emb_list is not None:
            node.embedding = self._normalize(np.array(emb_list, dtype=np.float32))
            self._embedding_cache[key] = node.embedding
        self.nodes[key] = node

    # Restore synapses: "source|target" strings back to tuple keys.
    # Keys without a "|" separator are skipped.
    self.synapses = {}
    for key, syn_data in state.get("synapses", {}).items():
        parts = key.split("|", 1)
        if len(parts) == 2:
            tuple_key = (parts[0], parts[1])
            self.synapses[tuple_key] = NGLiteSynapse(**syn_data)

    # Restore counters
    counters = state.get("counters", {})
    self._node_id_counter = counters.get("node_id_counter", 0)
    self._total_outcomes = counters.get("total_outcomes", 0)
    self._total_successes = counters.get("total_successes", 0)

    # Restore receptor layer (#43) — old state files load cleanly (no key)
    receptor = state.get("receptor_layer", {})
    if receptor.get("prototypes"):
        self._prototypes = np.array(receptor["prototypes"], dtype=np.float32)
        # Re-normalize after deserialization
        norms = np.linalg.norm(self._prototypes, axis=1, keepdims=True)
        norms = np.maximum(norms, 1e-12)
        self._prototypes = self._prototypes / norms
        self._prototype_counts = np.array(
            receptor.get("prototype_counts", [0] * len(self._prototypes)),
            dtype=np.int64,
        )
        self._receptor_input_count = receptor.get("input_count", 0)
    else:
        self._prototypes = None
        self._prototype_counts = None
        self._receptor_input_count = 0

    # Re-seed constitutional nodes after state restore so constraints
    # present in the live config are applied to the restored topology.
    self._seed_constitutional_nodes()
|
| 1139 |
+
|
| 1140 |
+
# -------------------------------------------------------------------
|
| 1141 |
+
# Dynamic Tuning (Phase 4 — Elmer outward)
|
| 1142 |
+
# -------------------------------------------------------------------
|
| 1143 |
+
|
| 1144 |
+
# Runtime-adjustable parameters: the only config keys that Elmer (or any
# other organ) may change through update_tunable(). Each entry maps the
# config key to its inclusive (min, max) bounds; every key not listed
# here is frozen.
TUNABLE_PARAMS: Dict[str, Tuple[float, float]] = dict(
    success_boost=(0.01, 0.50),
    failure_penalty=(0.01, 0.50),
    novelty_threshold=(0.30, 0.95),
    pruning_threshold=(0.001, 0.10),
    receptor_ema_alpha=(0.0001, 0.01),
    receptor_prototype_threshold=(0.50, 0.95),
    relevance_threshold=(0.10, 0.70),  # Punchlist #44
)
|
| 1155 |
+
|
| 1156 |
+
def update_tunable(self, key: str, value: float) -> Dict[str, Any]:
    """Update a tunable config parameter at runtime.

    Only keys listed in ``TUNABLE_PARAMS`` are accepted, and the value
    is clamped to the declared bounds. The key is validated and the
    value clamped *before* anything is changed, so the Python config
    and the Rust core (when attached) always receive the same effective
    value and a rejected key never reaches the core.

    A failure to update the Rust core is logged at debug level and does
    not abort the update (the core may not know this key yet).

    Args:
        key: Name of the tunable parameter.
        value: Requested value; clamped into ``[min, max]``.

    Returns:
        Dict with ``key``, ``old_value``, ``new_value`` and ``clamped``
        (True when the requested value was outside the bounds).

    Raises:
        KeyError: If ``key`` is not a tunable parameter.
    """
    if key not in self.TUNABLE_PARAMS:
        raise KeyError(
            f"'{key}' is not a tunable parameter. "
            f"Allowed: {sorted(self.TUNABLE_PARAMS.keys())}"
        )

    lo, hi = self.TUNABLE_PARAMS[key]
    old_value = self.config[key]
    requested = float(value)
    clamped = requested < lo or requested > hi
    new_value = max(lo, min(hi, requested))
    self.config[key] = new_value

    # Mirror the *clamped* value into the Rust core so both sides agree.
    if self._core is not None:
        try:
            self._core.update_config(key, new_value)
        except Exception as e:
            # Best-effort: the core may not support this key yet.
            logger.debug("Rust core update_config('%s') failed: %s", key, e)

    logger.info(
        "Tunable updated: %s %.6f → %.6f%s",
        key, old_value, new_value,
        " (clamped)" if clamped else "",
    )

    # Punchlist #44: push relevance_threshold to connected bridge
    if key == "relevance_threshold" and self._bridge is not None:
        if hasattr(self._bridge, 'set_relevance_threshold'):
            self._bridge.set_relevance_threshold(new_value)

    return {
        "key": key,
        "old_value": old_value,
        "new_value": new_value,
        "clamped": clamped,
    }
|
| 1199 |
+
|
| 1200 |
+
def get_tunables(self) -> Dict[str, Dict[str, float]]:
    """Report every tunable parameter with its current value and bounds.

    Returns:
        Mapping of parameter name to ``{"value", "min", "max"}``, one
        entry per key in ``TUNABLE_PARAMS``.
    """
    return {
        name: {"value": self.config[name], "min": lower, "max": upper}
        for name, (lower, upper) in self.TUNABLE_PARAMS.items()
    }
|
| 1210 |
+
|
| 1211 |
+
# -------------------------------------------------------------------
|
| 1212 |
+
# Stats & Telemetry
|
| 1213 |
+
# -------------------------------------------------------------------
|
| 1214 |
+
|
| 1215 |
+
def get_stats(self) -> Dict[str, Any]:
    """Summarize the current state for logging, queries or display.

    Returns:
        Dict with the library version, module id, node/synapse counts
        and configured limits, an estimated memory footprint, outcome
        totals with the derived success rate, the mean synapse weight,
        the bridge connection state and the embedding cache size.
        Ratios are 0.0 when there is nothing to average.
    """
    weights = [syn.weight for syn in self.synapses.values()]

    # Guard both averages against division by zero / empty input.
    if self._total_outcomes > 0:
        success_rate = self._total_successes / self._total_outcomes
    else:
        success_rate = 0.0
    avg_weight = float(np.mean(weights)) if weights else 0.0

    bridge_connected = (
        self._bridge is not None
        and self._bridge.is_connected()
    )

    return {
        "version": __version__,
        "module_id": self.module_id,
        "node_count": len(self.nodes),
        "synapse_count": len(self.synapses),
        "max_nodes": self.config["max_nodes"],
        "max_synapses": self.config["max_synapses"],
        "memory_estimate_bytes": self._estimate_memory(),
        "total_outcomes": self._total_outcomes,
        "total_successes": self._total_successes,
        "success_rate": success_rate,
        "avg_synapse_weight": avg_weight,
        "bridge_connected": bridge_connected,
        "embedding_cache_size": len(self._embedding_cache),
    }
|
| 1247 |
+
|
| 1248 |
+
# -------------------------------------------------------------------
|
| 1249 |
+
# Internal Methods
|
| 1250 |
+
# -------------------------------------------------------------------
|
| 1251 |
+
|
| 1252 |
+
def _serialize_node(self, node: NGLiteNode) -> Dict[str, Any]:
|
| 1253 |
+
"""Serialize a node to a JSON-compatible dict.
|
| 1254 |
+
|
| 1255 |
+
Converts embedding from np.ndarray to list for JSON.
|
| 1256 |
+
Omits embedding key when None (backward-compatible with
|
| 1257 |
+
state files created before embedding persistence).
|
| 1258 |
+
"""
|
| 1259 |
+
d = asdict(node)
|
| 1260 |
+
if node.embedding is not None:
|
| 1261 |
+
d["embedding"] = node.embedding.tolist()
|
| 1262 |
+
else:
|
| 1263 |
+
d.pop("embedding", None)
|
| 1264 |
+
return d
|
| 1265 |
+
|
| 1266 |
+
@staticmethod
|
| 1267 |
+
def _normalize(embedding: np.ndarray) -> np.ndarray:
|
| 1268 |
+
"""L2-normalize an embedding vector."""
|
| 1269 |
+
norm = np.linalg.norm(embedding)
|
| 1270 |
+
if norm < 1e-12:
|
| 1271 |
+
return embedding
|
| 1272 |
+
return embedding / norm
|
| 1273 |
+
|
| 1274 |
+
def _hash_embedding(self, embedding: np.ndarray) -> str:
|
| 1275 |
+
"""Hash embedding to a fixed-size string for fast lookup.
|
| 1276 |
+
|
| 1277 |
+
Uses the first ``hash_dims`` dimensions of the embedding,
|
| 1278 |
+
converted to bytes, then SHA-256 truncated to 32 hex chars.
|
| 1279 |
+
This gives a compact, collision-resistant key.
|
| 1280 |
+
"""
|
| 1281 |
+
dims = self.config["hash_dims"]
|
| 1282 |
+
truncated = embedding[:dims]
|
| 1283 |
+
hash_input = truncated.astype(np.float32).tobytes()
|
| 1284 |
+
return hashlib.sha256(hash_input).hexdigest()[:32]
|
| 1285 |
+
|
| 1286 |
+
# -------------------------------------------------------------------
|
| 1287 |
+
# Receptor Layer (#43) — Adaptive Vector Quantization
|
| 1288 |
+
# -------------------------------------------------------------------
|
| 1289 |
+
|
| 1290 |
+
def _snap_to_prototype(self, embedding: np.ndarray) -> np.ndarray:
|
| 1291 |
+
"""Snap an input vector to the nearest prototype centroid.
|
| 1292 |
+
|
| 1293 |
+
If receptor layer is not enabled or not yet initialized (still in
|
| 1294 |
+
warmup), returns the input unchanged. Otherwise, finds the nearest
|
| 1295 |
+
prototype above the similarity threshold and returns that prototype's
|
| 1296 |
+
centroid. If no prototype is close enough, returns the input as-is
|
| 1297 |
+
(novel pattern — passes through unquantized).
|
| 1298 |
+
|
| 1299 |
+
The matched prototype drifts toward the input via slow EMA, so
|
| 1300 |
+
prototypes are living tissue that adapts to the input distribution.
|
| 1301 |
+
Birth/death lifecycle is deferred to Elmer.
|
| 1302 |
+
|
| 1303 |
+
Args:
|
| 1304 |
+
embedding: L2-normalized input vector (D,).
|
| 1305 |
+
|
| 1306 |
+
Returns:
|
| 1307 |
+
Either the nearest prototype centroid or the original embedding.
|
| 1308 |
+
"""
|
| 1309 |
+
if not self.config.get("receptor_layer_enabled", False):
|
| 1310 |
+
return embedding
|
| 1311 |
+
|
| 1312 |
+
# Warmup phase: accumulate inputs before initializing prototypes
|
| 1313 |
+
self._receptor_input_count += 1
|
| 1314 |
+
if self._prototypes is None:
|
| 1315 |
+
if self._receptor_input_count >= self.config["receptor_warmup_count"]:
|
| 1316 |
+
self._init_prototypes()
|
| 1317 |
+
if self._prototypes is None:
|
| 1318 |
+
return embedding
|
| 1319 |
+
|
| 1320 |
+
# Vectorized cosine similarity against all prototypes
|
| 1321 |
+
threshold = self.config["receptor_prototype_threshold"]
|
| 1322 |
+
similarities = self._prototypes @ embedding # (K,)
|
| 1323 |
+
best_idx = int(np.argmax(similarities))
|
| 1324 |
+
best_sim = float(similarities[best_idx])
|
| 1325 |
+
|
| 1326 |
+
if best_sim >= threshold:
|
| 1327 |
+
# EMA drift: pull prototype toward input
|
| 1328 |
+
alpha = self.config["receptor_ema_alpha"]
|
| 1329 |
+
self._prototypes[best_idx] = self._normalize(
|
| 1330 |
+
(1.0 - alpha) * self._prototypes[best_idx] + alpha * embedding
|
| 1331 |
+
)
|
| 1332 |
+
self._prototype_counts[best_idx] += 1
|
| 1333 |
+
return self._prototypes[best_idx].copy()
|
| 1334 |
+
|
| 1335 |
+
# No prototype close enough — novel pattern passes through
|
| 1336 |
+
return embedding
|
| 1337 |
+
|
| 1338 |
+
def _init_prototypes(self) -> None:
|
| 1339 |
+
"""Initialize prototypes via k-means on existing node embeddings.
|
| 1340 |
+
|
| 1341 |
+
Uses a simple iterative k-means (no external dependencies). If fewer
|
| 1342 |
+
embeddings exist than K, uses all embeddings as prototypes.
|
| 1343 |
+
"""
|
| 1344 |
+
if not self._embedding_cache:
|
| 1345 |
+
return
|
| 1346 |
+
|
| 1347 |
+
embeddings = np.stack(list(self._embedding_cache.values()))
|
| 1348 |
+
n = len(embeddings)
|
| 1349 |
+
k = min(self.config["receptor_layer_k"], n)
|
| 1350 |
+
|
| 1351 |
+
if k < 2:
|
| 1352 |
+
return
|
| 1353 |
+
|
| 1354 |
+
# Simple k-means: random init from existing embeddings, 20 iterations
|
| 1355 |
+
rng = np.random.RandomState(42)
|
| 1356 |
+
indices = rng.choice(n, size=k, replace=False)
|
| 1357 |
+
centroids = embeddings[indices].copy()
|
| 1358 |
+
|
| 1359 |
+
for _ in range(20):
|
| 1360 |
+
# Assign each embedding to nearest centroid
|
| 1361 |
+
sims = embeddings @ centroids.T # (N, K)
|
| 1362 |
+
assignments = np.argmax(sims, axis=1)
|
| 1363 |
+
|
| 1364 |
+
# Recompute centroids
|
| 1365 |
+
new_centroids = np.zeros_like(centroids)
|
| 1366 |
+
for j in range(k):
|
| 1367 |
+
members = embeddings[assignments == j]
|
| 1368 |
+
if len(members) > 0:
|
| 1369 |
+
new_centroids[j] = members.mean(axis=0)
|
| 1370 |
+
else:
|
| 1371 |
+
new_centroids[j] = centroids[j]
|
| 1372 |
+
|
| 1373 |
+
# L2-normalize centroids
|
| 1374 |
+
norms = np.linalg.norm(new_centroids, axis=1, keepdims=True)
|
| 1375 |
+
norms = np.maximum(norms, 1e-12)
|
| 1376 |
+
centroids = new_centroids / norms
|
| 1377 |
+
|
| 1378 |
+
self._prototypes = centroids
|
| 1379 |
+
self._prototype_counts = np.zeros(k, dtype=np.int64)
|
| 1380 |
+
logger.info(
|
| 1381 |
+
"Receptor layer initialized: %d prototypes from %d embeddings",
|
| 1382 |
+
k, n,
|
| 1383 |
+
)
|
| 1384 |
+
|
| 1385 |
+
def _find_similar_node(self, embedding: np.ndarray) -> Optional[NGLiteNode]:
    """Return the cached node closest to ``embedding`` if it is close enough.

    All cached embeddings are stacked into one matrix and scored against
    the query with a single matrix-vector product, so there is no
    per-node Python loop. The best-scoring entry is accepted when its
    score is at least ``1 - novelty_threshold``; otherwise the input is
    treated as novel and ``None`` is returned.

    The score is a plain dot product, which equals cosine similarity only
    if both sides are unit-length (assumed here — TODO confirm at the
    call sites that fill ``_embedding_cache``).

    Args:
        embedding: Query vector of shape (D,).

    Returns:
        The matching node looked up in ``self.nodes``, or ``None`` when
        the cache is empty, nothing is similar enough, or the best hash
        has no entry in ``self.nodes``.
    """
    min_similarity = 1.0 - self.config["novelty_threshold"]

    if not self._embedding_cache:
        return None

    # Keys and values are taken from the same dict in the same order, so
    # row i of the matrix belongs to hashes[i].
    hashes = list(self._embedding_cache.keys())
    scores = np.stack(list(self._embedding_cache.values())) @ embedding  # (N,)

    winner = int(np.argmax(scores))
    if float(scores[winner]) >= min_similarity:
        return self.nodes.get(hashes[winner])

    return None
|
| 1412 |
+
|
| 1413 |
+
def _get_or_create_synapse(
    self,
    source_id: str,
    target_id: str,
) -> NGLiteSynapse:
    """Look up the synapse for ``(source_id, target_id)``, creating it if absent.

    A newly created synapse starts at weight 0.5 and is stamped with the
    current time. When the synapse table is already at ``max_synapses``,
    the weakest existing synapse is pruned first to make room.

    Args:
        source_id: Identifier of the synapse's source.
        target_id: Identifier of the synapse's target.

    Returns:
        The existing or freshly registered synapse.
    """
    pair = (source_id, target_id)
    if pair in self.synapses:
        return self.synapses[pair]

    # Table is full: evict one entry before adding the new one.
    if len(self.synapses) >= self.config["max_synapses"]:
        self._prune_weakest_synapse()

    fresh = NGLiteSynapse(
        source_id=source_id,
        target_id=target_id,
        weight=0.5,  # neutral starting weight
        last_updated=time.time(),
    )
    self.synapses[pair] = fresh
    return fresh
|
| 1434 |
+
|
| 1435 |
+
def _prune_least_used_node(self) -> None:
|
| 1436 |
+
"""Remove the node with the lowest activation count (LRU).
|
| 1437 |
+
|
| 1438 |
+
Constitutional nodes are never pruned — they are the rim.
|
| 1439 |
+
"""
|
| 1440 |
+
if not self.nodes:
|
| 1441 |
+
return
|
| 1442 |
+
|
| 1443 |
+
# Find least-used non-constitutional node
|
| 1444 |
+
prunable = [h for h in self.nodes if not self.nodes[h].constitutional]
|
| 1445 |
+
if not prunable:
|
| 1446 |
+
return
|
| 1447 |
+
|
| 1448 |
+
least_hash = min(
|
| 1449 |
+
prunable,
|
| 1450 |
+
key=lambda h: self.nodes[h].activation_count,
|
| 1451 |
+
)
|
| 1452 |
+
least_node = self.nodes[least_hash]
|
| 1453 |
+
|
| 1454 |
+
# Remove associated synapses
|
| 1455 |
+
keys_to_remove = [
|
| 1456 |
+
key for key in self.synapses
|
| 1457 |
+
if key[0] == least_node.node_id
|
| 1458 |
+
]
|
| 1459 |
+
for key in keys_to_remove:
|
| 1460 |
+
del self.synapses[key]
|
| 1461 |
+
|
| 1462 |
+
# Remove node and cached embedding
|
| 1463 |
+
del self.nodes[least_hash]
|
| 1464 |
+
self._embedding_cache.pop(least_hash, None)
|
| 1465 |
+
|
| 1466 |
+
def _prune_weakest_synapse(self) -> None:
|
| 1467 |
+
"""Remove the synapse with the lowest weight."""
|
| 1468 |
+
if not self.synapses:
|
| 1469 |
+
return
|
| 1470 |
+
|
| 1471 |
+
weakest_key = min(
|
| 1472 |
+
self.synapses,
|
| 1473 |
+
key=lambda k: self.synapses[k].weight,
|
| 1474 |
+
)
|
| 1475 |
+
del self.synapses[weakest_key]
|
| 1476 |
+
|
| 1477 |
+
def _record_history(self, entry: Dict[str, Any]) -> None:
|
| 1478 |
+
"""Append to bounded history."""
|
| 1479 |
+
entry["timestamp"] = time.time()
|
| 1480 |
+
self._history.append(entry)
|
| 1481 |
+
if len(self._history) > self._history_max:
|
| 1482 |
+
self._history = self._history[-self._history_max:]
|
| 1483 |
+
|
| 1484 |
+
def _estimate_memory(self) -> int:
|
| 1485 |
+
"""Rough estimate of memory footprint in bytes.
|
| 1486 |
+
|
| 1487 |
+
Node: ~200 bytes each (dataclass + hash key)
|
| 1488 |
+
Synapse: ~150 bytes each (dataclass + tuple key)
|
| 1489 |
+
Embedding cache: ~embedding_dim * 4 bytes each (float32)
|
| 1490 |
+
"""
|
| 1491 |
+
node_bytes = len(self.nodes) * 200
|
| 1492 |
+
synapse_bytes = len(self.synapses) * 150
|
| 1493 |
+
cache_bytes = len(self._embedding_cache) * self.config["embedding_dim"] * 4
|
| 1494 |
+
return node_bytes + synapse_bytes + cache_bytes
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=5.5.0
|
| 2 |
+
torch>=2.0.0
|
| 3 |
+
accelerate>=0.27.0
|
| 4 |
+
onnxruntime>=1.16.0
|
| 5 |
+
tokenizers>=0.15.0
|
rust_lenia/Cargo.lock
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is automatically @generated by Cargo.
|
| 2 |
+
# It is not intended for manual editing.
|
| 3 |
+
version = 4
|
| 4 |
+
|
| 5 |
+
[[package]]
|
| 6 |
+
name = "autocfg"
|
| 7 |
+
version = "1.5.0"
|
| 8 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 9 |
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
| 10 |
+
|
| 11 |
+
[[package]]
|
| 12 |
+
name = "cfg-if"
|
| 13 |
+
version = "1.0.4"
|
| 14 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 15 |
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
| 16 |
+
|
| 17 |
+
[[package]]
|
| 18 |
+
name = "heck"
|
| 19 |
+
version = "0.5.0"
|
| 20 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 21 |
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
| 22 |
+
|
| 23 |
+
[[package]]
|
| 24 |
+
name = "indoc"
|
| 25 |
+
version = "2.0.7"
|
| 26 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 27 |
+
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
| 28 |
+
dependencies = [
|
| 29 |
+
"rustversion",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
[[package]]
|
| 33 |
+
name = "libc"
|
| 34 |
+
version = "0.2.183"
|
| 35 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 36 |
+
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
| 37 |
+
|
| 38 |
+
[[package]]
|
| 39 |
+
name = "matrixmultiply"
|
| 40 |
+
version = "0.3.10"
|
| 41 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 42 |
+
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
| 43 |
+
dependencies = [
|
| 44 |
+
"autocfg",
|
| 45 |
+
"rawpointer",
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
[[package]]
|
| 49 |
+
name = "memoffset"
|
| 50 |
+
version = "0.9.1"
|
| 51 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 52 |
+
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
| 53 |
+
dependencies = [
|
| 54 |
+
"autocfg",
|
| 55 |
+
]
|
| 56 |
+
|
| 57 |
+
[[package]]
|
| 58 |
+
name = "ndarray"
|
| 59 |
+
version = "0.16.1"
|
| 60 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 61 |
+
checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
|
| 62 |
+
dependencies = [
|
| 63 |
+
"matrixmultiply",
|
| 64 |
+
"num-complex",
|
| 65 |
+
"num-integer",
|
| 66 |
+
"num-traits",
|
| 67 |
+
"portable-atomic",
|
| 68 |
+
"portable-atomic-util",
|
| 69 |
+
"rawpointer",
|
| 70 |
+
]
|
| 71 |
+
|
| 72 |
+
[[package]]
|
| 73 |
+
name = "num-complex"
|
| 74 |
+
version = "0.4.6"
|
| 75 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 76 |
+
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
| 77 |
+
dependencies = [
|
| 78 |
+
"num-traits",
|
| 79 |
+
]
|
| 80 |
+
|
| 81 |
+
[[package]]
|
| 82 |
+
name = "num-integer"
|
| 83 |
+
version = "0.1.46"
|
| 84 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 85 |
+
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
| 86 |
+
dependencies = [
|
| 87 |
+
"num-traits",
|
| 88 |
+
]
|
| 89 |
+
|
| 90 |
+
[[package]]
|
| 91 |
+
name = "num-traits"
|
| 92 |
+
version = "0.2.19"
|
| 93 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 94 |
+
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
| 95 |
+
dependencies = [
|
| 96 |
+
"autocfg",
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
[[package]]
|
| 100 |
+
name = "numpy"
|
| 101 |
+
version = "0.24.0"
|
| 102 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 103 |
+
checksum = "a7cfbf3f0feededcaa4d289fe3079b03659e85c5b5a177f4ba6fb01ab4fb3e39"
|
| 104 |
+
dependencies = [
|
| 105 |
+
"libc",
|
| 106 |
+
"ndarray",
|
| 107 |
+
"num-complex",
|
| 108 |
+
"num-integer",
|
| 109 |
+
"num-traits",
|
| 110 |
+
"pyo3",
|
| 111 |
+
"pyo3-build-config",
|
| 112 |
+
"rustc-hash",
|
| 113 |
+
]
|
| 114 |
+
|
| 115 |
+
[[package]]
|
| 116 |
+
name = "nuwave_lenia"
|
| 117 |
+
version = "0.2.0"
|
| 118 |
+
dependencies = [
|
| 119 |
+
"numpy",
|
| 120 |
+
"pyo3",
|
| 121 |
+
]
|
| 122 |
+
|
| 123 |
+
[[package]]
|
| 124 |
+
name = "once_cell"
|
| 125 |
+
version = "1.21.4"
|
| 126 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 127 |
+
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
|
| 128 |
+
|
| 129 |
+
[[package]]
|
| 130 |
+
name = "portable-atomic"
|
| 131 |
+
version = "1.13.1"
|
| 132 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 133 |
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
| 134 |
+
|
| 135 |
+
[[package]]
|
| 136 |
+
name = "portable-atomic-util"
|
| 137 |
+
version = "0.2.6"
|
| 138 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 139 |
+
checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3"
|
| 140 |
+
dependencies = [
|
| 141 |
+
"portable-atomic",
|
| 142 |
+
]
|
| 143 |
+
|
| 144 |
+
[[package]]
|
| 145 |
+
name = "proc-macro2"
|
| 146 |
+
version = "1.0.106"
|
| 147 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 148 |
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
| 149 |
+
dependencies = [
|
| 150 |
+
"unicode-ident",
|
| 151 |
+
]
|
| 152 |
+
|
| 153 |
+
[[package]]
|
| 154 |
+
name = "pyo3"
|
| 155 |
+
version = "0.24.2"
|
| 156 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 157 |
+
checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219"
|
| 158 |
+
dependencies = [
|
| 159 |
+
"cfg-if",
|
| 160 |
+
"indoc",
|
| 161 |
+
"libc",
|
| 162 |
+
"memoffset",
|
| 163 |
+
"once_cell",
|
| 164 |
+
"portable-atomic",
|
| 165 |
+
"pyo3-build-config",
|
| 166 |
+
"pyo3-ffi",
|
| 167 |
+
"pyo3-macros",
|
| 168 |
+
"unindent",
|
| 169 |
+
]
|
| 170 |
+
|
| 171 |
+
[[package]]
|
| 172 |
+
name = "pyo3-build-config"
|
| 173 |
+
version = "0.24.2"
|
| 174 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 175 |
+
checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999"
|
| 176 |
+
dependencies = [
|
| 177 |
+
"once_cell",
|
| 178 |
+
"target-lexicon",
|
| 179 |
+
]
|
| 180 |
+
|
| 181 |
+
[[package]]
|
| 182 |
+
name = "pyo3-ffi"
|
| 183 |
+
version = "0.24.2"
|
| 184 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 185 |
+
checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33"
|
| 186 |
+
dependencies = [
|
| 187 |
+
"libc",
|
| 188 |
+
"pyo3-build-config",
|
| 189 |
+
]
|
| 190 |
+
|
| 191 |
+
[[package]]
|
| 192 |
+
name = "pyo3-macros"
|
| 193 |
+
version = "0.24.2"
|
| 194 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 195 |
+
checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9"
|
| 196 |
+
dependencies = [
|
| 197 |
+
"proc-macro2",
|
| 198 |
+
"pyo3-macros-backend",
|
| 199 |
+
"quote",
|
| 200 |
+
"syn",
|
| 201 |
+
]
|
| 202 |
+
|
| 203 |
+
[[package]]
|
| 204 |
+
name = "pyo3-macros-backend"
|
| 205 |
+
version = "0.24.2"
|
| 206 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 207 |
+
checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a"
|
| 208 |
+
dependencies = [
|
| 209 |
+
"heck",
|
| 210 |
+
"proc-macro2",
|
| 211 |
+
"pyo3-build-config",
|
| 212 |
+
"quote",
|
| 213 |
+
"syn",
|
| 214 |
+
]
|
| 215 |
+
|
| 216 |
+
[[package]]
|
| 217 |
+
name = "quote"
|
| 218 |
+
version = "1.0.45"
|
| 219 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 220 |
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
| 221 |
+
dependencies = [
|
| 222 |
+
"proc-macro2",
|
| 223 |
+
]
|
| 224 |
+
|
| 225 |
+
[[package]]
|
| 226 |
+
name = "rawpointer"
|
| 227 |
+
version = "0.2.1"
|
| 228 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 229 |
+
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
| 230 |
+
|
| 231 |
+
[[package]]
|
| 232 |
+
name = "rustc-hash"
|
| 233 |
+
version = "2.1.2"
|
| 234 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 235 |
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
| 236 |
+
|
| 237 |
+
[[package]]
|
| 238 |
+
name = "rustversion"
|
| 239 |
+
version = "1.0.22"
|
| 240 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 241 |
+
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
| 242 |
+
|
| 243 |
+
[[package]]
|
| 244 |
+
name = "syn"
|
| 245 |
+
version = "2.0.117"
|
| 246 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 247 |
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
| 248 |
+
dependencies = [
|
| 249 |
+
"proc-macro2",
|
| 250 |
+
"quote",
|
| 251 |
+
"unicode-ident",
|
| 252 |
+
]
|
| 253 |
+
|
| 254 |
+
[[package]]
|
| 255 |
+
name = "target-lexicon"
|
| 256 |
+
version = "0.13.5"
|
| 257 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 258 |
+
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
| 259 |
+
|
| 260 |
+
[[package]]
|
| 261 |
+
name = "unicode-ident"
|
| 262 |
+
version = "1.0.24"
|
| 263 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 264 |
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
| 265 |
+
|
| 266 |
+
[[package]]
|
| 267 |
+
name = "unindent"
|
| 268 |
+
version = "0.2.4"
|
| 269 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 270 |
+
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
rust_lenia/Cargo.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[package]
|
| 2 |
+
name = "nuwave_lenia"
|
| 3 |
+
version = "0.2.0"
|
| 4 |
+
edition = "2024"
|
| 5 |
+
description = "NuWave Lenia dynamics engine — zero-copy f32 operations on tensor memory"
|
| 6 |
+
license = "MIT"
|
| 7 |
+
|
| 8 |
+
[lib]
|
| 9 |
+
name = "nuwave_lenia"
|
| 10 |
+
crate-type = ["cdylib", "rlib"]
|
| 11 |
+
|
| 12 |
+
[dependencies]
|
| 13 |
+
pyo3 = { version = "0.24", features = ["extension-module"] }
|
| 14 |
+
numpy = "0.24"
|
| 15 |
+
|
| 16 |
+
[profile.release]
|
| 17 |
+
opt-level = 3
|
| 18 |
+
lto = true
|
rust_lenia/src/engine.rs
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Lenia Dynamics Engine — zero-copy operations on tensor memory.
|
| 2 |
+
//!
|
| 3 |
+
//! Python passes numpy arrays (which share memory with PyTorch tensors).
|
| 4 |
+
//! Rust operates on the underlying f32 data directly. No copies.
|
| 5 |
+
//! Results are written back to the same memory.
|
| 6 |
+
//!
|
| 7 |
+
//! The hot path per weight matrix:
|
| 8 |
+
//! 1. Convolve with ring kernel → neighborhood potential
|
| 9 |
+
//! 2. Growth function → bell curve centered on target potential
|
| 10 |
+
//! 3. Modulate by activation magnitude
|
| 11 |
+
//! 4. Compute + clamp delta
|
| 12 |
+
//! 5. Apply delta IN PLACE
|
| 13 |
+
//! 6. Clip to bounds
|
| 14 |
+
//! 7. Mass conservation (L1 norm preservation)
|
| 15 |
+
|
| 16 |
+
use pyo3::prelude::*;
|
| 17 |
+
use numpy::{PyArray1, PyReadonlyArray1, PyArrayMethods};
|
| 18 |
+
use crate::kernel::Kernel2D;
|
| 19 |
+
use std::time::Instant;
|
| 20 |
+
|
| 21 |
+
/// Result from a full Lenia step across all matrices.
///
/// Built by `RustLeniaEngine::step_all_inplace`; every field is exposed to
/// Python as a read-only attribute via `#[pyo3(get)]`.
#[pyclass]
#[derive(Clone)]
pub struct LeniaStepResult {
    /// Sum of the per-matrix values returned by `step_single_inplace`
    /// (each one a mean absolute delta), over matrices counted as processed.
    #[pyo3(get)]
    pub total_delta_norm: f64,
    /// Number of matrices whose returned delta was strictly positive.
    #[pyo3(get)]
    pub matrices_processed: usize,
    /// Number of matrices whose returned delta was not positive
    /// (this includes matrices too small for the kernel).
    #[pyo3(get)]
    pub matrices_skipped: usize,
    /// Wall-clock duration of this step, in milliseconds.
    #[pyo3(get)]
    pub time_ms: f64,
    /// The engine's cumulative step counter after this step.
    #[pyo3(get)]
    pub step_count: u64,
}
|
| 36 |
+
|
| 37 |
+
/// The Lenia dynamics engine. Operates directly on numpy array memory.
///
/// Holds the precomputed convolution kernel, the growth-function
/// parameters, the per-matrix L1 norms used for mass conservation, and
/// running timing statistics.
#[pyclass]
pub struct RustLeniaEngine {
    /// Ring kernel used to compute each weight's neighborhood potential.
    kernel: Kernel2D,
    /// Potential at which the growth bell curve peaks.
    growth_mu: f32,
    /// Width of the growth bell curve.
    growth_sigma: f32,
    /// Multiplier applied to the growth value to obtain a weight delta.
    growth_scale: f32,
    /// Per-element bound on the absolute delta applied in one step.
    max_weight_delta: f32,
    /// Lower clamp applied to each weight right after its delta is added.
    weight_clip_min: f32,
    /// Upper clamp applied to each weight right after its delta is added.
    weight_clip_max: f32,
    /// Gain inside `tanh(activation_mag * activation_coupling)`; a value
    /// that is not positive disables activation modulation.
    activation_coupling: f32,
    /// Number of completed `step_all_inplace` calls.
    step_count: u64,
    /// Accumulated wall-clock time of all steps, in milliseconds.
    total_time_ms: f64,
    /// L1 norm recorded for each matrix index on first visit; 0.0 doubles
    /// as the "not recorded yet" marker.
    initial_norms: Vec<f64>,
    /// Reusable scratch buffer for convolution output
    scratch: Vec<f32>,
}
|
| 54 |
+
|
| 55 |
+
#[pymethods]
|
| 56 |
+
impl RustLeniaEngine {
|
| 57 |
+
#[new]
|
| 58 |
+
#[pyo3(signature = (
|
| 59 |
+
kernel_radius = 5,
|
| 60 |
+
kernel_sigma = 0.8,
|
| 61 |
+
growth_mu = 0.12,
|
| 62 |
+
growth_sigma = 0.02,
|
| 63 |
+
growth_scale = 0.005,
|
| 64 |
+
max_weight_delta = 0.05,
|
| 65 |
+
weight_clip_min = -3.0,
|
| 66 |
+
weight_clip_max = 3.0,
|
| 67 |
+
activation_coupling = 2.0,
|
| 68 |
+
))]
|
| 69 |
+
pub fn new(
|
| 70 |
+
kernel_radius: usize,
|
| 71 |
+
kernel_sigma: f32,
|
| 72 |
+
growth_mu: f32,
|
| 73 |
+
growth_sigma: f32,
|
| 74 |
+
growth_scale: f32,
|
| 75 |
+
max_weight_delta: f32,
|
| 76 |
+
weight_clip_min: f32,
|
| 77 |
+
weight_clip_max: f32,
|
| 78 |
+
activation_coupling: f32,
|
| 79 |
+
) -> Self {
|
| 80 |
+
RustLeniaEngine {
|
| 81 |
+
kernel: Kernel2D::new(kernel_radius, kernel_sigma),
|
| 82 |
+
growth_mu,
|
| 83 |
+
growth_sigma,
|
| 84 |
+
growth_scale,
|
| 85 |
+
max_weight_delta,
|
| 86 |
+
weight_clip_min,
|
| 87 |
+
weight_clip_max,
|
| 88 |
+
activation_coupling,
|
| 89 |
+
step_count: 0,
|
| 90 |
+
total_time_ms: 0.0,
|
| 91 |
+
initial_norms: Vec::new(),
|
| 92 |
+
scratch: Vec::new(),
|
| 93 |
+
}
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
/// Process a single weight matrix IN PLACE.
|
| 97 |
+
///
|
| 98 |
+
/// Args:
|
| 99 |
+
/// weights: numpy array (flattened f32) — MODIFIED IN PLACE
|
| 100 |
+
/// rows: matrix height
|
| 101 |
+
/// cols: matrix width
|
| 102 |
+
/// activation_mag: activation magnitude for this layer
|
| 103 |
+
/// matrix_idx: index for mass conservation tracking
|
| 104 |
+
///
|
| 105 |
+
/// Returns delta_norm for this matrix.
|
| 106 |
+
pub fn step_single_inplace(
|
| 107 |
+
&mut self,
|
| 108 |
+
py: Python<'_>,
|
| 109 |
+
weights: &Bound<'_, PyArray1<f32>>,
|
| 110 |
+
rows: usize,
|
| 111 |
+
cols: usize,
|
| 112 |
+
activation_mag: f32,
|
| 113 |
+
matrix_idx: usize,
|
| 114 |
+
) -> PyResult<f64> {
|
| 115 |
+
let n = rows * cols;
|
| 116 |
+
let min_size = 2 * self.kernel.radius + 1;
|
| 117 |
+
|
| 118 |
+
if rows < min_size || cols < min_size {
|
| 119 |
+
return Ok(0.0);
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
// Get mutable access to the numpy array's data — zero copy
|
| 123 |
+
let mut weights_rw = unsafe { weights.as_array_mut() };
|
| 124 |
+
let w_slice = weights_rw.as_slice_mut()
|
| 125 |
+
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Array not contiguous"))?;
|
| 126 |
+
|
| 127 |
+
// Initialize norm on first visit
|
| 128 |
+
while self.initial_norms.len() <= matrix_idx {
|
| 129 |
+
self.initial_norms.push(0.0);
|
| 130 |
+
}
|
| 131 |
+
if self.initial_norms[matrix_idx] == 0.0 {
|
| 132 |
+
self.initial_norms[matrix_idx] = w_slice.iter().map(|v| v.abs() as f64).sum();
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
// Ensure scratch buffer is large enough
|
| 136 |
+
if self.scratch.len() < n {
|
| 137 |
+
self.scratch.resize(n, 0.0);
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
// 1. Convolve — neighborhood potential
|
| 141 |
+
self.kernel.convolve(w_slice, rows, cols, &mut self.scratch[..n]);
|
| 142 |
+
|
| 143 |
+
// 2-5. Growth + modulation + delta + apply — all in one pass
|
| 144 |
+
let mu = self.growth_mu;
|
| 145 |
+
let sigma = self.growth_sigma;
|
| 146 |
+
let scale = self.growth_scale;
|
| 147 |
+
let max_d = self.max_weight_delta;
|
| 148 |
+
let clip_min = self.weight_clip_min;
|
| 149 |
+
let clip_max = self.weight_clip_max;
|
| 150 |
+
|
| 151 |
+
let act_scale = if self.activation_coupling > 0.0 && activation_mag > 0.0 {
|
| 152 |
+
(activation_mag * self.activation_coupling).tanh()
|
| 153 |
+
} else {
|
| 154 |
+
1.0
|
| 155 |
+
};
|
| 156 |
+
|
| 157 |
+
let mut delta_sum = 0.0f64;
|
| 158 |
+
|
| 159 |
+
for i in 0..n {
|
| 160 |
+
let p = self.scratch[i];
|
| 161 |
+
// Growth function: bell curve
|
| 162 |
+
let g = 2.0 * (-(p - mu).powi(2) / (2.0 * sigma * sigma)).exp() - 1.0;
|
| 163 |
+
// Modulate + scale + clamp
|
| 164 |
+
let d = (scale * g * act_scale).clamp(-max_d, max_d);
|
| 165 |
+
// Apply + clip
|
| 166 |
+
w_slice[i] = (w_slice[i] + d).clamp(clip_min, clip_max);
|
| 167 |
+
delta_sum += d.abs() as f64;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
// 7. Mass conservation — preserve L1 norm
|
| 171 |
+
let current_norm: f64 = w_slice.iter().map(|v| v.abs() as f64).sum();
|
| 172 |
+
let target_norm = self.initial_norms[matrix_idx];
|
| 173 |
+
|
| 174 |
+
if current_norm > 1e-10 {
|
| 175 |
+
let factor = (target_norm / current_norm) as f32;
|
| 176 |
+
for v in w_slice.iter_mut() {
|
| 177 |
+
*v *= factor;
|
| 178 |
+
}
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
Ok(delta_sum / n as f64)
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
/// Process all weight matrices in one call.
|
| 185 |
+
///
|
| 186 |
+
/// Args:
|
| 187 |
+
/// weight_arrays: list of numpy arrays (each flattened, MODIFIED IN PLACE)
|
| 188 |
+
/// shapes: list of (rows, cols) tuples
|
| 189 |
+
/// activations: list of activation magnitudes
|
| 190 |
+
///
|
| 191 |
+
/// Returns LeniaStepResult.
|
| 192 |
+
pub fn step_all_inplace(
|
| 193 |
+
&mut self,
|
| 194 |
+
py: Python<'_>,
|
| 195 |
+
weight_arrays: Vec<Bound<'_, PyArray1<f32>>>,
|
| 196 |
+
shapes: Vec<(usize, usize)>,
|
| 197 |
+
activations: Vec<f32>,
|
| 198 |
+
) -> PyResult<LeniaStepResult> {
|
| 199 |
+
let start = Instant::now();
|
| 200 |
+
let n = weight_arrays.len();
|
| 201 |
+
let mut total_delta = 0.0f64;
|
| 202 |
+
let mut processed = 0usize;
|
| 203 |
+
let mut skipped = 0usize;
|
| 204 |
+
|
| 205 |
+
for (i, arr) in weight_arrays.iter().enumerate() {
|
| 206 |
+
let (rows, cols) = shapes[i];
|
| 207 |
+
let act = if i < activations.len() { activations[i] } else { 0.0 };
|
| 208 |
+
|
| 209 |
+
let delta = self.step_single_inplace(py, arr, rows, cols, act, i)?;
|
| 210 |
+
if delta > 0.0 {
|
| 211 |
+
total_delta += delta;
|
| 212 |
+
processed += 1;
|
| 213 |
+
} else {
|
| 214 |
+
skipped += 1;
|
| 215 |
+
}
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
let elapsed = start.elapsed().as_secs_f64() * 1000.0;
|
| 219 |
+
self.step_count += 1;
|
| 220 |
+
self.total_time_ms += elapsed;
|
| 221 |
+
|
| 222 |
+
Ok(LeniaStepResult {
|
| 223 |
+
total_delta_norm: total_delta,
|
| 224 |
+
matrices_processed: processed,
|
| 225 |
+
matrices_skipped: skipped,
|
| 226 |
+
time_ms: elapsed,
|
| 227 |
+
step_count: self.step_count,
|
| 228 |
+
})
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
pub fn get_summary(&self) -> (u64, f64, f64) {
|
| 232 |
+
let avg = if self.step_count > 0 {
|
| 233 |
+
self.total_time_ms / self.step_count as f64
|
| 234 |
+
} else {
|
| 235 |
+
0.0
|
| 236 |
+
};
|
| 237 |
+
(self.step_count, self.total_time_ms, avg)
|
| 238 |
+
}
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity-checks the growth curve `2 * exp(-(p - mu)^2 / (2 * sigma^2)) - 1`.
    ///
    /// The formula is written out here rather than called through the
    /// engine, so this pins the expected shape of the curve only.
    #[test]
    fn test_growth_function_shape() {
        let mu = 0.12f32;
        let sigma = 0.02f32;
        // At p == mu the distance term is 0, so the curve is at its peak of +1.
        let at_mu = 2.0 * (-(0.0f32).powi(2) / (2.0 * sigma * sigma)).exp() - 1.0;
        assert!((at_mu - 1.0).abs() < 0.001);

        // At p == 1.0, many sigmas from mu, the curve is near its floor of -1.
        let far = 2.0 * (-((1.0 - mu) / sigma).powi(2) / 2.0).exp() - 1.0;
        assert!(far < -0.9);
    }
}
|
rust_lenia/src/kernel.rs
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Lenia spatial kernel — 2D ring-shaped bell curve.
|
| 2 |
+
//!
|
| 3 |
+
//! K(r) = exp(-((r - 0.5) / sigma)^2 / 2)
|
| 4 |
+
//! Center zeroed (a weight doesn't influence itself).
|
| 5 |
+
//! Normalized to sum to 1.
|
| 6 |
+
|
| 7 |
+
/// Precomputed 2D kernel for convolution.
///
/// The weights form a ring: a Gaussian bump in the normalised distance
/// from the centre, peaking at half the radius. The centre cell is zero
/// and the remaining weights are normalised to sum to 1.
#[derive(Debug, Clone)]
pub struct Kernel2D {
    /// Row-major kernel weights, `size * size` entries.
    pub data: Vec<f32>,
    /// Side length of the kernel, `2 * radius + 1`.
    pub size: usize,
    /// Distance in cells from the centre to the kernel edge.
    pub radius: usize,
}

impl Kernel2D {
    /// Create a ring-shaped Lenia kernel.
    ///
    /// Each off-centre cell gets `exp(-(d - 0.5)^2 / (2 * sigma^2))`, where
    /// `d` is its Euclidean distance from the centre divided by `radius`.
    /// The centre stays 0 (a weight doesn't influence itself) and the
    /// kernel is then scaled to sum to 1. A `radius` of 0 yields the 1x1
    /// kernel `[0.0]`.
    pub fn new(radius: usize, sigma: f32) -> Self {
        let size = 2 * radius + 1;
        let center = radius * size + radius;
        let r = radius as f32;
        let two_sigma_sq = 2.0 * sigma * sigma;

        let mut data = vec![0.0f32; size * size];
        for (idx, cell) in data.iter_mut().enumerate() {
            // The centre is left at zero; skipping it also avoids the
            // 0/0 distance that a zero radius would otherwise produce.
            if idx == center {
                continue;
            }
            let dy = (idx / size) as f32 - r;
            let dx = (idx % size) as f32 - r;
            let dist = (dx * dx + dy * dy).sqrt() / r;

            // Ring kernel: peak at dist ~0.5
            *cell = (-(dist - 0.5).powi(2) / two_sigma_sq).exp();
        }

        // Normalise over the final weights (centre already zero).
        let sum: f32 = data.iter().sum();
        if sum > 1e-8 {
            for v in data.iter_mut() {
                *v /= sum;
            }
        }

        Kernel2D { data, size, radius }
    }

    /// Apply 2D convolution (same-size output, zero-padded).
    /// input: row-major f32 array of shape (h, w)
    /// output: same shape, each element = sum of kernel-weighted neighborhood
    ///
    /// # Panics
    ///
    /// Panics if `input` or `output` holds fewer than `h * w` elements.
    #[inline]
    pub fn convolve(&self, input: &[f32], h: usize, w: usize, output: &mut [f32]) {
        let n = h * w;
        // Fail before writing anything rather than part-way through.
        assert!(input.len() >= n, "input has {} elements, need {}", input.len(), n);
        assert!(output.len() >= n, "output has {} elements, need {}", output.len(), n);

        let r = self.radius;
        let ksize = self.size;

        for iy in 0..h {
            // Kernel rows whose source row `iy + ky - r` lies in [0, h).
            let ky_lo = r.saturating_sub(iy);
            let ky_hi = ksize.min(h + r - iy);

            for ix in 0..w {
                // Kernel columns whose source column lies in [0, w).
                let kx_lo = r.saturating_sub(ix);
                let kx_hi = ksize.min(w + r - ix);

                let mut acc = 0.0f32;
                for ky in ky_lo..ky_hi {
                    let sy = iy + ky - r;
                    let in_row = &input[sy * w..sy * w + w];
                    let k_row = &self.data[ky * ksize..(ky + 1) * ksize];

                    for kx in kx_lo..kx_hi {
                        acc += in_row[ix + kx - r] * k_row[kx];
                    }
                }

                output[iy * w + ix] = acc;
            }
        }
    }
}
|
| 86 |
+
|
| 87 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built kernel has the expected geometry, a zero center and
    /// weights that sum to one.
    #[test]
    fn test_kernel_creation() {
        let k = Kernel2D::new(3, 1.0);
        assert_eq!(k.size, 7);
        assert_eq!(k.radius, 3);
        assert_eq!(k.data.len(), 49);

        // Center should be zero
        assert_eq!(k.data[3 * 7 + 3], 0.0);

        // Should sum to ~1.0 (normalized)
        let sum: f32 = k.data.iter().sum();
        assert!((sum - 1.0).abs() < 0.01, "Kernel sum: {}", sum);
    }

    /// Convolving a uniform field with a normalized kernel reproduces the
    /// field wherever the whole kernel fits, and attenuates it at the border.
    #[test]
    fn test_convolution() {
        let k = Kernel2D::new(1, 0.5);
        // 4x4 input, all ones
        let input = vec![1.0f32; 16];
        let mut output = vec![0.0f32; 16];

        k.convolve(&input, 4, 4, &mut output);

        // Interior element (row 1, col 1): the full 3x3 kernel is in bounds,
        // so the result equals the kernel sum, i.e. ~1.0.
        assert!(
            (output[5] - 1.0).abs() < 1e-5,
            "Interior value: {}",
            output[5]
        );

        // Corner element: zero padding drops part of the kernel weight.
        assert!(
            output[0] < output[5],
            "Corner value {} should be below interior value {}",
            output[0],
            output[5]
        );
    }
}
|
rust_lenia/src/lib.rs
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! NuWave Lenia Engine — Rust Core
|
| 2 |
+
//!
|
| 3 |
+
//! Applies Lenia dynamics to transformer weight matrices.
|
| 4 |
+
//! The hot path: 196 weight matrices × (conv2d + growth + conservation) per step.
|
| 5 |
+
//! Python/PyTorch: ~65s. Rust target: <5s.
|
| 6 |
+
//!
|
| 7 |
+
//! The growth function IS the learning rule. No backprop. No loss function.
|
| 8 |
+
//! Each weight neighborhood evolves based on its neighbors' states
|
| 9 |
+
//! and the activation flow through it.
|
| 10 |
+
//!
|
| 11 |
+
//! PyO3 bindings: Python calls step() with flat f32 arrays.
|
| 12 |
+
//! Rust does the math. Returns delta arrays Python applies to tensors.
|
| 13 |
+
|
| 14 |
+
mod engine;
|
| 15 |
+
mod kernel;
|
| 16 |
+
|
| 17 |
+
use pyo3::prelude::*;
|
| 18 |
+
|
| 19 |
+
#[pymodule]
|
| 20 |
+
fn nuwave_lenia(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
| 21 |
+
m.add_class::<engine::RustLeniaEngine>()?;
|
| 22 |
+
m.add_class::<engine::LeniaStepResult>()?;
|
| 23 |
+
Ok(())
|
| 24 |
+
}
|