v2 complete: NGC graft, causal energy, auto-expanding codebook, benchmark integration
Browse files- tensegrity/v2/causal_energy.py +180 -0
- tensegrity/v2/fhrr.py +8 -3
- tensegrity/v2/graft.py +316 -0
- tests/test_v2_bench.py +113 -0
tensegrity/v2/causal_energy.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Causal Energy: Pearl's SCMs as energy terms in the unified landscape.
|
| 3 |
+
|
| 4 |
+
Each SCM contributes a prediction error to the total energy:
|
| 5 |
+
E_causal(M_k) = Σ_v ||z_v - f_v(z_pa(v))||²
|
| 6 |
+
|
| 7 |
+
Where:
|
| 8 |
+
z_v = observed value of variable v
|
| 9 |
+
f_v(z_pa(v)) = structural equation's prediction from parents
|
| 10 |
+
pa(v) = parents of v in the causal DAG
|
| 11 |
+
|
| 12 |
+
Multiple SCMs compete. The model with lowest causal energy provides
|
| 13 |
+
the best explanation. This replaces the v1 causal arena's log-likelihood
|
| 14 |
+
comparison with a unified energy-based comparison.
|
| 15 |
+
|
| 16 |
+
The causal energy connects to the NGC energy through shared variables:
|
| 17 |
+
if a causal variable maps to an NGC layer's abstract state, then the
|
| 18 |
+
NGC prediction error and the causal prediction error are literally
|
| 19 |
+
the same quantity at different scales of description.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import numpy as np
|
| 23 |
+
from typing import Dict, List, Optional, Any, Tuple
|
| 24 |
+
from tensegrity.causal.scm import StructuralCausalModel
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class CausalEnergyTerm:
    """
    Prediction-error energy of a single structural causal model.

    Scores a set of observed variable values by how far each value lies
    from the mean state index that the variable's mechanism assigns to it
    given the observed parent values.
    """

    def __init__(self, scm: StructuralCausalModel, precision: float = 1.0):
        # `precision` multiplies every squared error (the 1/σ² factor).
        self.precision = precision
        self.scm = scm

    def energy(self, observations: Dict[str, int]) -> float:
        """
        Sum the precision-weighted squared prediction errors.

            E = Σ_v 0.5 * precision * (obs_v - predicted_v)²

        ``predicted_v`` is the expectation of the state index under the CPT
        column selected by the parent configuration. Variables missing from
        ``observations`` are skipped; a missing parent is read as value 0.
        """
        accumulated = 0.0

        for name in self.scm.topological_order():
            if name in observations:
                mechanism = self.scm.mechanisms[name]
                parent_state = {
                    parent: observations.get(parent, 0)
                    for parent in mechanism.parents
                }

                # Column of the CPT that matches the parent configuration.
                table = mechanism.cpt
                column = mechanism.parent_config_index(parent_state)
                distribution = table[:, column]

                # Mean state index under that column.
                state_indices = np.arange(len(distribution))
                mean_index = np.sum(distribution * state_indices)

                residual = float(observations[name]) - mean_index
                squared = residual ** 2
                accumulated += 0.5 * self.precision * squared

        return accumulated

    def prediction(self, observations: Dict[str, int],
                   target: str) -> np.ndarray:
        """Return the CPT column for ``target`` under the observed parents.

        A target with no registered mechanism yields ``np.array([1.0])``.
        """
        mechanism = self.scm.mechanisms.get(target)
        if mechanism is not None:
            parent_state = {
                parent: observations.get(parent, 0)
                for parent in mechanism.parents
            }
            column = mechanism.parent_config_index(parent_state)
            return mechanism.cpt[:, column]

        return np.array([1.0])
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class CausalArenaV2:
    """
    v2 causal arena: SCMs compete via energy, not log-likelihood.

    Each registered model is wrapped in a CausalEnergyTerm. The model with
    the lowest energy wins. Tension is the entropy of the softmax
    distribution over models, normalised by log(number of models) so that
    0 means one model dominates and 1 means all models are equally good.

    This integrates with the unified energy landscape:
        E_total = E_perception(NGC) + E_memory(Hopfield) + E_causal(arena)

    Where E_causal = min_k E_causal(M_k) — we use the best model's energy.
    """

    def __init__(self, precision: float = 1.0, beta: float = 1.0):
        """
        Args:
            precision: Causal prediction error precision
            beta: Inverse temperature for model selection softmax
        """
        self.models: Dict[str, CausalEnergyTerm] = {}
        self.beta = beta
        self.precision = precision
        # One {model name: energy} snapshot per non-empty competition.
        self._history: List[Dict[str, float]] = []

    def register(self, scm: StructuralCausalModel):
        """Add a competing causal model, keyed by ``scm.name``."""
        self.models[scm.name] = CausalEnergyTerm(scm, self.precision)

    def _softmax_weights(self, energy_values) -> np.ndarray:
        """Softmax over ``-beta * energy`` (lower energy = higher weight)."""
        vals = np.asarray(list(energy_values), dtype=np.float64)
        logits = -self.beta * vals
        logits -= logits.max()  # shift for numerical stability
        weights = np.exp(logits)
        return weights / weights.sum()

    @staticmethod
    def _normalized_entropy(weights: np.ndarray) -> float:
        """Entropy of ``weights`` divided by log(len(weights)).

        The normaliser is the number of competing models, not the number of
        non-zero weights: a model whose weight underflows to exactly 0 is
        still a competitor, and dropping it from the normaliser would
        overstate the tension among the remaining models.
        """
        n_models = len(weights)
        nonzero = weights[weights > 0]  # 0 * log(0) is taken as 0
        if n_models < 2 or len(nonzero) < 2:
            return 0.0
        entropy = -np.sum(nonzero * np.log(nonzero))
        return float(entropy / np.log(n_models))

    def compete(self, observations: Dict[str, int]) -> Dict[str, Any]:
        """
        All models compute their causal energy on the observation.

        Returns:
            Dict with keys ``winner``, ``tension``, ``posteriors``,
            ``energies`` and ``best_energy``. With no registered models the
            winner is None, tension is 1.0, both dicts are empty and
            ``best_energy`` is 0.0.
        """
        energies = {
            name: term.energy(observations)
            for name, term in self.models.items()
        }

        if not energies:
            return {
                "winner": None,
                "tension": 1.0,
                "posteriors": {},
                "energies": {},
                "best_energy": 0.0,
            }

        weights = self._softmax_weights(energies.values())
        posteriors = dict(zip(energies, weights.tolist()))
        tension = self._normalized_entropy(weights)

        winner = min(energies, key=energies.get)

        # Store a copy so callers mutating the returned dict cannot rewrite
        # the history that the `tension` property reads.
        self._history.append(dict(energies))

        return {
            "winner": winner,
            "tension": tension,
            "posteriors": posteriors,
            "energies": energies,
            "best_energy": energies[winner],
        }

    def best_energy(self, observations: Dict[str, int]) -> float:
        """Get the energy of the best-fitting model (0.0 if none registered).

        Runs a full competition, so it also appends to the history.
        """
        result = self.compete(observations)
        return result.get("best_energy", 0.0)

    def update_models(self, observations: Dict[str, int]):
        """Update all models' parameters from a single observation."""
        for term in self.models.values():
            term.scm.update_from_data([observations])

    @property
    def tension(self) -> float:
        """Tension of the last competition; 1.0 before any competition."""
        if not self._history:
            return 1.0
        last = self._history[-1]
        return self._normalized_entropy(self._softmax_weights(last.values()))
|
tensegrity/v2/fhrr.py
CHANGED
|
@@ -58,11 +58,16 @@ class FHRRCodebook:
|
|
| 58 |
self._labels: Dict[str, int] = {}
|
| 59 |
|
| 60 |
def register(self, label: str) -> int:
|
| 61 |
-
"""Register a named symbol, return its index."""
|
| 62 |
if label not in self._labels:
|
| 63 |
idx = len(self._labels)
|
| 64 |
if idx >= self.n_symbols:
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
self._labels[label] = idx
|
| 67 |
return self._labels[label]
|
| 68 |
|
|
@@ -140,7 +145,7 @@ class FHRREncoder:
|
|
| 140 |
def __init__(self, dim: int = 2048,
|
| 141 |
n_position_moduli: int = 3,
|
| 142 |
position_range: int = 100000,
|
| 143 |
-
n_features: int =
|
| 144 |
n_roles: int = 32):
|
| 145 |
"""
|
| 146 |
Args:
|
|
|
|
| 58 |
self._labels: Dict[str, int] = {}
|
| 59 |
|
| 60 |
def register(self, label: str) -> int:
    """Register a named symbol and return its index, growing the codebook if full.

    A label that is already registered returns its existing index. When
    the next index would fall outside the current ``n_symbols`` rows, 256
    extra rows of unit-magnitude phasors ``exp(1j * phase)`` are appended
    to ``self.vectors`` and ``n_symbols`` grows by 256.

    The expansion RNG is seeded from a CRC32 of the label rather than
    ``hash(label)``: the built-in string hash is salted per process, which
    would make the generated vectors differ from run to run.
    """
    if label not in self._labels:
        idx = len(self._labels)
        if idx >= self.n_symbols:
            # Local import keeps this method self-contained.
            import zlib

            # CRC32 is stable across processes and fits RandomState's
            # 32-bit seed range.
            seed = zlib.crc32(label.encode("utf-8"))
            rng = np.random.RandomState(seed)
            new_phases = rng.uniform(0, 2 * np.pi, size=(256, self.dim))
            new_vecs = np.exp(1j * new_phases).astype(np.complex64)
            self.vectors = np.concatenate([self.vectors, new_vecs], axis=0)
            self.n_symbols += 256
        self._labels[label] = idx
    return self._labels[label]
|
| 73 |
|
|
|
|
| 145 |
def __init__(self, dim: int = 2048,
|
| 146 |
n_position_moduli: int = 3,
|
| 147 |
position_range: int = 100000,
|
| 148 |
+
n_features: int = 4096,
|
| 149 |
n_roles: int = 32):
|
| 150 |
"""
|
| 151 |
Args:
|
tensegrity/v2/graft.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
v2 Graft: NGC prediction errors → per-step logit biases during LLM decoding.
|
| 3 |
+
|
| 4 |
+
This bridges the gap between the manifold approach (continuous constraint
|
| 5 |
+
propagation inside the decode loop) and Tensegrity's causal reasoning
|
| 6 |
+
(epistemically grounded beliefs about what's true).
|
| 7 |
+
|
| 8 |
+
At each decode step:
|
| 9 |
+
1. The generated tokens so far are encoded as an FHRR sequence
|
| 10 |
+
2. The NGC circuit settles on this observation (minimizing VFE)
|
| 11 |
+
3. The prediction error at each NGC layer is computed
|
| 12 |
+
4. These errors are projected into vocabulary space as logit biases
|
| 13 |
+
|
| 14 |
+
The projection works because:
|
| 15 |
+
- Layer 0 errors (sensory) → token-level constraints (word choice)
|
| 16 |
+
- Layer 1 errors (hidden) → phrase-level constraints (coherence)
|
| 17 |
+
- Layer L errors (abstract) → semantic constraints (topic, logic)
|
| 18 |
+
|
| 19 |
+
Each layer's projection is a fixed random matrix (no learning needed
|
| 20 |
+
at the graft interface — all learning happens inside the NGC circuit).
|
| 21 |
+
|
| 22 |
+
Convergence gating:
|
| 23 |
+
- Only emit bias when NGC has settled (energy delta < threshold)
|
| 24 |
+
- Scale bias by 1 / (1 + energy) (low energy → confident beliefs → strong bias)
|
| 25 |
+
- Never worse than base: ungated fallback to native LLM logits
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
import numpy as np
|
| 29 |
+
from typing import Dict, List, Optional, Callable, Set, Tuple
|
| 30 |
+
import math
|
| 31 |
+
import logging
|
| 32 |
+
|
| 33 |
+
logger = logging.getLogger(__name__)
|
| 34 |
+
|
| 35 |
+
# Lazy torch import: the module-level name stays None until first needed.
torch = None


def _ensure_torch():
    """Bind the module-level ``torch`` name to the torch module on first call."""
    global torch
    if torch is not None:
        return

    from importlib import import_module
    torch = import_module('torch')
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class NGCLogitsProcessor:
    """
    HuggingFace LogitsProcessor that runs NGC settling at each decode step.

    This is the v2 equivalent of TensegrityLogitsProcessor, but instead of
    projecting flat hypothesis posteriors, it projects hierarchical prediction
    errors from the NGC circuit.

    The manifold ran ~47 internal steps per decode step until coherence > 0.96.
    We do the same: the NGC circuit settles, and if the last energy change is
    below the gate, its prediction errors are projected into logit space.

    Only the first sequence in the batch (``input_ids[0]``) is encoded; the
    resulting bias is broadcast over every row of ``scores``.
    """

    supports_continuous_batching = False  # Stateful

    def __init__(self,
                 field,  # UnifiedField instance
                 tokenizer,
                 vocab_projections: Optional[List[np.ndarray]] = None,
                 scale: float = 1.0,
                 energy_gate: float = 0.1,
                 max_settle_steps: int = 30,
                 max_bias: float = 5.0):
        """
        Args:
            field: UnifiedField instance (owns NGC + FHRR + Hopfield)
            tokenizer: HuggingFace tokenizer
            vocab_projections: Per-NGC-layer projection matrices to vocab space.
                               If None, generated randomly (fixed, not learned).
            scale: Overall bias magnitude multiplier
            energy_gate: Only emit bias when NGC energy change < this per step
            max_settle_steps: NGC settling budget per decode step
            max_bias: Clamp per-token bias magnitude
        """
        _ensure_torch()

        self.field = field
        self.tokenizer = tokenizer
        self.scale = scale
        self.energy_gate = energy_gate
        self.max_settle_steps = max_settle_steps
        self.max_bias = max_bias

        self.vocab_size = tokenizer.vocab_size

        # Build per-layer projection matrices: NGC layer dim → vocab_size
        # These are fixed random projections, not learned
        if vocab_projections is not None:
            self.projections = vocab_projections
        else:
            self.projections = self._build_projections()

        # Tracking
        self._step_count = 0
        self._emissions = 0
        self._total_settle_steps = 0

    def _build_projections(self) -> List[np.ndarray]:
        """
        Build random projection matrices from NGC error space to vocab space.

        Higher layers get stronger projection weights (semantic > surface).
        Layer weights: [1.0, 2.0, 4.0, ...] (doubling per level).
        """
        projections = []
        rng = np.random.RandomState(7777)  # fixed seed: same matrices every run

        for ell, size in enumerate(self.field.ngc.layer_sizes):
            # Random projection: (vocab_size, layer_size)
            # Scaled by 1/sqrt(layer_size) for variance normalization
            # Higher layers get more weight
            layer_weight = 2.0 ** ell
            P = rng.randn(self.vocab_size, size).astype(np.float64)
            P *= layer_weight / np.sqrt(size)
            projections.append(P)

        return projections

    def _tokens_to_observation(self, input_ids) -> np.ndarray:
        """
        Convert generated tokens so far into an FHRR observation vector,
        then project to NGC sensory space.

        Uses the last 16 token ids of the first batch row, decoded to text
        and split on whitespace. Returns a zero vector of length
        ``field.obs_dim`` when the decoded text has no words.
        """
        ids = input_ids[0].tolist()
        recent_ids = ids[-16:]  # Last 16 tokens
        text = self.tokenizer.decode(recent_ids, skip_special_tokens=True)
        tokens = text.lower().split()

        if not tokens:
            return np.zeros(self.field.obs_dim, dtype=np.float64)

        # Encode as FHRR sequence → project to NGC observation space
        fhrr_vec = self.field.encoder.encode_sequence(tokens)
        obs_vec = self.field._fhrr_to_obs(fhrr_vec)

        return obs_vec

    def _error_to_bias(self) -> np.ndarray:
        """
        Project NGC prediction errors into vocabulary space.

            bias = (1 / n_layers) · Σ_ℓ P_ℓ · error_ℓ

        Where P_ℓ is the fixed projection for layer ℓ and error_ℓ is that
        layer's current ``error`` vector. Layers whose error norm is below
        1e-10 are skipped.
        """
        bias = np.zeros(self.vocab_size, dtype=np.float64)

        for ell in range(self.field.ngc.n_layers):
            error = self.field.ngc.layers[ell].error
            if np.linalg.norm(error) < 1e-10:
                continue

            # Project error into vocab space
            bias += self.projections[ell] @ error

        # Normalize by number of layers
        bias /= max(self.field.ngc.n_layers, 1)

        return bias

    @staticmethod
    def _fit_to_logits(bias: np.ndarray, n_logits: int) -> np.ndarray:
        """Return ``bias`` zero-padded or truncated to exactly ``n_logits`` entries."""
        if bias.shape[0] == n_logits:
            return bias
        fitted = np.zeros(n_logits, dtype=bias.dtype)
        keep = min(n_logits, bias.shape[0])
        fitted[:keep] = bias[:keep]
        return fitted

    def __call__(self, input_ids, scores):
        """
        Called at each decode step by model.generate().

        1. Convert generated tokens → FHRR observation
        2. Settle NGC circuit on this observation
        3. If converged: project prediction errors into logit biases
        4. If not: pass through unmodified

        Returns:
            ``scores`` itself when the gate is closed, otherwise a new tensor
            ``scores + bias`` with the bias broadcast over the batch.
        """
        self._step_count += 1

        # Convert tokens to observation
        obs = self._tokens_to_observation(input_ids)

        # Settle NGC
        settle_result = self.field.ngc.settle(obs, steps=self.max_settle_steps)
        # Counts the settling budget requested, not steps actually taken.
        self._total_settle_steps += self.max_settle_steps

        # Check convergence: did the energy stabilize?
        energy_trace = settle_result["energy_trace"]
        if len(energy_trace) >= 2:
            energy_delta = abs(energy_trace[-1] - energy_trace[-2])
            converged = energy_delta < self.energy_gate
        else:
            converged = False

        if not converged:
            return scores  # Graceful fallback — native LLM behavior

        # Query Hopfield memory with abstract state (top NGC layer)
        abstract = self.field.ngc.get_abstract_state(level=-1)
        # NOTE(review): the retrieval result is not used below; the call is
        # kept in case retrieve() has side effects — confirm and drop if not.
        _retrieved, _mem_energy = self.field.memory.retrieve(abstract, steps=3)

        # Compute bias from prediction errors
        bias = self._error_to_bias()

        # Scale by inverse energy (lower energy = more confident = stronger bias)
        current_energy = settle_result["final_energy"]
        confidence = 1.0 / (1.0 + current_energy)
        bias *= self.scale * confidence

        # Clamp
        np.clip(bias, -self.max_bias, self.max_bias, out=bias)

        # The model's logit width can differ from tokenizer.vocab_size
        # (e.g. padded embedding matrices or added tokens). Align the bias
        # so the addition below cannot fail on a shape mismatch; ids without
        # a projection row simply receive no bias.
        bias = self._fit_to_logits(bias, scores.shape[-1])

        # Convert to torch and apply
        bias_tensor = torch.tensor(bias, device=scores.device, dtype=scores.dtype)

        self._emissions += 1

        return scores + bias_tensor.unsqueeze(0)

    @property
    def statistics(self):
        """Counters for decode steps, emissions and settling, plus field state."""
        return {
            "decode_steps": self._step_count,
            "emissions": self._emissions,
            "emission_rate": self._emissions / max(self._step_count, 1),
            "total_settle_steps": self._total_settle_steps,
            "avg_settle_per_decode": self._total_settle_steps / max(self._step_count, 1),
            "ngc_energy": self.field.ngc.total_energy,
            "memory_patterns": self.field.memory.n_patterns,
        }
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
class V2ScoringBridge:
    """
    Bridge between v2 architecture and the benchmark harness.

    Converts a TaskSample's choices into FHRR observations,
    runs the NGC circuit on each, and scores choices by
    prediction error: lower error = better fit = higher score.

    This replaces v1's flat Bayesian posterior scoring with
    hierarchical predictive coding scoring.
    """

    def __init__(self, field=None, obs_dim: int = 128,
                 hidden_dims: Optional[List[int]] = None):
        """
        Args:
            field: Existing field to score with. When None, a UnifiedField
                is built from ``obs_dim`` and ``hidden_dims``.
            obs_dim: Observation dimension for the default field.
            hidden_dims: Hidden layer sizes for the default field
                (defaults to [64, 16]).
        """
        if field is None:
            # Imported only when needed so a caller-supplied field does not
            # require the project module to be importable.
            from tensegrity.v2.field import UnifiedField

            field = UnifiedField(
                obs_dim=obs_dim,
                hidden_dims=hidden_dims or [64, 16],
                fhrr_dim=1024,
                hopfield_beta=0.05,
                ngc_settle_steps=20,
                ngc_learning_rate=0.005,
            )
        self.field = field

    def _snapshot_layers(self):
        """Copy (z, z_bar, error) of every current NGC layer."""
        return [
            (layer.z.copy(), layer.z_bar.copy(), layer.error.copy())
            for layer in self.field.ngc.layers
        ]

    def _restore_layers(self, saved) -> None:
        """Put the NGC layers back to a snapshot taken by ``_snapshot_layers``."""
        if not saved:
            # There were no layers before settling, so return to the same
            # uninitialised condition instead of leaving behind whatever
            # state the scored choice created.
            self.reset()
            return
        for i, (z, z_bar, err) in enumerate(saved):
            layer = self.field.ngc.layers[i]
            layer.z = z
            layer.z_bar = z_bar
            layer.error = err

    def score_choices(self, prompt: str, choices: List[str]) -> Tuple[List[float], float]:
        """
        Score each choice via v2 predictive coding.

        The prompt (first 32 whitespace tokens) is observed once to set up
        context. Then, for each choice, the last 32 tokens of
        ``prompt + " " + choice`` are encoded, the NGC circuit is settled
        for 10 steps, and the score is the negative final energy. The NGC
        layer state is snapshotted before and restored after every choice so
        that scoring one choice does not affect the next.

        Returns:
            (scores, entropy) where scores[i] = score for choice i and
            entropy is the entropy of softmax(scores) divided by
            log(max(n_choices, 2)). An empty ``choices`` list returns
            ``([], 0.0)``.
        """
        # First, establish context by observing the prompt
        prompt_tokens = prompt.lower().split()[:32]  # Cap at 32 tokens
        if prompt_tokens:
            self.field.observe(prompt_tokens, input_type="tokens")

        if not choices:
            return [], 0.0

        scores = []
        for choice in choices:
            choice_tokens = (prompt + " " + choice).lower().split()[-32:]

            saved_layers = self._snapshot_layers()
            try:
                if choice_tokens:
                    fhrr_vec = self.field.encoder.encode_sequence(choice_tokens)
                    obs = self.field._fhrr_to_obs(fhrr_vec)
                else:
                    # Nothing to encode: use a zero observation.
                    obs = np.zeros(self.field.obs_dim, dtype=np.float64)
                settle_result = self.field.ngc.settle(obs, steps=10)

                # Score = negative energy (lower energy = better explanation)
                scores.append(-settle_result["final_energy"])
            finally:
                # Restore even if settling raised, so the field stays usable.
                self._restore_layers(saved_layers)

        # Entropy of softmax(scores) for confidence estimation
        scores_arr = np.array(scores)
        exp_shifted = np.exp(scores_arr - scores_arr.max())
        probs = exp_shifted / exp_shifted.sum()
        entropy = float(-np.sum(probs * np.log(probs + 1e-16)) / np.log(max(len(probs), 2)))

        return scores, entropy

    def reset(self):
        """Reset the field's NGC state between samples."""
        self.field.ngc._initialized = False
        self.field.ngc.layers = []
|
tests/test_v2_bench.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test v2 scoring bridge against benchmarks.
|
| 3 |
+
"""
|
| 4 |
+
import sys
|
| 5 |
+
sys.path.insert(0, '/app')
|
| 6 |
+
import numpy as np
|
| 7 |
+
np.random.seed(42)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def test_v2_scoring():
    """Run the v2 scoring bridge over benchmark samples and sanity-check it.

    For each task, up to 30 samples are scored and the accuracy is printed.
    A task whose samples cannot be loaded is reported as skipped. Every
    scored sample must yield one finite score per choice and a normalised
    entropy in [0, 1]; accuracy itself is reported but not asserted.
    """
    print("=" * 60)
    print("TEST: v2 NGC Scoring vs v1 Baseline on Sample Tasks")
    print("=" * 60)

    from tensegrity.v2.graft import V2ScoringBridge
    from tensegrity.bench.tasks import load_task_samples

    bridge = V2ScoringBridge(obs_dim=128, hidden_dims=[64, 16])

    tasks = ["copa", "sciq", "arc_challenge"]

    for task_name in tasks:
        # Loading is best-effort: a task that cannot be loaded is skipped.
        try:
            samples = load_task_samples(task_name, max_samples=30)
        except Exception as e:
            print(f"\n {task_name}: SKIP ({e})")
            continue

        correct = 0
        total = 0

        for sample in samples:
            bridge.reset()
            scores, entropy = bridge.score_choices(sample.prompt, sample.choices)

            # Without these checks the test could never fail.
            assert len(scores) == len(sample.choices)
            assert np.all(np.isfinite(scores))
            assert 0.0 <= entropy <= 1.0 + 1e-9

            pred = int(np.argmax(scores))
            if pred == sample.gold:
                correct += 1
            total += 1

        acc = correct / max(total, 1)
        print(f"\n {task_name}: {correct}/{total} = {acc:.1%}")

    print("\n ✓ v2 scoring bridge functional")
    return True
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_causal_energy():
    """Exercise CausalArenaV2 with two competing SCMs and sanity-check results.

    A model with an X → Y edge and a model without it are both updated on
    100 samples drawn from the first model, then compete on 20 further
    samples. Each competition must name a registered winner, report a
    tension in [0, 1] and posteriors that sum to 1. Which model wins is
    printed but not asserted.
    """
    print("\n" + "=" * 60)
    print("TEST: Causal Energy Arena v2")
    print("=" * 60)

    from tensegrity.causal.scm import StructuralCausalModel
    from tensegrity.v2.causal_energy import CausalArenaV2

    # Two competing models
    m_correct = StructuralCausalModel("correct")
    m_correct.add_variable("X", n_values=3)
    m_correct.add_variable("Y", n_values=3, parents=["X"])

    m_wrong = StructuralCausalModel("wrong")
    m_wrong.add_variable("X", n_values=3)
    m_wrong.add_variable("Y", n_values=3)  # No causal link

    # Update both models on data sampled from the X → Y model
    data = m_correct.sample(100)
    m_correct.update_from_data(data)
    m_wrong.update_from_data(data)

    arena = CausalArenaV2(precision=1.0, beta=2.0)
    arena.register(m_correct)
    arena.register(m_wrong)

    # Test on 20 observations
    test_data = m_correct.sample(20)
    winners = []
    for obs in test_data:
        result = arena.compete(obs)

        # Without these checks the test could never fail.
        assert result["winner"] in ("correct", "wrong")
        assert 0.0 <= result["tension"] <= 1.0 + 1e-9
        assert abs(sum(result["posteriors"].values()) - 1.0) < 1e-9

        winners.append(result["winner"])
        arena.update_models(obs)

    correct_wins = sum(1 for w in winners if w == "correct")
    print(f" Correct model wins: {correct_wins}/{len(winners)}")
    print(f" Final tension: {arena.tension:.3f}")

    # Energy comparison
    last_result = arena.compete(test_data[-1])
    assert set(last_result["energies"]) == {"correct", "wrong"}
    print(f" Last energies: {last_result['energies']}")
    print(f" Last posteriors: {last_result['posteriors']}")

    print(" ✓ Causal energy arena functional")
    return True
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
if __name__ == "__main__":
    # Script entry point: run every test, keep going after a failure, and
    # report the outcome through the process exit status.
    import traceback

    tests = [
        ("v2 Scoring", test_v2_scoring),
        ("Causal Energy", test_causal_energy),
    ]

    print("\n" + "█" * 60)
    print(" v2 Integration Tests")
    print("█" * 60)

    failed = []
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            # Keep running the remaining tests, but remember the failure so
            # the run does not exit with status 0.
            failed.append(name)
            print(f"\n ✗ {name} FAILED: {e}")
            traceback.print_exc()

    print()

    if failed:
        sys.exit(1)
|