feat: Self-tuning engine — Friston precisions, Dirichlet channels, joint settling, structured projection

#2
tensegrity/engine/unified_field.py CHANGED
@@ -192,10 +192,22 @@ class UnifiedField:
192
  # FHRR encoder
193
  self.encoder = FHRREncoder(dim=fhrr_dim)
194
 
195
- # Random projection: FHRR (complex, fhrr_dim) → real (obs_dim)
196
- # Fixed, not learned this is the sensory transduction
197
- rng = np.random.RandomState(42)
198
- self._proj = rng.randn(obs_dim, fhrr_dim).astype(np.float64) / np.sqrt(fhrr_dim)
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  # NGC circuit: hierarchical predictive coding
201
  layer_sizes = [obs_dim] + hidden_dims
@@ -215,9 +227,22 @@ class UnifiedField:
215
  self.energy_history: Deque[EnergyDecomposition] = deque(maxlen=max(1, int(energy_history_maxlen)))
216
 
217
  def _fhrr_to_obs(self, fhrr_vec: np.ndarray) -> np.ndarray:
218
- """Project FHRR complex vector to real observation space."""
 
 
 
 
 
 
219
  real_part = np.real(fhrr_vec).astype(np.float64)
220
- return self._proj @ real_part
 
 
 
 
 
 
 
221
 
222
  def observe(self, raw_input: Any, input_type: str = "numeric") -> Dict[str, Any]:
223
  """
@@ -258,13 +283,16 @@ class UnifiedField:
258
  settle_result = self.ngc.settle(obs_vec)
259
  perception_energy = settle_result["final_energy"]
260
 
261
- prediction_error_post_settle = self.ngc.prediction_error(obs_vec)
262
-
263
- # === 4. REMEMBER: query Hopfield with abstract state ===
 
 
 
264
  abstract_state = self.ngc.get_abstract_state(level=-1)
265
  retrieved, memory_energy = self.memory.retrieve(abstract_state)
266
 
267
- # Compute memory consistency: how similar is this observation to stored patterns?
268
  abstract_norm = np.linalg.norm(abstract_state)
269
  retrieved_norm = np.linalg.norm(retrieved)
270
  if abstract_norm > 1e-8 and retrieved_norm > 1e-8:
@@ -273,6 +301,32 @@ class UnifiedField:
273
  else:
274
  memory_similarity = 0.0
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  # === 5. LEARN: Precision-modulated Hebbian update ===
277
  # Learning modulation: high when observation is consistent with memory,
278
  # low when it contradicts stored patterns.
 
192
  # FHRR encoder
193
  self.encoder = FHRREncoder(dim=fhrr_dim)
194
 
195
+ # Structure-preserving projection: FHRR (complex, fhrr_dim) → real (obs_dim)
196
+ # Instead of a random matrix that destroys semantic structure, we use
197
+ # a fixed projection derived from the FHRR basis itself. The real part
198
+ # of the FHRR vector is sliced/averaged into obs_dim buckets. This
199
+ # preserves the phasor structure: similar FHRR vectors → similar obs.
200
+ #
201
+ # For obs_dim <= fhrr_dim: average adjacent blocks of size fhrr_dim // obs_dim.
202
+ # For obs_dim > fhrr_dim: pad with zeros (rare in practice).
203
+ self._proj_mode = "structured"
204
+ if obs_dim <= fhrr_dim:
205
+ # Structured averaging: each obs dimension = mean of a block of FHRR dims
206
+ self._proj_block_size = fhrr_dim // obs_dim
207
+ self._proj_remainder = fhrr_dim % obs_dim
208
+ else:
209
+ self._proj_block_size = 1
210
+ self._proj_remainder = 0
211
 
212
  # NGC circuit: hierarchical predictive coding
213
  layer_sizes = [obs_dim] + hidden_dims
 
227
  self.energy_history: Deque[EnergyDecomposition] = deque(maxlen=max(1, int(energy_history_maxlen)))
228
 
229
  def _fhrr_to_obs(self, fhrr_vec: np.ndarray) -> np.ndarray:
230
+ """Project FHRR complex vector to real observation space.
231
+
232
+ Uses structure-preserving block averaging instead of random projection.
233
+ Each obs dimension = mean of a contiguous block of FHRR real components.
234
+ This preserves semantic similarity: if two FHRR vectors have similar
235
+ phasor angles, their block averages will also be similar.
236
+ """
237
  real_part = np.real(fhrr_vec).astype(np.float64)
238
+ bs = self._proj_block_size
239
+ obs = np.zeros(self.obs_dim, dtype=np.float64)
240
+ for i in range(self.obs_dim):
241
+ start = i * bs
242
+ end = min(start + bs, len(real_part))
243
+ if start < len(real_part):
244
+ obs[i] = np.mean(real_part[start:end])
245
+ return obs
246
 
247
  def observe(self, raw_input: Any, input_type: str = "numeric") -> Dict[str, Any]:
248
  """
 
283
  settle_result = self.ngc.settle(obs_vec)
284
  perception_energy = settle_result["final_energy"]
285
 
286
+ # === 4. JOINT SETTLING: Hopfield retrieval feeds back into NGC ===
287
+ # This closes the loop that was previously sequential:
288
+ # settle NGC → query Hopfield → DONE (old: pipeline)
289
+ # Now: settle NGC → query Hopfield → inject memory → re-settle NGC
290
+ # The second settle integrates memory evidence, making the energy
291
+ # decomposition genuinely joint rather than a sequential pipeline.
292
  abstract_state = self.ngc.get_abstract_state(level=-1)
293
  retrieved, memory_energy = self.memory.retrieve(abstract_state)
294
 
295
+ # Compute memory consistency
296
  abstract_norm = np.linalg.norm(abstract_state)
297
  retrieved_norm = np.linalg.norm(retrieved)
298
  if abstract_norm > 1e-8 and retrieved_norm > 1e-8:
 
301
  else:
302
  memory_similarity = 0.0
303
 
304
+ # Memory-guided re-settle: blend retrieved memory into top NGC layer
305
+ # and re-settle to integrate memory evidence into the full hierarchy.
306
+ # The blend weight is derived from memory_similarity itself:
307
+ # high similarity → strong blend (memory confirms), low → weak blend.
308
+ if self.memory.n_patterns > 2 and retrieved_norm > 1e-8:
309
+ # Blend weight = min(sigmoid(3 * memory_similarity), 0.5), so memory can
310
+ # provide at most 50% of the top-layer state.
311
+ # NOTE(review): sigmoid(3 * s) >= 0.5 for every s >= 0, so the blend is exactly 0.5 for any non-negative similarity and only drops below 0.5 when similarity is negative.
312
+ blend = float(1.0 / (1.0 + np.exp(-3.0 * memory_similarity)))
313
+ blend = min(blend, 0.5)
314
+
315
+ # Inject retrieved memory into the top NGC layer
316
+ top_layer = self.ngc.layers[-1]
317
+ top_layer.z = (1.0 - blend) * top_layer.z + blend * retrieved
318
+
319
+ # Re-settle with memory evidence integrated
320
+ # Use fewer steps since we're refining, not starting from scratch
321
+ re_settle = self.ngc.settle(obs_vec, steps=max(3, self.ngc.settle_steps // 3))
322
+ perception_energy = re_settle["final_energy"]
323
+
324
+ # Re-query Hopfield with the refined abstract state
325
+ abstract_state = self.ngc.get_abstract_state(level=-1)
326
+ retrieved, memory_energy = self.memory.retrieve(abstract_state)
327
+
328
+ prediction_error_post_settle = self.ngc.prediction_error(obs_vec)
329
+
330
  # === 5. LEARN: Precision-modulated Hebbian update ===
331
  # Learning modulation: high when observation is consistent with memory,
332
  # low when it contradicts stored patterns.
tensegrity/pipeline/canonical.py CHANGED
@@ -130,14 +130,15 @@ class CanonicalPipeline:
130
  model_name: str = "meta-llama/Llama-3.2-1B-Instruct",
131
  # Loop budget
132
  max_iterations: int = 4,
133
- # Convergence: top1/top2 ratio above which we commit. Default 2.0
134
- # means the leader must be at least twice the runner-up in mass.
135
  commit_ratio: float = 2.0,
136
  # Falsification: how many NGC steps to settle each choice for the
137
  # top-down-predict-the-prompt operation.
138
  falsify_settle_steps: int = 20,
139
- # Bayesian update strength when integrating falsification likelihood
140
- # into the controller's hypothesis posteriors.
 
141
  falsify_update_strength: float = 1.0,
142
  # Energy-arena precision (passed through to CausalEnergyTerm).
143
  energy_arena_precision: float = 1.0,
@@ -151,8 +152,6 @@ class CanonicalPipeline:
151
  # Persistent episodic recall enters as a memory-evidence channel.
152
  memory_evidence_weight: float = 0.75,
153
  # SBERT sentence similarity enters as a semantic-evidence channel.
154
- # This is the strongest signal source: it compares the prompt against
155
- # each (prompt+choice) concatenation using frozen sentence embeddings.
156
  sbert_evidence_weight: float = 0.8,
157
  feedback_learning_rate: float = 1.0,
158
  persistent_state_path: Optional[str] = None,
@@ -163,11 +162,32 @@ class CanonicalPipeline:
163
  self.falsify_settle_steps = int(falsify_settle_steps)
164
  self.falsify_update_strength = float(falsify_update_strength)
165
  self.max_hypotheses = max(2, int(max_hypotheses))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  self.llm_evidence_weight = float(llm_evidence_weight)
167
  self.memory_evidence_weight = float(memory_evidence_weight)
168
  self.sbert_evidence_weight = float(sbert_evidence_weight)
169
- self.feedback_learning_rate = float(feedback_learning_rate)
170
- self.persistent_state_path = persistent_state_path
171
 
172
  initial_labels = list(hypothesis_labels or [])
173
  while len(initial_labels) < self.max_hypotheses:
@@ -539,6 +559,78 @@ class CanonicalPipeline:
539
  return top > 0
540
  return top >= ratio * second
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  # ---------- main entry: score one item ----------
543
 
544
  def score_multichoice(
@@ -602,21 +694,32 @@ class CanonicalPipeline:
602
  )
603
 
604
  # 3. Bayesian update of controller's hypothesis posteriors:
605
- # new_p_i ∝ old_p_i * exp(strength * z(falsify_i)) * energy_post_i.
 
 
606
  old_belief = self._belief_from_controller(n)
607
  fz = self._znorm(falsify)
608
  lz = self._znorm(linguistic)
609
  mz = self._znorm(memory_scores)
610
  sz = self._znorm(sbert_scores)
611
- log_lik_falsify = self.falsify_update_strength * fz
 
612
  log_post = (
613
  np.log(np.maximum(old_belief, 1e-12))
614
- + log_lik_falsify
615
- + self.llm_evidence_weight * lz
616
- + self.memory_evidence_weight * mz
617
- + self.sbert_evidence_weight * sz
618
- + np.log(np.maximum(energy_post, 1e-12))
619
  )
 
 
 
 
 
 
 
 
620
  log_post -= log_post.max()
621
  new_belief = np.exp(log_post)
622
  sb = new_belief.sum()
@@ -657,7 +760,12 @@ class CanonicalPipeline:
657
  top_p=top_p,
658
  ))
659
 
660
- if self._converged(new_belief, self.commit_ratio):
 
 
 
 
 
661
  converged = True
662
  break
663
 
@@ -665,6 +773,9 @@ class CanonicalPipeline:
665
  final_belief = self._belief_from_controller(n)
666
  committed_idx = int(np.argmax(final_belief))
667
 
 
 
 
668
  # Calibrated score for the harness: belief shifted away from uniform,
669
  # bounded in [-1, 1]. Comparable in magnitude to the previous z-scored
670
  # outputs; the harness's confidence-gated blending stays sane.
@@ -813,6 +924,24 @@ class CanonicalPipeline:
813
  return {"learned": False, "reason": "invalid sample"}
814
 
815
  correct = int(committed_idx) == int(sample.gold)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  field = self.controller.agent.field
817
  prompt_fhrr = self._encode_text_fhrr(sample.prompt, max_tokens=96)
818
  correct_fhrr = self._encode_text_fhrr(
 
130
  model_name: str = "meta-llama/Llama-3.2-1B-Instruct",
131
  # Loop budget
132
  max_iterations: int = 4,
133
+ # Convergence is now self-tuning: derived from belief entropy dynamics.
134
+ # commit_ratio is kept only as the fallback used when fewer than two choices are scored.
135
  commit_ratio: float = 2.0,
136
  # Falsification: how many NGC steps to settle each choice for the
137
  # top-down-predict-the-prompt operation.
138
  falsify_settle_steps: int = 20,
139
+ # These weights are now INITIAL values for the Dirichlet channel
140
+ # reliability tracker. They will be dynamically updated based on each
141
+ # channel's prediction accuracy. The system auto-tunes them.
142
  falsify_update_strength: float = 1.0,
143
  # Energy-arena precision (passed through to CausalEnergyTerm).
144
  energy_arena_precision: float = 1.0,
 
152
  # Persistent episodic recall enters as a memory-evidence channel.
153
  memory_evidence_weight: float = 0.75,
154
  # SBERT sentence similarity enters as a semantic-evidence channel.
 
 
155
  sbert_evidence_weight: float = 0.8,
156
  feedback_learning_rate: float = 1.0,
157
  persistent_state_path: Optional[str] = None,
 
162
  self.falsify_settle_steps = int(falsify_settle_steps)
163
  self.falsify_update_strength = float(falsify_update_strength)
164
  self.max_hypotheses = max(2, int(max_hypotheses))
165
+ self.feedback_learning_rate = float(feedback_learning_rate)
166
+ self.persistent_state_path = persistent_state_path
167
+
168
+ # --- Dirichlet channel reliability tracking ---
169
+ # Instead of fixed weights, each evidence channel has a Dirichlet
170
+ # pseudo-count that grows when the channel's top-ranked choice matches
171
+ # the committed belief (cross-channel agreement) or the gold label
172
+ # (post-feedback). Fusion weights = normalized counts.
173
+ #
174
+ # This is the VFE-minimizing closed form from pymdp:
175
+ # α* = α₀ + Σ_t obs_t ⊗ qs_t
176
+ # where α₀ is the initial prior strength.
177
+ #
178
+ # Channels: falsify, llm, memory, sbert, energy (the energy arena)
179
+ self._channel_names = ["falsify", "llm", "memory", "sbert", "energy"]
180
+ self._channel_alpha = {
181
+ "falsify": float(falsify_update_strength),
182
+ "llm": float(llm_evidence_weight),
183
+ "memory": float(memory_evidence_weight),
184
+ "sbert": float(sbert_evidence_weight),
185
+ "energy": float(energy_arena_beta),
186
+ }
187
+ # Keep the configured initial weights as attributes; fusion reads _channel_weights() instead.
188
  self.llm_evidence_weight = float(llm_evidence_weight)
189
  self.memory_evidence_weight = float(memory_evidence_weight)
190
  self.sbert_evidence_weight = float(sbert_evidence_weight)
 
 
191
 
192
  initial_labels = list(hypothesis_labels or [])
193
  while len(initial_labels) < self.max_hypotheses:
 
559
  return top > 0
560
  return top >= ratio * second
561
 
562
+ def _channel_weights(self) -> Dict[str, float]:
563
+ """Compute normalized fusion weights from Dirichlet pseudo-counts.
564
+
565
+ weights_m = alpha_m / sum(alpha)
566
+
567
+ This is the expected value of the Dirichlet posterior over channel
568
+ reliabilities. As channels accumulate evidence of correctness,
569
+ their weight grows; unreliable channels fade toward zero.
570
+ """
571
+ total = sum(self._channel_alpha.values())
572
+ if total <= 0:
573
+ n = len(self._channel_names)
574
+ return {c: 1.0 / n for c in self._channel_names}
575
+ return {c: self._channel_alpha[c] / total for c in self._channel_names}
576
+
577
+ def _update_channel_reliability(
578
+ self, channel_scores: Dict[str, np.ndarray], committed_idx: int, n: int
579
+ ) -> None:
580
+ """Update Dirichlet pseudo-counts via cross-channel agreement.
581
+
582
+ Each channel earns pseudo-counts when its top-ranked choice agrees
583
+ with other channels. This is the consensus-based reliability update
584
+ from the IterativeCognitiveScorer, elevated to the canonical pipeline.
585
+
586
+ After feedback (gold label revealed), the channel that ranked the
587
+ gold answer highest gets a bonus pseudo-count — this is the
588
+ VFE-minimizing Dirichlet update from pymdp.
589
+ """
590
+ if n < 2:
591
+ return
592
+
593
+ # Get each channel's top pick
594
+ picks = {}
595
+ for name, scores in channel_scores.items():
596
+ if scores is not None and len(scores) >= n:
597
+ s = scores[:n]
598
+ if np.any(np.abs(s) > 1e-12):
599
+ picks[name] = int(np.argmax(s))
600
+
601
+ if len(picks) < 2:
602
+ return
603
+
604
+ # Cross-channel agreement: each channel gets credit for agreeing
605
+ # with others. This is NOT self-fulfilling — the anchor is the
606
+ # consensus structure, not any single channel.
607
+ for name_i, pick_i in picks.items():
608
+ agreements = sum(1 for name_j, pick_j in picks.items()
609
+ if name_j != name_i and pick_j == pick_i)
610
+ if agreements > 0:
611
+ credit = float(agreements) / max(len(picks) - 1, 1)
612
+ self._channel_alpha[name_i] += credit * 0.1 # slow accumulation
613
+
614
+ def _adaptive_commit_ratio(self, belief: np.ndarray) -> float:
615
+ """Derive the convergence commit ratio from belief entropy dynamics.
616
+
617
+ Instead of a fixed commit_ratio=2.0, the threshold adapts:
618
+ - When entropy is high (uniform beliefs), require higher separation (more cautious)
619
+ - When entropy is low (concentrated beliefs), require less separation (confident)
620
+
621
+ commit_ratio = 1.5 + entropy * 1.5
622
+ At max entropy (1.0): ratio = 3.0 (very cautious)
623
+ At min entropy (0.0): ratio = 1.5 (quick commit)
624
+ """
625
+ n = len(belief)
626
+ if n < 2:
627
+ return self.commit_ratio
628
+ nz = belief[belief > 0]
629
+ if len(nz) < 2:
630
+ return 1.5
631
+ entropy = float(-np.sum(nz * np.log(nz)) / np.log(n))
632
+ return 1.5 + entropy * 1.5
633
+
634
  # ---------- main entry: score one item ----------
635
 
636
  def score_multichoice(
 
694
  )
695
 
696
  # 3. Bayesian update of controller's hypothesis posteriors:
697
+ # new_p_i ∝ old_p_i * Π_c exp(w_c * z(channel_c_i)) * energy_post_i ** w_energy
698
+ # Channel weights w_c are derived from Dirichlet pseudo-counts,
699
+ # not hardcoded — they auto-tune based on reliability.
700
  old_belief = self._belief_from_controller(n)
701
  fz = self._znorm(falsify)
702
  lz = self._znorm(linguistic)
703
  mz = self._znorm(memory_scores)
704
  sz = self._znorm(sbert_scores)
705
+
706
+ w = self._channel_weights()
707
  log_post = (
708
  np.log(np.maximum(old_belief, 1e-12))
709
+ + w["falsify"] * fz
710
+ + w["llm"] * lz
711
+ + w["memory"] * mz
712
+ + w["sbert"] * sz
713
+ + w["energy"] * np.log(np.maximum(energy_post, 1e-12))
714
  )
715
+
716
+ # Track per-channel scores for reliability update
717
+ _channel_scores = {
718
+ "falsify": falsify, "llm": linguistic,
719
+ "memory": memory_scores, "sbert": sbert_scores,
720
+ "energy": energy_post,
721
+ }
722
+ self._last_channel_scores_iter = _channel_scores
723
  log_post -= log_post.max()
724
  new_belief = np.exp(log_post)
725
  sb = new_belief.sum()
 
760
  top_p=top_p,
761
  ))
762
 
763
+ # Update channel reliability via cross-channel agreement
764
+ self._update_channel_reliability(_channel_scores, top_idx, n)
765
+
766
+ # Adaptive convergence: commit ratio derived from belief entropy
767
+ adaptive_ratio = self._adaptive_commit_ratio(new_belief)
768
+ if self._converged(new_belief, adaptive_ratio):
769
  converged = True
770
  break
771
 
 
773
  final_belief = self._belief_from_controller(n)
774
  committed_idx = int(np.argmax(final_belief))
775
 
776
+ # Save last channel scores for gold-label Dirichlet update in learn_from_feedback
777
+ self._last_channel_scores = getattr(self, '_last_channel_scores_iter', {})
778
+
779
  # Calibrated score for the harness: belief shifted away from uniform,
780
  # bounded in [-1, 1]. Comparable in magnitude to the previous z-scored
781
  # outputs; the harness's confidence-gated blending stays sane.
 
924
  return {"learned": False, "reason": "invalid sample"}
925
 
926
  correct = int(committed_idx) == int(sample.gold)
927
+ # --- Dirichlet channel reliability update from gold label ---
928
+ # This is the VFE-minimizing update: channels that ranked the gold
929
+ # answer higher get more pseudo-counts. This is the ONLY place where
930
+ # external supervision enters the channel weighting system.
931
+ # The update is: α_m += correctness_score_m (how well channel m
932
+ # ranked the gold answer relative to its ranking of other choices).
933
+ if hasattr(self, '_last_channel_scores') and self._last_channel_scores:
934
+ for name, scores in self._last_channel_scores.items():
935
+ if scores is not None and len(scores) >= n and sample.gold < n:
936
+ s = scores[:n]
937
+ s_range = float(np.max(s) - np.min(s))
938
+ if s_range > 1e-12:
939
+ # How well did this channel rank the gold answer?
940
+ # Normalized to [0, 1]: 1 = gold was ranked highest
941
+ gold_rank_score = float((s[sample.gold] - np.min(s)) / s_range)
942
+ else:
943
+ gold_rank_score = 1.0 / n # no discrimination
944
+ self._channel_alpha[name] += gold_rank_score * 0.5
945
  field = self.controller.agent.field
946
  prompt_fhrr = self._encode_text_fhrr(sample.prompt, max_tokens=96)
947
  correct_fhrr = self._encode_text_fhrr(