feat: Self-tuning engine — Friston precisions, Dirichlet channels, joint settling, structured projection (#2)

- feat: self-tuning unified_field.py (d9cf00b3acd6ac798ffe552aca8276f284b9cd15)
- feat: self-tuning ngc.py (e6f258cb86a70347cf9b9a562ab1a9b793818432)
- feat: self-tuning canonical.py (bb45ec3d6d837f759d8f2cd94f04fb7e18f19588)

Files changed (3) hide show

tensegrity/engine/ngc.py +82 -20
tensegrity/engine/unified_field.py +64 -10
tensegrity/pipeline/canonical.py +145 -16

tensegrity/engine/ngc.py CHANGED Viewed

@@ -93,7 +93,12 @@ class PredictiveCodingCircuit:
                  precision_momentum: float = 0.9,
                  precision_min: float = 0.1,
                  precision_max: float = 100.0,
-                 max_history_length: int = 2000):
         """
         Args:
             layer_sizes: [dim_sensory, dim_hidden1, ..., dim_top]
@@ -102,15 +107,28 @@ class PredictiveCodingCircuit:
                        If None, defaults to 1.0 everywhere. Length must equal ``n_layers`` when given.
             tau: Membrane time constant (settling speed)
             gamma: State decay rate (leaky integration)
-            settle_steps: How many steps to run before declaring convergence
             settle_steps_warm: Steps when the observation is nearly unchanged (warm-started z)
-            obs_change_threshold: L2 change above this triggers full settle_steps
             learning_rate: Hebbian learning rate for synaptic updates
             activation: Nonlinearity: "tanh", "relu", "sigmoid", or "linear"
-            adaptive_precision: If True, update precisions from prediction-error variance in learn()
             precision_momentum: EMA factor for precision updates (higher = slower change)
             precision_min / precision_max: Clamp learned precisions
             max_history_length: Max entries retained in energy / error history (ring buffer)
         """
         self.n_layers = len(layer_sizes)
         self.layer_sizes = layer_sizes
@@ -126,19 +144,29 @@ class PredictiveCodingCircuit:
         self.precision_max = precision_max
         self.max_history_length = max(1, int(max_history_length))
         self.activation = activation
         # Activation function
         self._phi, self._phi_deriv = self._get_activation(activation)
-        # Precisions (per layer): ρ = 1/σ²
         if precisions is None:
             self.precisions = [1.0] * self.n_layers
         else:
             if len(precisions) != self.n_layers:
                 raise ValueError(
                     f"precisions must have length n_layers={self.n_layers}, got {len(precisions)}"
                 )
             self.precisions = list(precisions)
         # Generative weights W[ℓ]: maps layer ℓ+1 → prediction of layer ℓ
         # W[ℓ] has shape (layer_sizes[ℓ], layer_sizes[ℓ+1])
@@ -281,6 +309,9 @@ class PredictiveCodingCircuit:
         if steps is not None:
             n_steps = steps
         elif not self._initialized:
             n_steps = self.settle_steps
         elif obs_changed:
@@ -336,6 +367,13 @@ class PredictiveCodingCircuit:
             energy_trace.append(total_energy)
             error_norms.append(step_error_norms)
         self.energy_history.append(energy_trace[-1])
         self.error_history.append(error_norms[-1])
@@ -351,18 +389,23 @@ class PredictiveCodingCircuit:
     def learn(self, modulation: float = 1.0):
         """
-        Hebbian synaptic update after settling.
         ΔWℓ = modulation * lr * (e^{ℓ-1} · (φ(z^ℓ))ᵀ)
-        The modulation parameter gates learning: when the current observation
-        is inconsistent with established beliefs (high prediction error +
-        low memory similarity), modulation should be low, preventing the
-        system from learning from contradictory evidence.
-        This is precision-weighted Hebbian learning: the effective learning
-        rate is lr * modulation, where modulation encodes the system's
-        confidence that this observation is trustworthy.
         """
         effective_lr = self.lr * modulation
@@ -370,25 +413,41 @@ class PredictiveCodingCircuit:
             for ell in range(self.n_layers):
                 residual = self.layers[ell].z - self.layers[ell].z_bar
                 sq_error = float(np.mean(residual ** 2))
-                target_precision = 1.0 / max(sq_error, 1e-6)
-                mom = self.precision_momentum
-                self.precisions[ell] = mom * self.precisions[ell] + (1.0 - mom) * target_precision
-                self.precisions[ell] = float(
-                    np.clip(self.precisions[ell], self.precision_min, self.precision_max)
                 )
                 self.layers[ell].precision = self.precisions[ell]
         for ell in range(self.n_layers - 1):
             error_below = self.layers[ell].error
             z_above = self._phi(self.layers[ell + 1].z)
             # Generative weight update: Hebbian + decay
             dW = np.outer(error_below, z_above)
-            self.W[ell] += effective_lr * dW - effective_lr * self.gamma * self.W[ell]
             # Feedback weight update
             dE = np.outer(self.layers[ell + 1].z, error_below)
-            self.E[ell] += effective_lr * dE - effective_lr * self.gamma * self.E[ell]
             # Spectral normalization (power iteration — cheaper than full SVD)
             w_norm = _spectral_norm_power_iteration(self.W[ell])
@@ -441,6 +500,7 @@ class PredictiveCodingCircuit:
             "W": [w.copy() for w in self.W],
             "E": [e.copy() for e in self.E],
             "precisions": list(self.precisions),
             "_initialized": self._initialized,
             "_last_obs": None if self._last_obs is None else self._last_obs.copy(),
         }
@@ -448,6 +508,8 @@ class PredictiveCodingCircuit:
     def restore_state(self, state: Dict[str, Any]) -> None:
         """Restore from ``save_state()``."""
         self.precisions = list(state["precisions"])
         self.W = [w.copy() for w in state["W"]]
         self.E = [e.copy() for e in state["E"]]
         self._initialized = bool(state["_initialized"])

                  precision_momentum: float = 0.9,
                  precision_min: float = 0.1,
                  precision_max: float = 100.0,
+                 max_history_length: int = 2000,
+                 # --- Self-tuning settle parameters ---
+                 adaptive_settle: bool = True,
+                 settle_convergence_threshold: float = 0.01,
+                 settle_min_steps: int = 5,
+                 settle_max_steps: int = 100):
         """
         Args:
             layer_sizes: [dim_sensory, dim_hidden1, ..., dim_top]
                        If None, defaults to 1.0 everywhere. Length must equal ``n_layers`` when given.
             tau: Membrane time constant (settling speed)
             gamma: State decay rate (leaky integration)
+            settle_steps: Default settling steps (used as fallback if adaptive_settle=False)
             settle_steps_warm: Steps when the observation is nearly unchanged (warm-started z)
+            obs_change_threshold: L2 change above this triggers full settle
             learning_rate: Hebbian learning rate for synaptic updates
             activation: Nonlinearity: "tanh", "relu", "sigmoid", or "linear"
+            adaptive_precision: If True, update precisions from prediction-error variance
+                via Friston's log-precision gradient (Millidge et al. 2021, Eq 20-22):
+                    dΣ/dt = ε̃·ε̃ᵀ − Σ⁻¹
+                At fixed point, Σ_l = Var[ε̃] → precision = 1/Var[ε̃].
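+                Implemented in learn() as a clamped log-space gradient step rather
+                than an EMA: γ_l ← γ_l − η·0.5·(precision_l·mean(ε²) − 1), with
+                η = 0.1·(1 − precision_momentum) and precision_l = exp(γ_l).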
             precision_momentum: EMA factor for precision updates (higher = slower change)
             precision_min / precision_max: Clamp learned precisions
             max_history_length: Max entries retained in energy / error history (ring buffer)
+            adaptive_settle: If True, settle until energy convergence instead of fixed steps.
+                The system monitors ||E_t - E_{t-1}|| and stops when the energy
+                change drops below settle_convergence_threshold, bounded by
+                [settle_min_steps, settle_max_steps]. This replaces the fixed
+                settle_steps parameter with a self-tuning criterion derived from
+                the system's own dynamics.
+            settle_convergence_threshold: Energy change threshold for early stopping
+            settle_min_steps: Minimum settling steps (even if converged)
+            settle_max_steps: Maximum settling steps (hard ceiling)
         """
         self.n_layers = len(layer_sizes)
         self.layer_sizes = layer_sizes
         self.precision_max = precision_max
         self.max_history_length = max(1, int(max_history_length))
         self.activation = activation
+        self.adaptive_settle = adaptive_settle
+        self.settle_convergence_threshold = settle_convergence_threshold
+        self.settle_min_steps = max(1, int(settle_min_steps))
+        self.settle_max_steps = max(self.settle_min_steps, int(settle_max_steps))
         # Activation function
         self._phi, self._phi_deriv = self._get_activation(activation)
+        # --- Friston log-precision state (per-layer) ---
+        # γ_l = log(precision_l). Updated via gradient descent on VFE:
+        #   F_γ = 0.5·(mean(ε̃²) − 1)  (Millidge Eq 21)
+        # At fixed point: precision = 1/Var[ε] = exp(γ)
+        # Defaults to log(1.0) = 0.0 (unit precision) when no precisions are given;
+        # otherwise set to the log of each supplied precision (floored at 1e-8).
         if precisions is None:
             self.precisions = [1.0] * self.n_layers
+            self._log_precisions = [0.0] * self.n_layers
         else:
             if len(precisions) != self.n_layers:
                 raise ValueError(
                     f"precisions must have length n_layers={self.n_layers}, got {len(precisions)}"
                 )
             self.precisions = list(precisions)
+            self._log_precisions = [float(np.log(max(p, 1e-8))) for p in precisions]
         # Generative weights W[ℓ]: maps layer ℓ+1 → prediction of layer ℓ
         # W[ℓ] has shape (layer_sizes[ℓ], layer_sizes[ℓ+1])
         if steps is not None:
             n_steps = steps
+        elif self.adaptive_settle:
+            # Adaptive: we'll settle until convergence, bounded by min/max
+            n_steps = self.settle_max_steps
         elif not self._initialized:
             n_steps = self.settle_steps
         elif obs_changed:
             energy_trace.append(total_energy)
             error_norms.append(step_error_norms)
+            # --- Adaptive settle: early exit when energy converges ---
+            if self.adaptive_settle and steps is None and step >= self.settle_min_steps - 1:
+                if len(energy_trace) >= 2:
+                    delta_e = abs(energy_trace[-1] - energy_trace[-2])
+                    if delta_e < self.settle_convergence_threshold:
+                        break
         self.energy_history.append(energy_trace[-1])
         self.error_history.append(error_norms[-1])
     def learn(self, modulation: float = 1.0):
         """
+        Hebbian synaptic update after settling, with Friston precision update.
         ΔWℓ = modulation * lr * (e^{ℓ-1} · (φ(z^ℓ))ᵀ)
+        Precision update (Millidge et al. 2021, Eq 20-22):
+            dΣ/dt = ε̃·ε̃ᵀ − Σ⁻¹
+            At fixed point: Σ_l = Var[ε̃] → precision = 1/Var[ε̃]
+        Implemented in log-space for numerical stability:
+            γ_l = log(precision_l)
+            F_γ = 0.5 · (precision_l · mean(ε²) − 1)    (gradient of VFE w.r.t. log-precision; ε = z − z_bar)
+            γ_l ← γ_l − lr_precision · F_γ
+        The learning rate for Hebbian weights is precision-scaled, with the
+        precision factor capped at 10:
+            η_eff = lr · modulation · min(precision_l, 10)
+        This is the natural gradient preconditioning from Friston's theory:
+        more precise layers learn faster because their errors are more trustworthy.
         """
         effective_lr = self.lr * modulation
             for ell in range(self.n_layers):
                 residual = self.layers[ell].z - self.layers[ell].z_bar
                 sq_error = float(np.mean(residual ** 2))
+                # Friston log-precision gradient: F_γ = 0.5·(precision·mean(ε²) − 1)
+                # At fixed point: precision·Var[ε] = 1 → precision = 1/Var[ε]
+                current_prec = self.precisions[ell]
+                f_gamma = 0.5 * (current_prec * sq_error - 1.0)
+                # Update log-precision via gradient descent
+                # lr_precision = 0.1 · (1-momentum) to match EMA time constant
+                lr_precision = 0.1 * (1.0 - self.precision_momentum)
+                self._log_precisions[ell] -= lr_precision * f_gamma
+                # Clamp and exponentiate
+                log_min = np.log(max(self.precision_min, 1e-8))
+                log_max = np.log(self.precision_max)
+                self._log_precisions[ell] = float(
+                    np.clip(self._log_precisions[ell], log_min, log_max)
                 )
+                self.precisions[ell] = float(np.exp(self._log_precisions[ell]))
                 self.layers[ell].precision = self.precisions[ell]
         for ell in range(self.n_layers - 1):
             error_below = self.layers[ell].error
             z_above = self._phi(self.layers[ell + 1].z)
+            # Precision-scaled learning rate: more precise layers learn faster.
+            # This IS the natural gradient from Friston's theory.
+            layer_lr = effective_lr * min(self.precisions[ell], 10.0)
             # Generative weight update: Hebbian + decay
             dW = np.outer(error_below, z_above)
+            self.W[ell] += layer_lr * dW - layer_lr * self.gamma * self.W[ell]
             # Feedback weight update
             dE = np.outer(self.layers[ell + 1].z, error_below)
+            self.E[ell] += layer_lr * dE - layer_lr * self.gamma * self.E[ell]
             # Spectral normalization (power iteration — cheaper than full SVD)
             w_norm = _spectral_norm_power_iteration(self.W[ell])
             "W": [w.copy() for w in self.W],
             "E": [e.copy() for e in self.E],
             "precisions": list(self.precisions),
+            "_log_precisions": list(self._log_precisions),
             "_initialized": self._initialized,
             "_last_obs": None if self._last_obs is None else self._last_obs.copy(),
         }
     def restore_state(self, state: Dict[str, Any]) -> None:
         """Restore from ``save_state()``."""
         self.precisions = list(state["precisions"])
+        self._log_precisions = list(state.get("_log_precisions",
+            [float(np.log(max(p, 1e-8))) for p in self.precisions]))
         self.W = [w.copy() for w in state["W"]]
         self.E = [e.copy() for e in state["E"]]
         self._initialized = bool(state["_initialized"])

tensegrity/engine/unified_field.py CHANGED Viewed

@@ -192,10 +192,22 @@ class UnifiedField:
         # FHRR encoder
         self.encoder = FHRREncoder(dim=fhrr_dim)
-        # Random projection: FHRR (complex, fhrr_dim) → real (obs_dim)
-        # Fixed, not learned — this is the sensory transduction
-        rng = np.random.RandomState(42)
-        self._proj = rng.randn(obs_dim, fhrr_dim).astype(np.float64) / np.sqrt(fhrr_dim)
         # NGC circuit: hierarchical predictive coding
         layer_sizes = [obs_dim] + hidden_dims
@@ -215,9 +227,22 @@ class UnifiedField:
         self.energy_history: Deque[EnergyDecomposition] = deque(maxlen=max(1, int(energy_history_maxlen)))
     def _fhrr_to_obs(self, fhrr_vec: np.ndarray) -> np.ndarray:
-        """Project FHRR complex vector to real observation space."""
         real_part = np.real(fhrr_vec).astype(np.float64)
-        return self._proj @ real_part
     def observe(self, raw_input: Any, input_type: str = "numeric") -> Dict[str, Any]:
         """
@@ -258,13 +283,16 @@ class UnifiedField:
         settle_result = self.ngc.settle(obs_vec)
         perception_energy = settle_result["final_energy"]
-        prediction_error_post_settle = self.ngc.prediction_error(obs_vec)
-        # === 4. REMEMBER: query Hopfield with abstract state ===
         abstract_state = self.ngc.get_abstract_state(level=-1)
         retrieved, memory_energy = self.memory.retrieve(abstract_state)
-        # Compute memory consistency: how similar is this observation to stored patterns?
         abstract_norm = np.linalg.norm(abstract_state)
         retrieved_norm = np.linalg.norm(retrieved)
         if abstract_norm > 1e-8 and retrieved_norm > 1e-8:
@@ -273,6 +301,32 @@ class UnifiedField:
         else:
             memory_similarity = 0.0
         # === 5. LEARN: Precision-modulated Hebbian update ===
         # Learning modulation: high when observation is consistent with memory,
         # low when it contradicts stored patterns.

         # FHRR encoder
         self.encoder = FHRREncoder(dim=fhrr_dim)
+        # Structure-preserving projection: FHRR (complex, fhrr_dim) → real (obs_dim)
+        # Instead of a random matrix that destroys semantic structure, we use
+        # a fixed projection derived from the FHRR basis itself. The real part
+        # of the FHRR vector is sliced/averaged into obs_dim buckets. This
+        # preserves the phasor structure: similar FHRR vectors → similar obs.
+        #
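+        # For obs_dim <= fhrr_dim: average adjacent blocks of size fhrr_dim // obs_dim
+        # (the trailing fhrr_dim % obs_dim components are not read by _fhrr_to_obs).
+        # For obs_dim > fhrr_dim: one component per slot, remaining slots stay zero
+        # (rare in practice).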
+        self._proj_mode = "structured"
+        if obs_dim <= fhrr_dim:
+            # Structured averaging: each obs dimension = mean of a block of FHRR dims
+            self._proj_block_size = fhrr_dim // obs_dim
+            self._proj_remainder = fhrr_dim % obs_dim
+        else:
+            self._proj_block_size = 1
+            self._proj_remainder = 0
         # NGC circuit: hierarchical predictive coding
         layer_sizes = [obs_dim] + hidden_dims
         self.energy_history: Deque[EnergyDecomposition] = deque(maxlen=max(1, int(energy_history_maxlen)))
     def _fhrr_to_obs(self, fhrr_vec: np.ndarray) -> np.ndarray:
+        """Project FHRR complex vector to real observation space.
+        Uses structure-preserving block averaging instead of random projection.
+        Each obs dimension = mean of a contiguous block of FHRR real components.
+        This preserves semantic similarity: if two FHRR vectors have similar
+        phasor angles, their block averages will also be similar.
+        """
         real_part = np.real(fhrr_vec).astype(np.float64)
+        bs = self._proj_block_size
+        obs = np.zeros(self.obs_dim, dtype=np.float64)
+        for i in range(self.obs_dim):
+            start = i * bs
+            end = min(start + bs, len(real_part))
+            if start < len(real_part):
+                obs[i] = np.mean(real_part[start:end])
+        return obs
     def observe(self, raw_input: Any, input_type: str = "numeric") -> Dict[str, Any]:
         """
         settle_result = self.ngc.settle(obs_vec)
         perception_energy = settle_result["final_energy"]
+        # === 4. JOINT SETTLING: Hopfield retrieval feeds back into NGC ===
+        # This closes the loop that was previously sequential:
+        #   settle NGC → query Hopfield → DONE (old: pipeline)
+        # Now: settle NGC → query Hopfield → inject memory → re-settle NGC
+        # The second settle integrates memory evidence, making the energy
+        # decomposition genuinely joint rather than a sequential pipeline.
         abstract_state = self.ngc.get_abstract_state(level=-1)
         retrieved, memory_energy = self.memory.retrieve(abstract_state)
+        # Compute memory consistency
         abstract_norm = np.linalg.norm(abstract_state)
         retrieved_norm = np.linalg.norm(retrieved)
         if abstract_norm > 1e-8 and retrieved_norm > 1e-8:
         else:
             memory_similarity = 0.0
+        # Memory-guided re-settle: blend retrieved memory into top NGC layer
+        # and re-settle to integrate memory evidence into the full hierarchy.
+        # The blend weight is derived from memory_similarity itself:
+        # high similarity → strong blend (memory confirms), low → weak blend.
+        if self.memory.n_patterns > 2 and retrieved_norm > 1e-8:
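+            # Blend weight = min(sigmoid(3 * memory_similarity), 0.5), i.e. in (0, 0.5].
+            # NOTE: sigmoid(3s) >= 0.5 for every s >= 0, so the cap is reached as soon
+            # as the similarity is non-negative; only a negative similarity lowers the
+            # blend. Memory therefore supplies at most 50% of the top-layer state.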
+            blend = float(1.0 / (1.0 + np.exp(-3.0 * memory_similarity)))
+            blend = min(blend, 0.5)
+            # Inject retrieved memory into the top NGC layer
+            top_layer = self.ngc.layers[-1]
+            top_layer.z = (1.0 - blend) * top_layer.z + blend * retrieved
+            # Re-settle with memory evidence integrated
+            # Use fewer steps since we're refining, not starting from scratch
+            re_settle = self.ngc.settle(obs_vec, steps=max(3, self.ngc.settle_steps // 3))
+            perception_energy = re_settle["final_energy"]
+            # Re-query Hopfield with the refined abstract state
+            abstract_state = self.ngc.get_abstract_state(level=-1)
+            retrieved, memory_energy = self.memory.retrieve(abstract_state)
+        prediction_error_post_settle = self.ngc.prediction_error(obs_vec)
         # === 5. LEARN: Precision-modulated Hebbian update ===
         # Learning modulation: high when observation is consistent with memory,
         # low when it contradicts stored patterns.

tensegrity/pipeline/canonical.py CHANGED Viewed

@@ -130,14 +130,15 @@ class CanonicalPipeline:
         model_name: str = "meta-llama/Llama-3.2-1B-Instruct",
         # Loop budget
         max_iterations: int = 4,
-        # Convergence: top1/top2 ratio above which we commit. Default 2.0
-        # means the leader must be at least twice the runner-up in mass.
         commit_ratio: float = 2.0,
         # Falsification: how many NGC steps to settle each choice for the
         # top-down-predict-the-prompt operation.
         falsify_settle_steps: int = 20,
-        # Bayesian update strength when integrating falsification likelihood
-        # into the controller's hypothesis posteriors.
         falsify_update_strength: float = 1.0,
         # Energy-arena precision (passed through to CausalEnergyTerm).
         energy_arena_precision: float = 1.0,
@@ -151,8 +152,6 @@ class CanonicalPipeline:
         # Persistent episodic recall enters as a memory-evidence channel.
         memory_evidence_weight: float = 0.75,
         # SBERT sentence similarity enters as a semantic-evidence channel.
-        # This is the strongest signal source: it compares the prompt against
-        # each (prompt+choice) concatenation using frozen sentence embeddings.
         sbert_evidence_weight: float = 0.8,
         feedback_learning_rate: float = 1.0,
         persistent_state_path: Optional[str] = None,
@@ -163,11 +162,32 @@ class CanonicalPipeline:
         self.falsify_settle_steps = int(falsify_settle_steps)
         self.falsify_update_strength = float(falsify_update_strength)
         self.max_hypotheses = max(2, int(max_hypotheses))
         self.llm_evidence_weight = float(llm_evidence_weight)
         self.memory_evidence_weight = float(memory_evidence_weight)
         self.sbert_evidence_weight = float(sbert_evidence_weight)
-        self.feedback_learning_rate = float(feedback_learning_rate)
-        self.persistent_state_path = persistent_state_path
         initial_labels = list(hypothesis_labels or [])
         while len(initial_labels) < self.max_hypotheses:
@@ -539,6 +559,78 @@ class CanonicalPipeline:
             return top > 0
         return top >= ratio * second
     # ---------- main entry: score one item ----------
     def score_multichoice(
@@ -602,21 +694,32 @@ class CanonicalPipeline:
             )
             # 3. Bayesian update of controller's hypothesis posteriors:
-            #    new_p_i ∝ old_p_i * exp(strength * z(falsify_i)) * energy_post_i.
             old_belief = self._belief_from_controller(n)
             fz = self._znorm(falsify)
             lz = self._znorm(linguistic)
             mz = self._znorm(memory_scores)
             sz = self._znorm(sbert_scores)
-            log_lik_falsify = self.falsify_update_strength * fz
             log_post = (
                 np.log(np.maximum(old_belief, 1e-12))
-                + log_lik_falsify
-                + self.llm_evidence_weight * lz
-                + self.memory_evidence_weight * mz
-                + self.sbert_evidence_weight * sz
-                + np.log(np.maximum(energy_post, 1e-12))
             )
             log_post -= log_post.max()
             new_belief = np.exp(log_post)
             sb = new_belief.sum()
@@ -657,7 +760,12 @@ class CanonicalPipeline:
                 top_p=top_p,
             ))
-            if self._converged(new_belief, self.commit_ratio):
                 converged = True
                 break
@@ -665,6 +773,9 @@ class CanonicalPipeline:
         final_belief = self._belief_from_controller(n)
         committed_idx = int(np.argmax(final_belief))
         # Calibrated score for the harness: belief shifted away from uniform,
         # bounded in [-1, 1]. Comparable in magnitude to the previous z-scored
         # outputs; the harness's confidence-gated blending stays sane.
@@ -813,6 +924,24 @@ class CanonicalPipeline:
             return {"learned": False, "reason": "invalid sample"}
         correct = int(committed_idx) == int(sample.gold)
         field = self.controller.agent.field
         prompt_fhrr = self._encode_text_fhrr(sample.prompt, max_tokens=96)
         correct_fhrr = self._encode_text_fhrr(

         model_name: str = "meta-llama/Llama-3.2-1B-Instruct",
         # Loop budget
         max_iterations: int = 4,
+        # Convergence is now self-tuning: derived from belief entropy dynamics.
+        # commit_ratio is kept as an initial value but will be overridden.
         commit_ratio: float = 2.0,
         # Falsification: how many NGC steps to settle each choice for the
         # top-down-predict-the-prompt operation.
         falsify_settle_steps: int = 20,
+        # These weights are now INITIAL values for the Dirichlet channel
+        # reliability tracker. They will be dynamically updated based on each
+        # channel's prediction accuracy. The system auto-tunes them.
         falsify_update_strength: float = 1.0,
         # Energy-arena precision (passed through to CausalEnergyTerm).
         energy_arena_precision: float = 1.0,
         # Persistent episodic recall enters as a memory-evidence channel.
         memory_evidence_weight: float = 0.75,
         # SBERT sentence similarity enters as a semantic-evidence channel.
         sbert_evidence_weight: float = 0.8,
         feedback_learning_rate: float = 1.0,
         persistent_state_path: Optional[str] = None,
         self.falsify_settle_steps = int(falsify_settle_steps)
         self.falsify_update_strength = float(falsify_update_strength)
         self.max_hypotheses = max(2, int(max_hypotheses))
+        self.feedback_learning_rate = float(feedback_learning_rate)
+        self.persistent_state_path = persistent_state_path
+        # --- Dirichlet channel reliability tracking ---
+        # Instead of fixed weights, each evidence channel has a Dirichlet
+        # pseudo-count that grows when the channel's top-ranked choice agrees
+        # with the top picks of other channels (cross-channel agreement) and,
+        # post-feedback, in proportion to how highly it ranked the gold label.
+        # Fusion weights = normalized counts.
+        #
+        # This is the VFE-minimizing closed form from pymdp:
+        #   α* = α₀ + Σ_t obs_t ⊗ qs_t
+        # where α₀ is the initial prior strength.
+        #
+        # Channels: falsify, llm, memory, sbert, energy_arena
+        self._channel_names = ["falsify", "llm", "memory", "sbert", "energy"]
+        self._channel_alpha = {
+            "falsify": float(falsify_update_strength),
+            "llm": float(llm_evidence_weight),
+            "memory": float(memory_evidence_weight),
+            "sbert": float(sbert_evidence_weight),
+            "energy": float(energy_arena_beta),
+        }
+        # Keep the initial per-channel weights as attributes. The fusion weights
+        # actually applied are recomputed from _channel_alpha by _channel_weights().
         self.llm_evidence_weight = float(llm_evidence_weight)
         self.memory_evidence_weight = float(memory_evidence_weight)
         self.sbert_evidence_weight = float(sbert_evidence_weight)
         initial_labels = list(hypothesis_labels or [])
         while len(initial_labels) < self.max_hypotheses:
             return top > 0
         return top >= ratio * second
+    def _channel_weights(self) -> Dict[str, float]:
+        """Compute normalized fusion weights from Dirichlet pseudo-counts.
+        weights_m = alpha_m / sum(alpha)
+        This is the expected value of the Dirichlet posterior over channel
+        reliabilities. As channels accumulate evidence of correctness,
+        their weight grows; unreliable channels fade toward zero.
+        """
+        total = sum(self._channel_alpha.values())
+        if total <= 0:
+            n = len(self._channel_names)
+            return {c: 1.0 / n for c in self._channel_names}
+        return {c: self._channel_alpha[c] / total for c in self._channel_names}
+    def _update_channel_reliability(
+        self, channel_scores: Dict[str, np.ndarray], committed_idx: int, n: int
+    ) -> None:
+        """Update Dirichlet pseudo-counts via cross-channel agreement.
+        Each channel earns pseudo-counts when its top-ranked choice agrees
+        with other channels. This is the consensus-based reliability update
+        from the IterativeCognitiveScorer, elevated to the canonical pipeline.
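+        The gold-label update is not performed here: it happens in the
+        feedback path (learn_from_feedback), where every channel receives a
+        pseudo-count proportional to how highly it ranked the gold answer —
+        the VFE-minimizing Dirichlet update from pymdp. ``committed_idx`` is
+        accepted but not used by this method.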
+        """
+        if n < 2:
+            return
+        # Get each channel's top pick
+        picks = {}
+        for name, scores in channel_scores.items():
+            if scores is not None and len(scores) >= n:
+                s = scores[:n]
+                if np.any(np.abs(s) > 1e-12):
+                    picks[name] = int(np.argmax(s))
+        if len(picks) < 2:
+            return
+        # Cross-channel agreement: each channel gets credit for agreeing
+        # with others. This is NOT self-fulfilling — the anchor is the
+        # consensus structure, not any single channel.
+        for name_i, pick_i in picks.items():
+            agreements = sum(1 for name_j, pick_j in picks.items()
+                           if name_j != name_i and pick_j == pick_i)
+            if agreements > 0:
+                credit = float(agreements) / max(len(picks) - 1, 1)
+                self._channel_alpha[name_i] += credit * 0.1  # slow accumulation
+    def _adaptive_commit_ratio(self, belief: np.ndarray) -> float:
+        """Derive the convergence commit ratio from belief entropy dynamics.
+        Instead of a fixed commit_ratio=2.0, the threshold adapts:
+        - When entropy is high (uniform beliefs), require higher separation (more cautious)
+        - When entropy is low (concentrated beliefs), require less separation (confident)
+        commit_ratio = 1.5 + entropy * 1.5
+        At max entropy (1.0): ratio = 3.0 (very cautious)
+        At min entropy (0.0): ratio = 1.5 (quick commit)
+        """
+        n = len(belief)
+        if n < 2:
+            return self.commit_ratio
+        nz = belief[belief > 0]
+        if len(nz) < 2:
+            return 1.5
+        entropy = float(-np.sum(nz * np.log(nz)) / np.log(n))
+        return 1.5 + entropy * 1.5
     # ---------- main entry: score one item ----------
     def score_multichoice(
             )
             # 3. Bayesian update of controller's hypothesis posteriors:
+            #    new_p_i ∝ old_p_i * exp(w_c * z(channel_c_i)) for each z-scored
+            #    channel c; the energy channel contributes energy_post_i ** w_energy.
+            #    Channel weights w_c are derived from Dirichlet pseudo-counts,
+            #    not hardcoded — they auto-tune based on reliability.
             old_belief = self._belief_from_controller(n)
             fz = self._znorm(falsify)
             lz = self._znorm(linguistic)
             mz = self._znorm(memory_scores)
             sz = self._znorm(sbert_scores)
+            w = self._channel_weights()
             log_post = (
                 np.log(np.maximum(old_belief, 1e-12))
+                + w["falsify"] * fz
+                + w["llm"] * lz
+                + w["memory"] * mz
+                + w["sbert"] * sz
+                + w["energy"] * np.log(np.maximum(energy_post, 1e-12))
             )
+            # Track per-channel scores for reliability update
+            _channel_scores = {
+                "falsify": falsify, "llm": linguistic,
+                "memory": memory_scores, "sbert": sbert_scores,
+                "energy": energy_post,
+            }
+            self._last_channel_scores_iter = _channel_scores
             log_post -= log_post.max()
             new_belief = np.exp(log_post)
             sb = new_belief.sum()
                 top_p=top_p,
             ))
+            # Update channel reliability via cross-channel agreement
+            self._update_channel_reliability(_channel_scores, top_idx, n)
+            # Adaptive convergence: commit ratio derived from belief entropy
+            adaptive_ratio = self._adaptive_commit_ratio(new_belief)
+            if self._converged(new_belief, adaptive_ratio):
                 converged = True
                 break
         final_belief = self._belief_from_controller(n)
         committed_idx = int(np.argmax(final_belief))
+        # Save last channel scores for gold-label Dirichlet update in learn_from_feedback
+        self._last_channel_scores = getattr(self, '_last_channel_scores_iter', {})
         # Calibrated score for the harness: belief shifted away from uniform,
         # bounded in [-1, 1]. Comparable in magnitude to the previous z-scored
         # outputs; the harness's confidence-gated blending stays sane.
             return {"learned": False, "reason": "invalid sample"}
         correct = int(committed_idx) == int(sample.gold)
+        # --- Dirichlet channel reliability update from gold label ---
+        # This is the VFE-minimizing update: channels that ranked the gold
+        # answer higher get more pseudo-counts. This is the ONLY place where
+        # external supervision enters the channel weighting system.
+        # The update is: α_m += 0.5 · gold_rank_score_m, where gold_rank_score_m
+        # is channel m's min-max-normalized score for the gold answer (1/n when
+        # the channel's scores do not discriminate between choices).
+        if hasattr(self, '_last_channel_scores') and self._last_channel_scores:
+            for name, scores in self._last_channel_scores.items():
+                if scores is not None and len(scores) >= n and sample.gold < n:
+                    s = scores[:n]
+                    s_range = float(np.max(s) - np.min(s))
+                    if s_range > 1e-12:
+                        # How well did this channel rank the gold answer?
+                        # Normalized to [0, 1]: 1 = gold was ranked highest
+                        gold_rank_score = float((s[sample.gold] - np.min(s)) / s_range)
+                    else:
+                        gold_rank_score = 1.0 / n  # no discrimination
+                    self._channel_alpha[name] += gold_rank_score * 0.5
         field = self.controller.agent.field
         prompt_fhrr = self._encode_text_fhrr(sample.prompt, max_tokens=96)
         correct_fhrr = self._encode_text_fhrr(