fix: Benchmark regression fixes — emission gate, negation bug, noise dampening, SBERT channel

tensegrity/bench/runner.py CHANGED

@@ -322,13 +322,20 @@ class EvalRunner:
                 model_name=self.model_name,
                 max_iterations=3,
                 commit_ratio=2.0,
-                falsify_settle_steps=15,
-                falsify_update_strength=1.0,
+                falsify_settle_steps=20,
+                # Dampened from 1.0: NGC falsification scores on randomly-projected
+                # FHRR observations are noisy; high strength amplifies noise into
+                # confident wrong beliefs through the Bayesian update.
+                falsify_update_strength=0.3,
                 energy_arena_precision=1.0,
-                energy_arena_beta=1.0,
+                # Dampened from 1.0: per-choice SCMs start with uniform CPTs and
+                # get ~1 observation per iteration — their energy posterior is
+                # nearly uniform and contributes noise, not signal.
+                energy_arena_beta=0.1,
                 max_hypotheses=8,
                 llm_evidence_weight=self.lam,
                 memory_evidence_weight=0.75,
+                sbert_evidence_weight=0.8,
                 persistent_state_path=self.state_path,
             )
         else:
@@ -485,10 +492,19 @@ class EvalRunner:
                 hypothesis_tokens=hypothesis_tokens,
                 belief_fn=lambda: beliefs,
                 vocab_size=vocab_size,
-                scale=max(0.0, float(self.lam)) * 2.5,
+                # Reduced from scale=lam*2.5: the previous aggressive scaling
+                # overpowered LLM logits even when the cognitive layer had low
+                # confidence, causing universal regressions in local mode.
+                scale=max(0.0, float(self.lam)) * 0.5,
                 suppress_threshold=0.0,
-                entropy_gate=1.01,
-                min_confidence=0.0,
+                # Tightened from 1.01 (always emit) to 0.65: only emit logit bias
+                # when the cognitive layer has genuinely converged. When entropy is
+                # high (beliefs are near-uniform), the LLM's own logits are
+                # preserved — "never worse than base."
+                entropy_gate=0.65,
+                # Raised from 0.0 to 0.4: require the leading hypothesis to have
+                # at least 40% mass before emitting any bias.
+                min_confidence=0.4,
                 async_beliefs=False,
             )
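For reviewers, a note on how the three retuned emitter knobs compose. The following is a minimal sketch, assuming the emitter normalizes belief entropy to [0, 1] and checks the leading hypothesis mass against min_confidence before applying the scaled bias; the function name and exact normalization are illustrative, not the actual emitter code (which is outside this diff).

import numpy as np

def maybe_emit_bias(logits, bias, beliefs, *, scale, entropy_gate, min_confidence):
    # Normalized entropy in [0, 1]; 1.0 means beliefs are uniform.
    p = np.asarray(beliefs, dtype=np.float64)
    p = p / p.sum()
    h = -np.sum(p * np.log(np.maximum(p, 1e-12))) / np.log(len(p))
    # The old settings (entropy_gate=1.01, min_confidence=0.0) made this
    # gate impossible to fail, so the bias was always emitted.
    if h >= entropy_gate or p.max() < min_confidence:
        return logits              # high entropy: keep the LLM's own logits
    return logits + scale * bias   # converged: apply the dampened bias

Under the new settings, four-choice beliefs of [0.8, 0.07, 0.07, 0.06] give normalized entropy of about 0.52 and pass both gates, while near-uniform beliefs emit nothing.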
tensegrity/pipeline/canonical.py CHANGED

@@ -150,6 +150,10 @@ class CanonicalPipeline:
         llm_evidence_weight: float = 1.0,
         # Persistent episodic recall enters as a memory-evidence channel.
         memory_evidence_weight: float = 0.75,
+        # SBERT sentence similarity enters as a semantic-evidence channel.
+        # This is the strongest signal source: it compares the prompt against
+        # each (prompt+choice) concatenation using frozen sentence embeddings.
+        sbert_evidence_weight: float = 0.8,
         feedback_learning_rate: float = 1.0,
         persistent_state_path: Optional[str] = None,
     ):
@@ -161,6 +165,7 @@ class CanonicalPipeline:
         self.max_hypotheses = max(2, int(max_hypotheses))
         self.llm_evidence_weight = float(llm_evidence_weight)
         self.memory_evidence_weight = float(memory_evidence_weight)
+        self.sbert_evidence_weight = float(sbert_evidence_weight)
         self.feedback_learning_rate = float(feedback_learning_rate)
         self.persistent_state_path = persistent_state_path

@@ -564,6 +569,7 @@ class CanonicalPipeline:
         # if causal tension is high (the controller wires this internally).
         initial_perception = self.ingest_prompt(sample.prompt)
         memory_scores = self._memory_choice_scores(sample)
+        sbert_scores = self._sbert_choice_scores(sample)

         trace: List[IterationStep] = []
         converged = False
@@ -601,12 +607,14 @@ class CanonicalPipeline:
             fz = self._znorm(falsify)
             lz = self._znorm(linguistic)
             mz = self._znorm(memory_scores)
+            sz = self._znorm(sbert_scores)
             log_lik_falsify = self.falsify_update_strength * fz
             log_post = (
                 np.log(np.maximum(old_belief, 1e-12))
                 + log_lik_falsify
                 + self.llm_evidence_weight * lz
                 + self.memory_evidence_weight * mz
+                + self.sbert_evidence_weight * sz
                 + np.log(np.maximum(energy_post, 1e-12))
             )
             log_post -= log_post.max()
@@ -697,6 +705,47 @@ class CanonicalPipeline:
         norm = np.linalg.norm(v)
         return v / norm if norm > 1e-10 else v

+    def _sbert_choice_scores(self, sample: TaskSample) -> np.ndarray:
+        """Score choices by SBERT sentence-level cosine similarity.
+
+        This is the strongest semantic signal: it compares the prompt against
+        each choice using frozen sentence embeddings from a pretrained SBERT
+        model. Unlike the NGC falsification path, this signal is NOT destroyed
+        by the random FHRR→obs projection and directly measures semantic
+        relatedness in the original embedding space.
+        """
+        n = len(sample.choices)
+        scores = np.zeros(n, dtype=np.float64)
+        if n == 0:
+            return scores
+
+        field = self.controller.agent.field
+        features = field.encoder.features
+        # Try to get the SBERT model from the semantic codebook
+        getter = getattr(features, "get_sbert_model", None)
+        sbert = getter() if callable(getter) else None
+        if sbert is None:
+            return scores
+
+        try:
+            texts = [sample.prompt] + [
+                f"{sample.prompt} {c}" for c in sample.choices
+            ]
+            embs = sbert.encode(texts, show_progress_bar=False)
+            pe = embs[0]
+            pn = float(np.linalg.norm(pe))
+            if pn < 1e-8:
+                return scores
+            for i in range(n):
+                ce = embs[i + 1]
+                cn = float(np.linalg.norm(ce))
+                if cn > 1e-8:
+                    scores[i] = float(np.dot(pe, ce) / (pn * cn))
+        except Exception as e:
+            logger.debug("SBERT choice scoring failed: %s", e)
+
+        return scores
+
     def _memory_choice_scores(self, sample: TaskSample) -> np.ndarray:
         """Retrieve prior successful episodes and score choices by similarity.
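Taken together, the canonical.py hunks make each iteration's belief update a weighted product of experts in log space. The condensed sketch below restates that update outside the class so the weight changes are easy to compare. The exp/normalize tail after log_post -= log_post.max() is an assumption (it sits just past the hunk's context), and energy_arena_beta is applied upstream where energy_post is produced, not in this function.

import numpy as np

def fuse_choice_beliefs(old_belief, fz, lz, mz, sz, energy_post,
                        falsify_w=0.3, llm_w=1.0, memory_w=0.75, sbert_w=0.8):
    """Product-of-experts update over z-scored evidence channels (sketch)."""
    log_post = (
        np.log(np.maximum(old_belief, 1e-12))
        + falsify_w * fz          # dampened NGC falsification channel
        + llm_w * lz              # LLM linguistic evidence
        + memory_w * mz           # episodic-memory recall
        + sbert_w * sz            # new SBERT semantic channel
        + np.log(np.maximum(energy_post, 1e-12))
    )
    log_post -= log_post.max()    # numerical stabilization, as in the hunk
    post = np.exp(log_post)
    return post / post.sum()      # assumed normalization, outside the diff

With these weights the SBERT channel (0.8) and the LLM channel (self.lam in the runner) carry most of each update, while the falsify channel at 0.3 can still shift a close call but no longer dominates.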