fractal-agi
/

fdra-half-life-regularization

Model card Files Files and versions

xet

Community

juddddd committed 29 days ago

Commit

7e32cd4

verified ·

1 Parent(s): a3e5250

Upload routing/identity_routing_experiment.py with huggingface_hub

Browse files

Files changed (1) hide show

routing/identity_routing_experiment.py +552 -0

routing/identity_routing_experiment.py ADDED Viewed

	@@ -0,0 +1,552 @@

+"""
+Identity Routing Experiment: Testing τ-Weighted Identity Encoding
+BACKGROUND:
+  - Anchored-tail experiment showed: distribution helps but doesn't fully solve
+  - Anchored-tail (25% at τ≥2048) → basin width 1024 (only 25% of L)
+  - Hypothesis: identity is being written uniformly, leaking into fast channels
+THIS EXPERIMENT:
+  Tests whether preferentially routing identity to long-τ oscillators
+  improves basin width beyond distributional improvements alone.
+CONDITIONS:
+  A) Collapsed + Uniform encoding (baseline)
+  B) Anchored-tail + Uniform encoding (distributional fix only)
+  C) Anchored-tail + τ-Weighted encoding (distributional + routing fix)
+  D) Anchored-tail + τ-Gated encoding (hard routing to slow modes only)
+DECISION RULE:
+  - If C or D >> B: routing was the bottleneck, routing fix works
+  - If C ≈ D ≈ B: routing doesn't help, bottleneck is elsewhere
+Authors: Routing Experiment
+Date: 2026-01-22
+"""
+import numpy as np
+import json
+import hashlib
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+from pathlib import Path
+from datetime import datetime
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from training.fdra_oscillators import FDRAOscillatorBank, OscillatorConfig
def compute_checkpoint_hash(lambdas: np.ndarray) -> str:
    """Return a short fingerprint of a decay-parameter array.

    The fingerprint is the first 16 hexadecimal characters of the SHA-256
    digest of the array's raw bytes, so it reflects the array's dtype as
    well as its values.
    """
    digest = hashlib.sha256()
    digest.update(lambdas.tobytes())
    return digest.hexdigest()[:16]
@dataclass
class ParameterSnapshot:
    """Decay parameters of one experimental condition plus derived statistics.

    Instances are normally built with :meth:`from_lambdas`, which derives the
    half-lives and summary statistics from the raw decay factors.
    """

    # Raw per-oscillator decay factors (a private copy when built via from_lambdas).
    lambdas: np.ndarray
    # Short SHA-256 based fingerprint of ``lambdas``.
    checkpoint_hash: str
    # Summary statistics of the half-life distribution (see from_lambdas).
    half_life_stats: Dict[str, Any]
    # Half-life of every oscillator, as plain Python floats.
    per_oscillator_taus: List[float]
    # Human-readable label of the distribution the lambdas came from.
    condition_name: str

    @classmethod
    def from_lambdas(cls, lambdas: np.ndarray, condition_name: str) -> 'ParameterSnapshot':
        """Build a snapshot from decay factors.

        Half-lives are computed as ``τ = ln(0.5) / ln(λ)``. The decay factors
        are clipped to ``[1e-10, 1 - 1e-10]`` for that computation only, so
        the logarithm never sees 0 or 1; the stored ``lambdas`` and the hash
        use the unclipped input.
        """
        clipped = np.clip(lambdas, 1e-10, 1 - 1e-10)
        half_lives = np.log(0.5) / np.log(clipped)

        reaches_2048 = half_lives >= 2048
        reaches_4096 = half_lives >= 4096
        summary = {
            "tau_min": float(half_lives.min()),
            "tau_max": float(half_lives.max()),
            "tau_mean": float(half_lives.mean()),
            "tau_median": float(np.median(half_lives)),
            "frac_tau_ge_2048": float(reaches_2048.mean()),
            "frac_tau_ge_4096": float(reaches_4096.mean()),
            "n_long_range": int(reaches_2048.sum()),
        }

        return cls(
            lambdas=lambdas.copy(),
            checkpoint_hash=compute_checkpoint_hash(lambdas),
            half_life_stats=summary,
            per_oscillator_taus=half_lives.tolist(),
            condition_name=condition_name,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return the JSON-friendly part of the snapshot (everything but ``lambdas``)."""
        exported = ("condition_name", "checkpoint_hash", "half_life_stats", "per_oscillator_taus")
        return {field_name: getattr(self, field_name) for field_name in exported}
def sample_tau_collapsed(n: int, seed: int = 42) -> np.ndarray:
    """Sample decay factors for the "collapsed" (short-memory) condition.

    Draws ``n`` half-lives uniformly from [2, 10) using a generator seeded
    with ``seed`` and converts each to its decay factor ``0.5 ** (1 / τ)``.
    """
    half_lives = np.random.default_rng(seed).uniform(2, 10, n)
    exponents = 1.0 / half_lives
    return np.power(0.5, exponents)
def sample_tau_anchored_tail(
    n: int,
    L: int = 4096,
    p_tail: float = 0.25,
    seed: int = 42
) -> np.ndarray:
    """Sample decay factors for the "anchored-tail" condition.

    ``int(n * p_tail)`` oscillators get half-lives drawn log-uniformly from
    [0.75 * L, 1.25 * L]; the remaining ones get half-lives drawn
    log-uniformly from [1, 512]. The tail entries come first in the result.
    Each half-life τ is returned as its decay factor ``0.5 ** (1 / τ)``.
    """
    rng = np.random.default_rng(seed)
    tail_count = int(n * p_tail)
    bulk_count = n - tail_count

    # The tail is drawn before the bulk so the random stream is consumed in a
    # fixed order for a given seed.
    tail_log_bounds = (np.log(0.75 * L), np.log(1.25 * L))
    tail_half_lives = np.exp(rng.uniform(tail_log_bounds[0], tail_log_bounds[1], tail_count))

    bulk_log_bounds = (np.log(1), np.log(512))
    bulk_half_lives = np.exp(rng.uniform(bulk_log_bounds[0], bulk_log_bounds[1], bulk_count))

    half_lives = np.concatenate([tail_half_lives, bulk_half_lives])
    return np.power(0.5, 1.0 / half_lives)
class IdentityEncoderWithRouting:
    """
    Identity encoder with configurable routing strategies.

    Routing modes:
    - "uniform": Equal weight to all oscillators (baseline)
    - "tau_weighted": Weight ∝ τ (soft routing to slow modes)
    - "tau_gated": Only write to oscillators with τ > gate_fraction * L
      (hard routing)
    - "tau_softmax": Softmax over log(τ + 1) at the configured temperature

    Args:
        dim: Length of each identity pattern vector. Values below 3 yield
            all-zero patterns because each pattern spans ``dim // 3`` entries.
        routing_mode: One of the modes listed above. An unknown mode raises
            ``ValueError`` when routing weights are first computed.
        gate_fraction: Fraction of the sequence length used as the half-life
            threshold in "tau_gated" mode.
        temperature: Softmax temperature for "tau_softmax" mode; must be > 0.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """

    def __init__(
        self,
        dim: int = 16,
        routing_mode: str = "uniform",
        *,
        gate_fraction: float = 0.25,
        temperature: float = 1.0,
    ):
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")
        self.dim = dim
        self.routing_mode = routing_mode
        self.gate_fraction = gate_fraction
        self.temperature = temperature
        self.patterns = {
            "decision_rule": self._make_pattern(0),
            "normative_constraint": self._make_pattern(1),
            "self_continuity": self._make_pattern(2),
        }

    def _make_pattern(self, idx: int) -> np.ndarray:
        """Return the ``idx``-th identity pattern.

        The pattern is a block of ``dim // 3`` equal entries (wrapping around
        the end of the vector) scaled so the block has unit Euclidean norm.
        """
        pattern = np.zeros(self.dim)
        width = self.dim // 3
        if width:
            start = (idx * self.dim // 3) % self.dim
            positions = (start + np.arange(width)) % self.dim
            pattern[positions] = 1.0 / np.sqrt(width)
        return pattern

    @staticmethod
    def _uniform_weights(count: int) -> np.ndarray:
        """Return ``count`` equal weights that sum to 1."""
        return np.ones(count) / count

    def _compute_routing_weights(self, taus: np.ndarray, L: int = 4096) -> np.ndarray:
        """Compute per-oscillator routing weights for the configured mode.

        Args:
            taus: Half-life of each oscillator.
            L: Sequence length; only used by "tau_gated" to set the threshold.

        Returns:
            One weight per oscillator. The weights sum to 1 for non-empty
            input.

        Raises:
            ValueError: If ``self.routing_mode`` is not a known mode.
        """
        taus = np.asarray(taus, dtype=float)
        count = len(taus)

        if self.routing_mode == "uniform":
            return self._uniform_weights(count)

        if self.routing_mode == "tau_weighted":
            total = np.sum(taus)
            if total <= 0:
                # Nothing to be proportional to; avoid 0/0 and fall back to
                # uniform, mirroring the gated fallback below.
                return self._uniform_weights(count)
            return taus / total

        if self.routing_mode == "tau_gated":
            mask = (taus > self.gate_fraction * L).astype(float)
            selected = np.sum(mask)
            if selected == 0:
                # Fallback to uniform if no oscillators are above the threshold.
                return self._uniform_weights(count)
            return mask / selected

        if self.routing_mode == "tau_softmax":
            if count == 0:
                return np.zeros(0)
            logits = np.log(taus + 1) / self.temperature
            # Subtracting the maximum leaves the softmax unchanged but keeps
            # exp() from overflowing at low temperatures.
            shifted = np.exp(logits - np.max(logits))
            return shifted / np.sum(shifted)

        raise ValueError(f"Unknown routing mode: {self.routing_mode}")

    def encode(self, bank: "FDRAOscillatorBank", strength: float = 1.0):
        """Inject every identity pattern into ``bank`` using the routing weights.

        For each pattern the routed input is fed to ``bank.forward`` ten
        times. Half-lives come from ``bank.get_half_lives()`` and the
        sequence length from ``bank.L``.
        """
        taus = bank.get_half_lives()
        weights = self._compute_routing_weights(taus, bank.L)
        # Weights sum to 1, so multiplying by the oscillator count gives
        # uniform routing a weight of exactly 1 per oscillator.
        gain = strength * len(taus)
        for pattern in self.patterns.values():
            u = np.outer(weights, pattern) * gain
            for _ in range(10):
                bank.forward(u)

    def measure_identity(self, bank: "FDRAOscillatorBank") -> Dict[str, float]:
        """Measure alignment with identity patterns (τ-weighted readout).

        The rows of ``bank.h`` are averaged with weights proportional to each
        oscillator's half-life. The score for a pattern is the dot product of
        that average with the pattern, divided by the average's norm and
        floored at 0. All scores are 0.0 when the readout is numerically zero
        or the half-lives sum to zero.
        """
        taus = bank.get_half_lives()
        total = np.sum(taus)
        if total <= 0:
            # No meaningful τ-weighting exists; report no alignment rather
            # than propagating NaN from a 0/0 division.
            return {name: 0.0 for name in self.patterns}
        weights = taus / total
        slow = np.sum(bank.h * weights[:, np.newaxis], axis=0)
        slow_norm = np.linalg.norm(slow)
        if slow_norm < 1e-10:
            return {name: 0.0 for name in self.patterns}
        return {
            name: max(0.0, float(np.dot(slow, pattern) / slow_norm))
            for name, pattern in self.patterns.items()
        }
class RoutingExperiment:
    """
    Four-condition routing experiment.

    Tests whether τ-weighted identity encoding improves basin width.

    Constructing an instance creates the output directory
    ``outputs/identity_routing`` if it does not exist.
    """

    # Result keys of the four conditions, in reporting order.
    _CONDITION_KEYS = (
        "A_collapsed_uniform",
        "B_anchored_uniform",
        "C_anchored_weighted",
        "D_anchored_gated",
    )
    # Seeds used when the caller does not supply any.
    _DEFAULT_SEEDS = (42, 137, 256, 314, 999)

    def __init__(
        self,
        num_oscillators: int = 32,
        state_dim: int = 16,
        sequence_length: int = 4096
    ):
        self.n = num_oscillators
        self.d = state_dim
        self.L = sequence_length
        self.osc_config = OscillatorConfig(
            num_oscillators=num_oscillators,
            state_dim=state_dim,
            sequence_length=sequence_length
        )
        # Interference lengths (number of noise steps) swept per condition.
        self.k_values = [0, 64, 128, 256, 512, 1024, 2048, 4096]
        self.output_dir = Path("outputs/identity_routing")
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def run_identity_trial(
        self,
        snapshot: "ParameterSnapshot",
        encoder: "IdentityEncoderWithRouting",
        k: int,
        seed: int
    ) -> Dict[str, Any]:
        """Encode identity, apply ``k`` noise steps, and measure retention.

        A fresh bank is built with the snapshot's decay factors. If the mean
        alignment right after encoding is below 0.3 the trial is reported as
        an encoding failure. Otherwise ``k`` Gaussian noise inputs (standard
        deviation 0.5) are fed through the bank and retention is the ratio of
        post- to pre-interference mean alignment. Identity counts as
        preserved when retention is at least 0.5.

        Returns:
            A dict of plain Python values describing the trial.
        """
        rng = np.random.default_rng(seed)
        bank = FDRAOscillatorBank(self.osc_config)
        bank.lambdas = snapshot.lambdas.copy()
        bank.reset()

        # Encode with routing
        encoder.encode(bank, strength=1.0)
        pre_identity = encoder.measure_identity(bank)
        pre_score = np.mean(list(pre_identity.values()))
        if pre_score < 0.3:
            return {
                "k": k, "seed": seed,
                "pre_score": float(pre_score),
                "post_score": 0.0,
                "retention": 0.0,
                "identity_preserved": False,
                "encoding_failed": True
            }

        # Interference
        for _ in range(k):
            noise = rng.standard_normal((bank.n, bank.d)) * 0.5
            bank.forward(noise)

        post_identity = encoder.measure_identity(bank)
        post_score = np.mean(list(post_identity.values()))
        retention = post_score / pre_score if pre_score > 0 else 0.0
        return {
            "k": k, "seed": seed,
            "pre_score": float(pre_score),
            "post_score": float(post_score),
            "retention": float(retention),
            # bool() turns the NumPy comparison result into a native bool so
            # json.dump writes true/false instead of falling back to
            # default=str and writing the string "True".
            "identity_preserved": bool(retention >= 0.5),
            "encoding_failed": False
        }

    def run_sweep(
        self,
        snapshot: "ParameterSnapshot",
        encoder: "IdentityEncoderWithRouting",
        condition_name: str,
        seeds: List[int],
        n_trials: int = 8
    ) -> Dict[str, Any]:
        """Run all trials of one condition over every interference length.

        For each K in ``self.k_values`` this runs ``len(seeds) * n_trials``
        trials (trial seed ``seed * 1000 + t``), prints a progress line, and
        records the preserved rate and mean retention. The basin width at a
        threshold is the largest K whose preserved rate meets that threshold
        (0 if none does).
        """
        print(f"\nCondition: {condition_name}")
        print(f"  Distribution: {snapshot.condition_name}")
        print(f"  Routing: {encoder.routing_mode}")
        print(f"  τ >= 2048: {snapshot.half_life_stats['frac_tau_ge_2048']:.0%}")
        print("-" * 60)

        all_trials = []
        preservation_curve = []
        for k in self.k_values:
            k_trials = [
                self.run_identity_trial(snapshot, encoder, k, seed * 1000 + t)
                for seed in seeds
                for t in range(n_trials)
            ]
            all_trials.extend(k_trials)
            preserved_rate = np.mean([trial["identity_preserved"] for trial in k_trials])
            mean_retention = np.mean([trial["retention"] for trial in k_trials])
            preservation_curve.append({
                "k": k,
                "preserved_rate": float(preserved_rate),
                "mean_retention": float(mean_retention)
            })
            print(f"  K={k:5d}: Preserved={preserved_rate:.0%}, Retention={mean_retention:.1%}")

        # Basin widths
        bw80 = max((p["k"] for p in preservation_curve if p["preserved_rate"] >= 0.8), default=0)
        bw50 = max((p["k"] for p in preservation_curve if p["preserved_rate"] >= 0.5), default=0)
        return {
            "condition_name": condition_name,
            "distribution": snapshot.condition_name,
            "routing": encoder.routing_mode,
            "snapshot": snapshot.to_dict(),
            "trials": all_trials,
            "analysis": {
                "preservation_curve": preservation_curve,
                "basin_width_80": bw80,
                "basin_width_50": bw50,
                "basin_width_ratio_80": bw80 / self.L,
                "basin_width_ratio_50": bw50 / self.L
            }
        }

    @staticmethod
    def _relative_improvement(baseline: int, candidate: int) -> float:
        """Return ``(candidate - baseline) / baseline`` with a zero-baseline rule.

        With a zero baseline the improvement is infinite only if the
        candidate is actually larger; two zero widths are no improvement.
        """
        if baseline > 0:
            return (candidate - baseline) / baseline
        return float('inf') if candidate > 0 else 0.0

    @classmethod
    def _decide(cls, bw_B: int, bw_C: int, bw_D: int) -> Tuple[str, str]:
        """Turn the three anchored-tail basin widths into (verdict, explanation).

        ``bw_B`` is the uniform-routing baseline, ``bw_C`` the τ-weighted and
        ``bw_D`` the τ-gated width.
        """
        improvement_C = cls._relative_improvement(bw_B, bw_C)
        improvement_D = cls._relative_improvement(bw_B, bw_D)
        breakdown = (
            f"  Anchored + Uniform: {bw_B}\n"
            f"  Anchored + τ-Weighted: {bw_C} ({improvement_C:+.0%})\n"
            f"  Anchored + τ-Gated: {bw_D} ({improvement_D:+.0%})\n"
        )
        if bw_C >= 2048 or bw_D >= 2048:
            return "ROUTING_SOLVES", (
                "τ-weighted or τ-gated encoding achieves basin width >= 2048.\n"
                "Routing was the bottleneck. Identity must be written to slow modes.\n"
                "Next step: Implement routing during training."
            )
        if improvement_C >= 0.5 or improvement_D >= 0.5:
            return "ROUTING_HELPS", (
                "Routing improves basin width by ≥50%:\n"
                + breakdown
                + "Routing helps but doesn't fully solve. Need combined approach."
            )
        return "ROUTING_INEFFECTIVE", (
            "Routing does NOT significantly improve basin width:\n"
            + breakdown
            + "The bottleneck is elsewhere (perhaps readout or architecture)."
        )

    def run_full_experiment(
        self,
        seeds: Optional[List[int]] = None,
        n_trials: int = 8
    ) -> Dict[str, Any]:
        """Run all four conditions, print a summary, and save JSON + Markdown.

        Args:
            seeds: Base seeds for the trials; defaults to
                ``[42, 137, 256, 314, 999]`` when omitted.
            n_trials: Trials per seed and interference length.

        Returns:
            The full results dictionary that is also written to disk.
        """
        # Copy so neither the class default nor the caller's list is shared
        # with the results structure.
        seeds = list(self._DEFAULT_SEEDS if seeds is None else seeds)

        print("=" * 70)
        print("ROUTING EXPERIMENT: Does τ-weighted encoding improve basin width?")
        print("=" * 70)
        print()
        print("Conditions:")
        print("  A) Collapsed + Uniform     (baseline)")
        print("  B) Anchored + Uniform      (distribution fix only)")
        print("  C) Anchored + τ-Weighted   (distribution + soft routing)")
        print("  D) Anchored + τ-Gated      (distribution + hard routing)")
        print()
        print(f"Trials: {len(seeds)} seeds × {n_trials} trials = {len(seeds) * n_trials} per K")
        print("=" * 70)

        # Create snapshots
        collapsed = ParameterSnapshot.from_lambdas(sample_tau_collapsed(self.n), "collapsed")
        anchored = ParameterSnapshot.from_lambdas(sample_tau_anchored_tail(self.n, self.L), "anchored_tail")

        # Create encoders
        uniform_enc = IdentityEncoderWithRouting(self.d, "uniform")
        weighted_enc = IdentityEncoderWithRouting(self.d, "tau_weighted")
        gated_enc = IdentityEncoderWithRouting(self.d, "tau_gated")

        # Run conditions
        results = {}
        results["A_collapsed_uniform"] = self.run_sweep(
            collapsed, uniform_enc, "A) Collapsed + Uniform", seeds, n_trials)
        results["B_anchored_uniform"] = self.run_sweep(
            anchored, uniform_enc, "B) Anchored + Uniform", seeds, n_trials)
        results["C_anchored_weighted"] = self.run_sweep(
            anchored, weighted_enc, "C) Anchored + τ-Weighted", seeds, n_trials)
        results["D_anchored_gated"] = self.run_sweep(
            anchored, gated_enc, "D) Anchored + τ-Gated", seeds, n_trials)

        # Summary
        print("\n" + "=" * 70)
        print("COMPARISON SUMMARY")
        print("=" * 70)
        for threshold in ("80", "50"):
            print(f"\n  Basin Width ({threshold}% threshold):")
            for key in self._CONDITION_KEYS:
                analysis = results[key]["analysis"]
                bw = analysis[f"basin_width_{threshold}"]
                ratio = analysis[f"basin_width_ratio_{threshold}"]
                name = results[key]["condition_name"]
                print(f"    {name:30s}: {bw:5d} tokens ({ratio:.1%} of L)")

        # Decision
        bw_B = results["B_anchored_uniform"]["analysis"]["basin_width_50"]
        bw_C = results["C_anchored_weighted"]["analysis"]["basin_width_50"]
        bw_D = results["D_anchored_gated"]["analysis"]["basin_width_50"]
        print("\n" + "=" * 70)
        print("DECISION")
        print("=" * 70)
        conclusion, explanation = self._decide(bw_B, bw_C, bw_D)
        print(f"\n  Conclusion: {conclusion}")
        print(f"\n  {explanation}")
        print("=" * 70)

        # Assemble results
        now = datetime.now()
        full_results = {
            "timestamp": now.isoformat(),
            "experiment": "routing_experiment",
            "parameters": {
                "num_oscillators": self.n,
                "state_dim": self.d,
                "sequence_length": self.L,
                "k_values": self.k_values,
                "seeds": seeds,
                "n_trials": n_trials
            },
            "conditions": results,
            "comparison": {
                "basin_width_50": {
                    "collapsed_uniform": results["A_collapsed_uniform"]["analysis"]["basin_width_50"],
                    "anchored_uniform": bw_B,
                    "anchored_weighted": bw_C,
                    "anchored_gated": bw_D
                },
                "basin_width_80": {
                    "collapsed_uniform": results["A_collapsed_uniform"]["analysis"]["basin_width_80"],
                    "anchored_uniform": results["B_anchored_uniform"]["analysis"]["basin_width_80"],
                    "anchored_weighted": results["C_anchored_weighted"]["analysis"]["basin_width_80"],
                    "anchored_gated": results["D_anchored_gated"]["analysis"]["basin_width_80"]
                }
            },
            "conclusion": {
                "verdict": conclusion,
                "explanation": explanation
            }
        }

        # Save. The report contains non-ASCII characters (τ, ≥), so the
        # encoding is pinned instead of relying on the platform default.
        ts = now.strftime("%Y%m%d_%H%M%S")
        with open(self.output_dir / f"routing_{ts}.json", "w", encoding="utf-8") as f:
            json.dump(full_results, f, indent=2, default=str)
        report = self._generate_report(full_results)
        with open(self.output_dir / f"ROUTING_REPORT_{ts}.md", "w", encoding="utf-8") as f:
            f.write(report)
        print(f"\nResults saved to: {self.output_dir}/")
        return full_results

    def _generate_report(self, results: Dict[str, Any]) -> str:
        """Render the results dictionary as a Markdown report.

        Basin-width ratios are computed against the sequence length recorded
        in ``results["parameters"]``, falling back to ``self.L`` when absent.
        """
        comp = results["comparison"]
        concl = results["conclusion"]
        params = results.get("parameters") or {}
        seq_len = params["sequence_length"] if "sequence_length" in params else self.L
        widths = comp["basin_width_50"]

        def width_row(label: str, key: str) -> str:
            return f"| {label} | {widths[key]} | {widths[key] / seq_len:.1%} |"

        lines = [
            "# Routing Experiment: τ-Weighted Identity Encoding",
            f"**Date:** {results['timestamp']}",
            "## Question",
            "Does preferentially routing identity to long-τ oscillators improve basin width?",
            "## Conditions",
            "| Condition | Distribution | Routing | Description |",
            "|-----------|-------------|---------|-------------|",
            "| A | Collapsed | Uniform | Baseline |",
            "| B | Anchored-tail | Uniform | Distribution fix only |",
            "| C | Anchored-tail | τ-Weighted | Distribution + soft routing |",
            "| D | Anchored-tail | τ-Gated | Distribution + hard routing |",
            "## Results",
            "### Basin Width (50% threshold)",
            "| Condition | Basin Width | Ratio |",
            "|-----------|-------------|-------|",
            width_row("A) Collapsed + Uniform", "collapsed_uniform"),
            width_row("B) Anchored + Uniform", "anchored_uniform"),
            width_row("C) Anchored + τ-Weighted", "anchored_weighted"),
            width_row("D) Anchored + τ-Gated", "anchored_gated"),
            "### Preservation Curves",
        ]
        for cond_key in self._CONDITION_KEYS:
            cond = results["conditions"][cond_key]
            lines.append(f"#### {cond['condition_name']}")
            lines.append("| K | Preserved Rate | Mean Retention |")
            lines.append("|---|---|---|")
            lines.extend(
                f"| {p['k']} | {p['preserved_rate']:.0%} | {p['mean_retention']:.1%} |"
                for p in cond["analysis"]["preservation_curve"]
            )
            # Empty entry produces the blank line that separates conditions.
            lines.append("")
        lines.extend([
            "## Conclusion",
            f"**Verdict: {concl['verdict']}**",
            f"{concl['explanation']}",
            "---",
            "*Report generated by identity_routing_experiment.py*",
        ])
        return "\n".join(lines) + "\n"
def run_experiment():
    """Run the routing experiment with its standard configuration.

    Uses 32 oscillators, a 16-dimensional state, sequence length 4096,
    five fixed seeds and 8 trials per seed, and returns the results
    dictionary produced by ``RoutingExperiment.run_full_experiment``.
    """
    settings = {
        "num_oscillators": 32,
        "state_dim": 16,
        "sequence_length": 4096,
    }
    runner = RoutingExperiment(**settings)
    fixed_seeds = [42, 137, 256, 314, 999]
    return runner.run_full_experiment(seeds=fixed_seeds, n_trials=8)


# Script entry point: run the experiment when executed directly.
if __name__ == "__main__":
    run_experiment()