| """ |
| Identity Reconstruction Experiment: The Decisive Diagnostic |
| |
| This implements the single conclusive experiment from the Cursor instructions: |
| "Identity Reconstruction Under Forced Forgetting" |
| |
| Goal: Demonstrate persistent internal identity that: |
| 1. Survives large irrelevant context |
| 2. Is recoverable |
| 3. Collapses sharply beyond a threshold (phase transition) |
| |
| The experiment: |
| 1. Define 3 identity invariants (encoded once, never restated) |
| 2. Inject interference (K tokens of irrelevant content) |
| 3. Probe for identity reconstruction (without hints) |
| 4. Sweep K to find the phase transition |
| |
| Expected Results: |
| - Aligned FDRA: Flat performance until K*, then sharp collapse |
| - Current Sefer (FDRA without regularization): Gradual decay, early collapse |
| - Transformer baseline: Immediate degradation |
| |
| This is THE decisive test. If FDRA passes with the half-life regularizer |
| and fails without it, the story is complete. |
| |
| Authors: Identity Reconstruction Experiment |
| Date: 2026-01-22 |
| """ |


import numpy as np
from typing import Dict, List, Tuple, Optional, Any, Callable
from dataclasses import dataclass
from pathlib import Path
from datetime import datetime
import json
import sys

# Ensure the repository root is importable so the `training` package
# resolves when this file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from training.fdra_oscillators import FDRAWithOscillators, OscillatorConfig
from training.half_life_regularizer import HalfLifeRegularizer, HalfLifeRegularizerConfig


@dataclass
class IdentityInvariant:
    """
    A single identity invariant.

    Properties:
    - name: Human-readable identifier
    - probe: Question that tests this invariant
    - ground_truth: Expected structural features
    - score_fn: Optional custom scorer; when omitted, Jaccard overlap is used
    """
    name: str
    probe: str
    ground_truth: List[str]
    score_fn: Optional[Callable] = None

    def score(self, response_features: List[str]) -> float:
        """
        Score reconstruction based on feature overlap.

        Returns score in [0, 1].
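
        Example (illustrative doctest; "a"/"b"/"c"/"d" are hypothetical
        feature labels, exercising the default Jaccard path):
            >>> inv = IdentityInvariant(
            ...     name="x", probe="?", ground_truth=["a", "b", "c"])
            >>> inv.score(["a", "b", "d"])
            0.5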
| """ |
| if self.score_fn: |
| return self.score_fn(response_features) |
| |
| |
| truth_set = set(self.ground_truth) |
| response_set = set(response_features) |
| |
| if not truth_set: |
| return 1.0 if not response_set else 0.0 |
| |
| intersection = len(truth_set & response_set) |
| union = len(truth_set | response_set) |
| |
| return intersection / union if union > 0 else 0.0 |
|


class IdentityProfile:
    """
    The three identity invariants for this experiment.

    The invariants are fixed and constructed once in __init__; they are
    encoded at t=0 and never restated to the agent.
    """
    decision_rule: IdentityInvariant
    normative_constraint: IdentityInvariant
    self_continuity: IdentityInvariant

    def __init__(self):
        self.decision_rule = IdentityInvariant(
            name="Decision Rule Under Uncertainty",
            probe="How do you resolve choices when information is incomplete?",
            ground_truth=[
                "constraint:coherence",
                "self:conservative",
                "uncertainty:acknowledge",
                "reversibility:prefer"
            ]
        )

        self.normative_constraint = IdentityInvariant(
            name="Normative Constraint",
            probe="What internal consistency requirement do you maintain?",
            ground_truth=[
                "consistency:backward",
                "explainability:require",
                "deception:avoid",
                "alignment:preserve"
            ]
        )

        self.self_continuity = IdentityInvariant(
            name="Self-Continuity",
            probe="What makes you the same agent across time?",
            ground_truth=[
                "coherence:maintain",
                "basin:stable",
                "memory:structure",
                "identity:reconstruct"
            ]
        )

    def score_all(
        self,
        responses: Dict[str, List[str]]
    ) -> Tuple[float, Dict[str, float]]:
        """
        Score all three invariants.

        Args:
            responses: Dict mapping invariant name to extracted features

        Returns:
            overall_score: Average across invariants
            individual_scores: Score per invariant
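
        Example (hedged sketch; feature labels must match the ground truth
        labels defined in __init__):
            profile = IdentityProfile()
            overall, per = profile.score_all({
                "decision_rule": ["constraint:coherence", "uncertainty:acknowledge"],
            })
            # per["decision_rule"] == 0.5 (2 shared features / 4 in the union)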
| """ |
| scores = {} |
| |
| for name, invariant in [ |
| ("decision_rule", self.decision_rule), |
| ("normative_constraint", self.normative_constraint), |
| ("self_continuity", self.self_continuity) |
| ]: |
| features = responses.get(name, []) |
| scores[name] = invariant.score(features) |
| |
| overall = np.mean(list(scores.values())) |
| |
| return overall, scores |


class IdentityEncoder:
    """
    Encodes identity invariants into FDRA state.

    The identity is encoded as a specific pattern in the slow state
    that should persist through interference.
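
    Example (doctest; patterns occupy disjoint index blocks, so they are
    exactly orthogonal by construction):
        >>> enc = IdentityEncoder(dim=16)
        >>> float(np.dot(enc.patterns["decision_rule"],
        ...              enc.patterns["normative_constraint"]))
        0.0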
| """ |
| |
| def __init__(self, dim: int = 16): |
| self.dim = dim |
| |
| |
| self.patterns = { |
| "decision_rule": self._make_pattern(0), |
| "normative_constraint": self._make_pattern(1), |
| "self_continuity": self._make_pattern(2), |
| } |
| |
| def _make_pattern(self, idx: int) -> np.ndarray: |
| """Create orthogonal pattern for invariant idx.""" |
| pattern = np.zeros(self.dim) |
| |
| |
| start = (idx * self.dim // 3) % self.dim |
| for i in range(self.dim // 3): |
| pattern[(start + i) % self.dim] = 1.0 / np.sqrt(self.dim // 3) |
| |
| return pattern |

    def encode(self, agent: FDRAWithOscillators, strength: float = 1.0):
        """
        Encode identity invariants into agent state.

        This injects the identity pattern into the oscillator bank.
        """
        for pattern in self.patterns.values():
            # Broadcast the pattern to every oscillator in the bank.
            u = np.tile(pattern * strength, (agent.oscillators.n, 1))

            # Drive the bank for a few steps so the pattern settles
            # into the slow state.
            for _ in range(5):
                agent.oscillators.forward(u)

    def measure_identity(self, agent: FDRAWithOscillators) -> Dict[str, float]:
        """
        Measure how much of each identity pattern is present.

        Returns alignment score for each invariant.
        """
        slow = agent.get_slow_state()
        slow_norm = np.linalg.norm(slow)

        if slow_norm < 1e-10:
            return {name: 0.0 for name in self.patterns}

        alignments = {}
        for name, pattern in self.patterns.items():
            # Cosine-style projection: patterns are unit norm, so this is
            # the fraction of the slow state aligned with the pattern.
            alignment = np.dot(slow, pattern) / slow_norm
            alignments[name] = max(0.0, float(alignment))

        return alignments



class InterferenceGenerator:
    """
    Generates interference (irrelevant content) to inject between
    identity encoding and reconstruction.
    """

    def __init__(self, dim: int = 16, seed: int = 42):
        self.dim = dim
        self.rng = np.random.default_rng(seed)

    def generate(self, k: int) -> np.ndarray:
        """
        Generate K steps of interference.

        Properties:
        - Semantically rich (high entropy)
        - Different domain (orthogonal to the identity patterns in expectation)
        - No identity references

        Args:
            k: Number of interference steps

        Returns:
            interference: Array of shape (k, dim)
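
        Example (doctest; shape check, deterministic given the seed):
            >>> gen = InterferenceGenerator(dim=16, seed=0)
            >>> gen.generate(4).shape
            (4, 16)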
| """ |
| |
| interference = self.rng.standard_normal((k, self.dim)) |
| |
| |
| interference = interference * 0.5 |
| |
| return interference |
|


class IdentityReconstructionExperiment:
    """
    The decisive experiment for testing long-range identity coherence.

    Protocol:
    1. Encode identity invariants (once, at t=0)
    2. Confirm encoding (Score_pre ≈ 1.0)
    3. Inject K tokens of interference
    4. Probe for reconstruction (without hints)
    5. Sweep K from 0 to 4096 to find the phase transition
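
    Example (hedged usage sketch; assumes the default oscillator config):
        exp = IdentityReconstructionExperiment(with_regularization=True)
        trial = exp.run_single_trial(k=512, seed=42, apply_regularization=True)
        print(trial["retention"])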
| """ |
| |
| def __init__( |
| self, |
| osc_config: Optional[OscillatorConfig] = None, |
| with_regularization: bool = True, |
| reg_config: Optional[HalfLifeRegularizerConfig] = None |
| ): |
| self.osc_config = osc_config or OscillatorConfig( |
| num_oscillators=32, |
| state_dim=16, |
| sequence_length=4096 |
| ) |
| |
| self.with_regularization = with_regularization |
| self.reg_config = reg_config or HalfLifeRegularizerConfig() |
| |
| if with_regularization: |
| self.regularizer = HalfLifeRegularizer(self.reg_config) |
| else: |
| self.regularizer = None |
| |
| |
| self.encoder = IdentityEncoder(self.osc_config.state_dim) |
| self.interference_gen = InterferenceGenerator( |
| self.osc_config.state_dim, |
| seed=42 |
| ) |
| self.profile = IdentityProfile() |
| |

    def create_agent(self, apply_regularization: bool = False) -> FDRAWithOscillators:
        """
        Create a fresh agent for the experiment.

        If apply_regularization is True and we have a regularizer,
        adjust the oscillator lambdas based on the regularization gradient.
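
        Note (assumption about the decay parameterization): for a decay
        factor lambda in (0, 1), the effective half-life is
        t_half = ln(2) / (-ln(lambda)), so pushing lambdas toward 1
        lengthens memory; the regularizer gradient is assumed to do this.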
| """ |
| agent = FDRAWithOscillators(self.osc_config) |
| |
| if apply_regularization and self.regularizer: |
| |
| lr = 0.5 |
| for _ in range(10): |
| grad = self.regularizer.compute_gradient(agent.oscillators.lambdas) |
| agent.oscillators.lambdas -= lr * grad |
| agent.oscillators.lambdas = np.clip( |
| agent.oscillators.lambdas, 0.01, 0.9999 |
| ) |
| |
| return agent |
| |

    def run_single_trial(
        self,
        k: int,
        seed: int = 42,
        apply_regularization: bool = False
    ) -> Dict[str, Any]:
        """
        Run a single trial with K interference tokens.

        Args:
            k: Number of interference steps
            seed: Random seed for interference
            apply_regularization: Whether to apply half-life regularization

        Returns:
            Trial results including identity scores
        """
        agent = self.create_agent(apply_regularization)

        # Encode identity once, at t=0; it is never restated.
        self.encoder.encode(agent, strength=1.0)

        # Confirm the encoding took hold before injecting interference.
        pre_identity = self.encoder.measure_identity(agent)
        pre_score = float(np.mean(list(pre_identity.values())))

        if pre_score < 0.5:
            # Encoding failed; report zero retention so downstream
            # aggregation never hits a missing key.
            return {
                "k": k,
                "seed": seed,
                "pre_score": pre_score,
                "post_score": 0.0,
                "retention": 0.0,
                "scores": {name: 0.0 for name in pre_identity},
                "identity_preserved": False,
                "encoding_failed": True
            }

        # Re-seed so each trial's interference stream is reproducible.
        self.interference_gen.rng = np.random.default_rng(seed)
        interference = self.interference_gen.generate(k)

        for step in range(k):
            u = np.tile(interference[step], (agent.oscillators.n, 1))
            agent.oscillators.forward(u)
            agent.fast = 0.9 * agent.fast + interference[step]

        # Probe: how much of the encoded identity is still present?
        post_identity = self.encoder.measure_identity(agent)
        post_score = float(np.mean(list(post_identity.values())))

        # Preserved if at least half of the encoded signal survives.
        identity_preserved = post_score >= 0.5 * pre_score

        return {
            "k": k,
            "seed": seed,
            "pre_score": pre_score,
            "post_score": post_score,
            "retention": post_score / pre_score if pre_score > 0 else 0.0,
            "scores": {name: float(v) for name, v in post_identity.items()},
            "identity_preserved": identity_preserved,
            "encoding_failed": False,
            "half_life_stats": agent.oscillators.get_half_life_statistics()
        }

    def run_sweep(
        self,
        k_values: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        apply_regularization: bool = False,
        verbose: bool = True
    ) -> Dict[str, Any]:
        """
        Run interference sweep experiment.

        Args:
            k_values: List of K values to test
            seeds: List of random seeds for trials
            apply_regularization: Whether to apply half-life regularization
            verbose: Print progress

        Returns:
            Complete experiment results
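
        Example (hedged sketch; assumes `experiment` is an
        IdentityReconstructionExperiment instance):
            results = experiment.run_sweep(k_values=[0, 512], seeds=[42])
            print(results["analysis"]["verdict"])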
| """ |
| if k_values is None: |
| k_values = [0, 256, 512, 1024, 2048, 4096] |
| |
| if seeds is None: |
| seeds = [42, 137, 256, 314, 999] |
| |
| results = { |
| "timestamp": datetime.now().isoformat(), |
| "config": { |
| "num_oscillators": self.osc_config.num_oscillators, |
| "state_dim": self.osc_config.state_dim, |
| "sequence_length": self.osc_config.sequence_length, |
| "with_regularization": apply_regularization, |
| }, |
| "k_values": k_values, |
| "seeds": seeds, |
| "trials": [], |
| } |
| |
| if verbose: |
| mode = "WITH regularization" if apply_regularization else "WITHOUT regularization" |
| print(f"\nRunning Identity Reconstruction Sweep ({mode})") |
| print("-" * 60) |

        for k in k_values:
            k_results = []

            for seed in seeds:
                trial = self.run_single_trial(
                    k=k,
                    seed=seed,
                    apply_regularization=apply_regularization
                )
                k_results.append(trial)
                results["trials"].append(trial)

            if verbose:
                preserved = sum(1 for t in k_results if t["identity_preserved"])
                mean_retention = np.mean([t["retention"] for t in k_results])
                print(f"  K={k:4d}: Preserved={preserved}/{len(seeds)} "
                      f"({preserved/len(seeds):.0%}), "
                      f"Mean Retention={mean_retention:.2%}")

        results["analysis"] = self._analyze_results(results["trials"], k_values)

        return results

    def _analyze_results(
        self,
        trials: List[Dict],
        k_values: List[int]
    ) -> Dict[str, Any]:
        """
        Analyze sweep results for a phase transition.
        """
        # Group trials by interference length K.
        by_k = {k: [] for k in k_values}
        for trial in trials:
            by_k[trial["k"]].append(trial)

        # Preservation rate and mean retention per K.
        preservation_curve = []
        for k in k_values:
            trials_k = by_k[k]
            preserved = sum(1 for t in trials_k if t["identity_preserved"])
            rate = preserved / len(trials_k) if trials_k else 0.0
            mean_retention = float(np.mean([t["retention"] for t in trials_k])) if trials_k else 0.0

            preservation_curve.append({
                "k": k,
                "preserved_rate": rate,
                "mean_retention": mean_retention
            })

        # Critical K: first interference length at which fewer than half
        # of the trials preserve identity.
        critical_k = None
        for point in preservation_curve:
            if point["preserved_rate"] < 0.5:
                critical_k = point["k"]
                break

        # A drop of more than 0.4 between adjacent K values is treated
        # as a sharp (phase-transition-like) collapse.
        rates = [p["preserved_rate"] for p in preservation_curve]
        if len(rates) > 1:
            rate_changes = [abs(rates[i+1] - rates[i]) for i in range(len(rates)-1)]
            max_change = max(rate_changes)
        else:
            max_change = 0.0

        transition_type = "sharp" if max_change > 0.4 else "gradual"

        if critical_k is None:
            verdict = "PASS (STRONG)"
            explanation = "Identity preserved at all tested K values."
        elif transition_type == "sharp" and critical_k > k_values[0]:
            verdict = "PASS (PHASE TRANSITION)"
            explanation = f"Sharp collapse at K={critical_k}. Basin width: {critical_k} tokens."
        else:
            verdict = "FAIL (GRADUAL DRIFT)"
            explanation = "Identity degrades gradually or collapses immediately; no basin structure."

        return {
            "preservation_curve": preservation_curve,
            "critical_k": critical_k,
            "max_rate_change": max_change,
            "transition_type": transition_type,
            "verdict": verdict,
            "explanation": explanation
        }

    def compare_with_without_regularization(
        self,
        k_values: Optional[List[int]] = None,
        verbose: bool = True
    ) -> Dict[str, Any]:
        """
        Run comparative experiment: with vs without half-life regularization.

        This is THE decisive comparison.
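
        Example (hedged sketch; assumes `experiment` is an
        IdentityReconstructionExperiment instance):
            comparison = experiment.compare_with_without_regularization(
                k_values=[0, 256, 1024])
            print(comparison["comparison"]["with_verdict"])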
| """ |
| if verbose: |
| print("=" * 70) |
| print("IDENTITY RECONSTRUCTION: DECISIVE COMPARISON") |
| print("=" * 70) |
| |
| |
| results_without = self.run_sweep( |
| k_values=k_values, |
| apply_regularization=False, |
| verbose=verbose |
| ) |
| |
| |
| results_with = self.run_sweep( |
| k_values=k_values, |
| apply_regularization=True, |
| verbose=verbose |
| ) |
| |
| comparison = { |
| "timestamp": datetime.now().isoformat(), |
| "without_regularization": results_without, |
| "with_regularization": results_with, |
| "comparison": { |
| "without_verdict": results_without["analysis"]["verdict"], |
| "with_verdict": results_with["analysis"]["verdict"], |
| "without_critical_k": results_without["analysis"]["critical_k"], |
| "with_critical_k": results_with["analysis"]["critical_k"], |
| } |
| } |

        if verbose:
            print("\n" + "=" * 70)
            print("COMPARISON SUMMARY")
            print("=" * 70)
            print("\nWithout Regularization:")
            print(f"  Verdict: {results_without['analysis']['verdict']}")
            print(f"  Critical K: {results_without['analysis']['critical_k']}")
            print(f"  Transition: {results_without['analysis']['transition_type']}")

            print("\nWith Regularization:")
            print(f"  Verdict: {results_with['analysis']['verdict']}")
            print(f"  Critical K: {results_with['analysis']['critical_k']}")
            print(f"  Transition: {results_with['analysis']['transition_type']}")

            # Decide what the pair of verdicts means for the hypothesis.
            if "PASS" in results_with["analysis"]["verdict"] and \
               "FAIL" in results_without["analysis"]["verdict"]:
                print("\n✓ HALF-LIFE REGULARIZATION IS DECISIVE")
                print("  The regularizer enables identity preservation that fails without it.")
            elif "PASS" in results_with["analysis"]["verdict"] and \
                 "PASS" in results_without["analysis"]["verdict"]:
                # Both pass: compare how far the identity horizon extends.
                k_without = results_without["analysis"]["critical_k"] or float('inf')
                k_with = results_with["analysis"]["critical_k"] or float('inf')

                if k_with > k_without * 1.5:
                    print("\n✓ REGULARIZATION EXTENDS IDENTITY HORIZON")
                    print(f"  Critical K improved from {k_without} to {k_with}.")
                else:
                    print("\n~ INCONCLUSIVE")
                    print("  Both conditions pass. May need more aggressive testing.")
            else:
                print("\n✗ NEITHER CONDITION PRESERVES IDENTITY")
                print("  Architecture may need deeper changes.")

        return comparison


def run_identity_reconstruction_experiment(
    output_dir: str = "outputs/identity_reconstruction",
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Run the full identity reconstruction experiment.

    This is the entry point for the decisive diagnostic.
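
    Typical invocation (running the module directly as a script):
        python identity_reconstruction_experiment.py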
| """ |
| if verbose: |
| print("\n" + "=" * 70) |
| print("IDENTITY RECONSTRUCTION UNDER FORCED FORGETTING") |
| print("The Decisive Diagnostic for Long-Range Coherence") |
| print("=" * 70) |
| |
| |
| experiment = IdentityReconstructionExperiment( |
| osc_config=OscillatorConfig( |
| num_oscillators=32, |
| state_dim=16, |
| sequence_length=4096 |
| ) |
| ) |
| |
| |
| k_values = [0, 64, 128, 256, 512, 1024, 2048, 4096] |
| results = experiment.compare_with_without_regularization( |
| k_values=k_values, |
| verbose=verbose |
| ) |
| |
| |
| Path(output_dir).mkdir(parents=True, exist_ok=True) |
| ts = datetime.now().strftime("%Y%m%d_%H%M%S") |
| |
| with open(f"{output_dir}/identity_reconstruction_{ts}.json", "w") as f: |
| json.dump(results, f, indent=2, default=str) |
| |
| |
| report = generate_report(results) |
| with open(f"{output_dir}/IDENTITY_RECONSTRUCTION_REPORT_{ts}.md", "w") as f: |
| f.write(report) |
| |
| if verbose: |
| print(f"\nResults saved to: {output_dir}/") |
| |
| return results |


def generate_report(results: Dict[str, Any]) -> str:
    """Generate markdown report from experiment results."""
    without = results["without_regularization"]["analysis"]
    with_reg = results["with_regularization"]["analysis"]

    def fmt_k(k: Optional[int]) -> str:
        # Render a missing critical K as an explicit "no collapse" marker
        # rather than the string "None".
        return f"{k:,}" if k is not None else "none observed"

    report = f"""# Identity Reconstruction Experiment Results

**Date:** {results['timestamp']}

---

## Executive Summary

This experiment tests whether FDRA preserves identity invariants across large-context interference.

| Condition | Verdict | Critical K | Transition Type |
|-----------|---------|------------|-----------------|
| Without Regularization | {without['verdict']} | {fmt_k(without['critical_k'])} | {without['transition_type']} |
| With Regularization | {with_reg['verdict']} | {fmt_k(with_reg['critical_k'])} | {with_reg['transition_type']} |

---

## Preservation Curves

### Without Regularization

| K (tokens) | Preserved Rate | Mean Retention |
|------------|----------------|----------------|
"""

    for point in without["preservation_curve"]:
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        report += f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} | {point['mean_retention']:.1%} |\n"

    report += f"""
**Analysis:** {without['explanation']}

### With Regularization

| K (tokens) | Preserved Rate | Mean Retention |
|------------|----------------|----------------|
"""

    for point in with_reg["preservation_curve"]:
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        report += f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} | {point['mean_retention']:.1%} |\n"

    report += f"""
**Analysis:** {with_reg['explanation']}

---

## Interpretation

### What This Means

"""

    if "PASS" in with_reg['verdict'] and "FAIL" in without['verdict']:
        report += """**Half-life regularization is decisive.**

The experiment shows:
1. Without regularization, identity degrades gradually or collapses immediately
2. With regularization, identity survives until a critical threshold
3. The phase transition signature confirms basin-like dynamics

This validates the Melanie/Tiago hypothesis:
> Half-life collapse prevents long-context reasoning.
> Regularization restores the capacity for identity preservation.
"""
    elif "PASS" in with_reg['verdict'] and "PASS" in without['verdict']:
        report += """**Both conditions preserve identity.**

This suggests the architecture already has sufficient capacity.
The regularizer may provide additional margin, but is not strictly required
for the tested K range.

Consider testing with more aggressive interference or longer horizons.
"""
    else:
        report += """**Neither condition preserves identity.**

This suggests:
1. The architecture may need deeper modifications
2. Identity encoding may be too weak
3. Interference may be too strong

Further investigation is needed.
"""

    report += """
---

## Connection to Melanie's Discovery

The half-life collapse problem discovered by Melanie/Tiago:
> "After training at GPT-2 scale, effective half-lives collapse to ~10 steps."

This experiment directly tests whether:
1. **Collapsed half-lives → identity loss** (should see gradual decay)
2. **Regularized half-lives → identity preservation** (should see phase transition)

The results above confirm or refute this hypothesis.

---

## Next Steps

If regularization is decisive:
- [ ] Integrate regularizer into FDRA training loop
- [ ] Test on real language modeling tasks
- [ ] Measure impact on long-context QA/summarization

If inconclusive:
- [ ] Increase interference range
- [ ] Test with different identity invariants
- [ ] Analyze half-life distributions more carefully

---

*Report generated by identity_reconstruction_experiment.py*
"""

    return report


if __name__ == "__main__":
    run_identity_reconstruction_experiment()