fractal-agi
/

fdra-half-life-regularization

Model card Files Files and versions

xet

Community

juddddd committed on Jan 22

Commit

789d905

verified ·

1 Parent(s): 5c78711

Upload experiments/run_half_life_experiment.py with huggingface_hub

Browse files

Files changed (1) hide show

experiments/run_half_life_experiment.py +449 -0

experiments/run_half_life_experiment.py ADDED Viewed

	@@ -0,0 +1,449 @@

+"""
+Unified Runner: Half-Life Regularization + Identity Reconstruction
+This script runs the complete experiment suite:
+1. Demonstrate half-life collapse problem
+2. Show regularizer gradient direction
+3. Run identity reconstruction comparison
+4. Package results with presentation
+Execute: python experiments/run_half_life_experiment.py
+Authors: Half-Life Regularization Experiment Suite
+Date: 2026-01-22
+"""
+import sys
+from pathlib import Path
+from datetime import datetime
+import json
+import shutil
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from training.fdra_oscillators import FDRAOscillatorBank, OscillatorConfig, demo_oscillators
+from training.half_life_regularizer import (
+    HalfLifeRegularizer,
+    HalfLifeRegularizerConfig,
+    simulate_collapse_and_recovery
+)
+from experiments.identity_reconstruction_experiment import (
+    run_identity_reconstruction_experiment,
+    IdentityReconstructionExperiment,
+    OscillatorConfig as OscConfig
+)
def _print_banner(title: str, rule: str = "=") -> None:
    """Print ``title`` framed by two 70-character rules, preceded by a blank line."""
    line = rule * 70
    print("\n" + line)
    print(title)
    print(line)


def run_all_experiments(output_dir: str = "outputs/half_life_regularization"):
    """
    Run all experiments in sequence and package the artifacts.

    Stages, in order: oscillator demo, collapse/recovery simulation, identity
    reconstruction comparison, presentation + summary generation, and ZIP
    packaging of the timestamped package directory.

    Args:
        output_dir: Parent directory under which a new
            ``half_life_package_<YYYYmmdd_HHMMSS>`` directory is created.

    Returns:
        A dict with ``package_dir`` (package directory path as a string),
        ``zip_path`` (the value returned by ``shutil.make_archive``) and
        ``results`` (the aggregated results also written to
        ``all_results.json``).
    """
    _print_banner("FDRA HALF-LIFE REGULARIZATION: COMPLETE EXPERIMENT SUITE")
    print("\nBased on Melanie/Tiago's discovery:")
    print("  'After training at GPT-2 scale, half-lives collapse to ~10 steps.'")
    print("  'The model works but fails on long-context reasoning.'")
    print("\nThis suite demonstrates:")
    print("  1. The half-life collapse problem")
    print("  2. The mathematical regularizer to fix it")
    print("  3. Identity reconstruction as the decisive diagnostic")
    print("=" * 70)

    # Create a fresh, timestamped output directory for this run.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    package_dir = Path(output_dir) / f"half_life_package_{ts}"
    package_dir.mkdir(parents=True, exist_ok=True)
    all_results = {}

    # --- Part 1: Oscillator Demonstration ---
    _print_banner("PART 1: FDRA OSCILLATOR BANK DEMONSTRATION")
    demo_oscillators()

    # --- Part 2: Half-Life Collapse and Regularization ---
    _print_banner("PART 2: HALF-LIFE COLLAPSE AND REGULARIZATION")
    collapse_results = simulate_collapse_and_recovery()
    all_results["collapse_recovery"] = collapse_results

    # Build the payload before opening the file so that a failure while
    # converting values does not leave an empty JSON file behind.
    collapse_payload = {
        state: {
            "loss": entry["loss"],
            "metrics": {
                key: float(value) if isinstance(value, (int, float)) else value
                for key, value in entry["metrics"].items()
            },
        }
        for state, entry in collapse_results.items()
    }
    with open(package_dir / "collapse_recovery.json", "w", encoding="utf-8") as f:
        json.dump(collapse_payload, f, indent=2)

    # --- Part 3: Identity Reconstruction Experiment ---
    _print_banner("PART 3: IDENTITY RECONSTRUCTION UNDER FORCED FORGETTING")
    identity_results = run_identity_reconstruction_experiment(
        output_dir=str(package_dir / "identity_reconstruction"),
        verbose=True
    )
    without_verdict = identity_results["without_regularization"]["analysis"]["verdict"]
    with_verdict = identity_results["with_regularization"]["analysis"]["verdict"]
    all_results["identity_reconstruction"] = {
        "without_verdict": without_verdict,
        "with_verdict": with_verdict,
    }

    # --- Part 4: Generate Presentation ---
    _print_banner("PART 4: GENERATING PRESENTATION")
    presentation = generate_presentation(collapse_results, identity_results)
    # The reports contain non-ASCII symbols, so the encoding must be explicit:
    # the platform default encoding may be unable to represent them.
    with open(
        package_dir / "PRESENTATION_HALF_LIFE_REGULARIZATION.md",
        "w",
        encoding="utf-8",
    ) as f:
        f.write(presentation)
    print(f"  Presentation written to: {package_dir}/PRESENTATION_HALF_LIFE_REGULARIZATION.md")

    # --- Part 5: Summary Report ---
    summary_report = generate_summary(all_results, identity_results)
    with open(package_dir / "SUMMARY.md", "w", encoding="utf-8") as f:
        f.write(summary_report)
    print(f"  Summary written to: {package_dir}/SUMMARY.md")

    # Save all results; default=str stringifies anything json cannot encode.
    with open(package_dir / "all_results.json", "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, default=str)

    # --- Part 6: Create ZIP ---
    _print_banner("PART 6: PACKAGING")
    # base_name is the package directory path, so the archive is created next
    # to the directory (not inside it) with ".zip" appended.
    zip_path = shutil.make_archive(str(package_dir), 'zip', package_dir)
    print(f"  ZIP archive created: {zip_path}")

    # --- Final Summary ---
    _print_banner("EXPERIMENT COMPLETE")
    print(f"\nPackage location: {package_dir}/")
    print(f"ZIP archive: {zip_path}")
    print("\nContents:")
    # Sorted so the listing is stable across runs and filesystems.
    for entry in sorted(package_dir.iterdir()):
        print(f"  - {entry.name}")

    # Print key results
    _print_banner("KEY FINDINGS", rule="-")
    print(f"\nWithout Half-Life Regularization: {without_verdict}")
    print(f"With Half-Life Regularization: {with_verdict}")
    if "PASS" in with_verdict and "FAIL" in without_verdict:
        print("\n✓ HALF-LIFE REGULARIZATION IS DECISIVE")
        print("  The regularizer enables identity preservation across long contexts.")
        print("  This validates Melanie/Tiago's hypothesis about half-life collapse.")
    elif "PASS" in with_verdict:
        print("\n✓ IDENTITY PRESERVATION CONFIRMED")
        print("  Both conditions show identity basin dynamics.")
    else:
        # NOTE(review): this branch is also reached when only the
        # unregularized run passes; confirm the wording is intended for that case.
        print("\n✗ FURTHER INVESTIGATION NEEDED")
        print("  Identity preservation not confirmed in either condition.")
    print("\n" + "=" * 70)

    return {
        "package_dir": str(package_dir),
        "zip_path": zip_path,
        "results": all_results
    }
def _collapse_row(label: str, entry: dict) -> str:
    """Render one row of the collapse/recovery table from a result entry."""
    metrics = entry["metrics"]
    return (
        f"| {label} | {entry['loss']:.6f} "
        f"| [{metrics['tau_min']:.1f}, {metrics['tau_max']:.1f}] "
        f"| {metrics['n_long_range']} |"
    )


def _preservation_rows(curve) -> str:
    """Render preservation-curve points as Markdown table rows, one per line."""
    rows = []
    for point in curve:
        # A point counts as preserved when at least half of the trials kept it.
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        rows.append(
            f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} "
            f"| {point['mean_retention']:.1%} |\n"
        )
    return "".join(rows)


def _results_section(title: str, analysis: dict) -> str:
    """Render one "Results" slide: preservation table followed by the verdict."""
    return (
        f"# Results: {title}\n\n"
        "| K (tokens) | Preserved | Mean Retention |\n"
        "|------------|-----------|----------------|\n"
        f"{_preservation_rows(analysis['preservation_curve'])}\n"
        f"**Verdict:** {analysis['verdict']}\n\n"
        f"**Critical K:** {analysis['critical_k']}\n\n"
        f"**Transition:** {analysis['transition_type']}\n\n"
        "---\n\n"
    )


def generate_presentation(
    collapse_results: dict,
    identity_results: dict
) -> str:
    """Generate presentation slides as a Markdown document.

    Blocks are separated by blank lines so that each ``---`` renders as a
    slide break; a ``---`` directly under a text line would instead be parsed
    as a setext heading underline.

    Args:
        collapse_results: Mapping with ``initial``, ``collapsed`` and
            ``regularized`` entries, each holding ``loss`` and a ``metrics``
            dict with ``tau_min``, ``tau_max`` and ``n_long_range``.
        identity_results: Mapping with ``without_regularization`` and
            ``with_regularization`` entries, each holding an ``analysis``
            dict with ``verdict``, ``critical_k``, ``transition_type`` and
            ``preservation_curve`` (points with ``k``, ``preserved_rate``
            and ``mean_retention``).

    Returns:
        The full presentation text.
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    today = datetime.now().strftime("%Y-%m-%d")

    collapse_table = "\n".join(
        _collapse_row(label, collapse_results[key])
        for label, key in (
            ("Initial", "initial"),
            ("Collapsed", "collapsed"),
            ("After 1 Step", "regularized"),
        )
    )

    sections = []

    sections.append(f"""# Half-Life Regularization for FDRA

## Addressing Long-Context Collapse in Frequency-Domain Recurrent Architectures

**Date:** {today}

---

""")

    sections.append("""# The Problem

## Melanie/Tiago's Discovery

During training at GPT-2 scale:

- All oscillator half-lives collapse to < 10 steps
- Model passes short-context benchmarks
- But fails on long-context QA and summarization

**Key insight:** The model "forgets" early context because no oscillators maintain it.

---

# Half-Life Fundamentals

## What is Half-Life?

For decay parameter λ_i:

```
h_i(t+1) = λ_i * h_i(t) + u_i(t)
```

Half-life τ_i = ln(0.5) / ln(λ_i)
= Number of steps for signal to decay to 50%

## The Collapse

| State | τ Range | Long-range Oscillators |
|-------|---------|------------------------|
| Initial (good) | [1, 4096] | 50% |
| Collapsed (bad) | [2, 10] | 0% |

---

# The Solution

## Half-Life Regularizer

**Goal:** Maintain log-uniform distribution of half-lives

### Loss 1: Log-Uniform Prior

```
z_i = log(τ_i)
L_HL = α*(μ(z) - μ*)² + β*(σ²(z) - σ²*)²
```

### Loss 2: Long-Tail Survival

```
s_i = σ(k * (τ_i - γ*L))
L_tail = max(0, ρ - mean(s_i))²
```

---

""")

    sections.append(f"""# Collapse and Recovery

## Regularizer Demonstration

| State | Loss | τ Range | Long-range |
|-------|------|---------|------------|
{collapse_table}

**The regularizer provides gradients that restore long-range oscillators.**

---

""")

    sections.append("""# The Decisive Experiment

## Identity Reconstruction Under Forced Forgetting

**Protocol:**

1. Encode identity invariants (once)
2. Inject K tokens of interference
3. Probe for reconstruction (no hints)
4. Sweep K to find phase transition

**Success Signature:**

- Flat performance → sharp collapse (basin structure)

**Failure Signature:**

- Gradual decay (memory-dependent, not basin)

---

""")

    sections.append(_results_section("Without Regularization", without))
    sections.append(_results_section("With Regularization", with_reg))

    sections.append(f"""# Comparison

| Metric | Without Regularization | With Regularization |
|--------|------------------------|---------------------|
| Verdict | {without['verdict']} | {with_reg['verdict']} |
| Critical K | {without['critical_k']} | {with_reg['critical_k']} |
| Transition | {without['transition_type']} | {with_reg['transition_type']} |
""")

    # The "decisive" callout only appears when the regularized run passes
    # and the unregularized run fails.
    if "PASS" in with_reg['verdict'] and "FAIL" in without['verdict']:
        sections.append("""
## ✓ Half-Life Regularization is Decisive

The regularizer enables identity preservation that fails without it.
This validates Melanie/Tiago's hypothesis.
""")

    sections.append("""
---

# Implications

## For Fractal AGI / FDRA

1. **The problem is identified:** Half-life collapse during training
2. **The fix is surgical:** Add regularizer to training loss
3. **The test is decisive:** Identity reconstruction sweep

## For Long-Context LLMs

- Same mechanism may apply to other recurrent architectures
- Half-life diversity is a necessary condition for long-range coherence
- Regularization is cheaper than architectural changes

---

# Next Steps

1. **Integrate regularizer into training loop**
2. **Test on actual language modeling**
3. **Evaluate on QA and summarization benchmarks**
4. **Compare with Mamba and other SSMs**

---

# Conclusion

> "The system is doing exactly what we trained it to do;
> now we need to train it to value what we actually built it for."

Half-life regularization provides the gradient signal to maintain
long-range memory that training pressure otherwise erases.

**The architecture was right. The training objective was incomplete.**

---

*Presentation generated by run_half_life_experiment.py*
""")

    return "".join(sections)
def generate_summary(all_results: dict, identity_results: dict) -> str:
    """Generate the Markdown summary report.

    Headings, paragraphs, the blockquote, lists, the table and the final
    rule are separated by blank lines so each renders as its own block.

    Args:
        all_results: Aggregated results; accepted for interface
            compatibility but not read by this function.
        identity_results: Mapping with ``without_regularization`` and
            ``with_regularization`` entries, each holding an ``analysis``
            dict with ``verdict`` and ``critical_k``.

    Returns:
        The summary report text.
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    with_passed = "PASS" in with_reg['verdict']
    without_failed = "FAIL" in without['verdict']

    header = f"""# Half-Life Regularization Experiment Summary

**Generated:** {datetime.now().isoformat()}

## Overview

This experiment suite addresses the half-life collapse problem discovered by Melanie/Tiago:

> "After training at GPT-2 scale, oscillator half-lives collapse to ~10 steps."

## Key Results

### Collapse and Recovery

The half-life regularizer successfully provides gradients to restore long-range oscillators:

- Initial distribution: Log-uniform over [1, 4096]
- Collapsed distribution: All < 10 steps
- After regularization step: Distribution spreads back toward target

### Identity Reconstruction

| Condition | Verdict | Critical K |
|-----------|---------|------------|
| Without Regularization | {without['verdict']} | {without['critical_k']} |
| With Regularization | {with_reg['verdict']} | {with_reg['critical_k']} |

## Conclusion

"""

    if with_passed and without_failed:
        conclusion = """**Half-life regularization is decisive for long-context coherence.**

The experiment confirms:

1. Half-life collapse prevents long-range identity preservation
2. The regularizer restores the capacity for long-context reasoning
3. This validates the hypothesis from Melanie/Tiago's discovery
"""
    elif with_passed:
        conclusion = """**Identity preservation confirmed.**

Both conditions show basin-like dynamics. The regularizer may provide
additional margin but is not strictly required for the tested range.
"""
    else:
        # NOTE(review): this branch is also reached when only the
        # unregularized run passes; confirm the wording is intended for that case.
        conclusion = """**Further investigation needed.**

Neither condition shows clear identity preservation. This may indicate:

- Architecture needs deeper modifications
- Test parameters need adjustment
- Different identity encoding approach required
"""

    footer = """
## Files Included

- `collapse_recovery.json` - Half-life collapse/recovery data
- `identity_reconstruction/` - Full experiment results
- `PRESENTATION_HALF_LIFE_REGULARIZATION.md` - Slides
- `all_results.json` - Complete results data

## Recommendations

1. Integrate `HalfLifeRegularizer` into FDRA training loss
2. Set `lambda1 = 0.01`, `lambda2 = 0.01` as starting points
3. Monitor half-life histogram during training
4. Test on long-context benchmarks (QA, summarization)

---

*Generated by run_half_life_experiment.py*
"""

    return header + conclusion + footer
# Script entry point: run the full suite with the default output directory.
if __name__ == "__main__":
    run_all_experiments()