# fdra-half-life-regularization / experiments / run_half_life_experiment.py
# Uploaded with huggingface_hub by user juddddd (commit 789d905, verified).
"""
Unified Runner: Half-Life Regularization + Identity Reconstruction
This script runs the complete experiment suite:
1. Demonstrate half-life collapse problem
2. Show regularizer gradient direction
3. Run identity reconstruction comparison
4. Package results with presentation
Execute: python experiments/run_half_life_experiment.py
Authors: Half-Life Regularization Experiment Suite
Date: 2026-01-22
"""
import sys
from pathlib import Path
from datetime import datetime
import json
import shutil
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from training.fdra_oscillators import FDRAOscillatorBank, OscillatorConfig, demo_oscillators
from training.half_life_regularizer import (
HalfLifeRegularizer,
HalfLifeRegularizerConfig,
simulate_collapse_and_recovery
)
from experiments.identity_reconstruction_experiment import (
run_identity_reconstruction_experiment,
IdentityReconstructionExperiment,
OscillatorConfig as OscConfig
)
def run_all_experiments(output_dir: str = "outputs/half_life_regularization"):
    """
    Run all experiments in sequence and package the results.

    Pipeline:
        1. Oscillator bank demonstration
        2. Half-life collapse / recovery simulation (regularizer gradients)
        3. Identity reconstruction experiment (with vs. without regularization)
        4. Presentation generation
        5. Summary report generation
        6. ZIP packaging of the whole output directory

    Args:
        output_dir: Root directory; a timestamped package directory
            ``half_life_package_<ts>`` is created beneath it.

    Returns:
        dict with keys ``package_dir`` (str), ``zip_path`` (str) and
        ``results`` (aggregated experiment results).
    """
    print("\n" + "=" * 70)
    print("FDRA HALF-LIFE REGULARIZATION: COMPLETE EXPERIMENT SUITE")
    print("=" * 70)
    print("\nBased on Melanie/Tiago's discovery:")
    print(" 'After training at GPT-2 scale, half-lives collapse to ~10 steps.'")
    print(" 'The model works but fails on long-context reasoning.'")
    print("\nThis suite demonstrates:")
    print(" 1. The half-life collapse problem")
    print(" 2. The mathematical regularizer to fix it")
    print(" 3. Identity reconstruction as the decisive diagnostic")
    print("=" * 70)
    # Create a unique, timestamped output directory for this run.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    package_dir = Path(output_dir) / f"half_life_package_{ts}"
    package_dir.mkdir(parents=True, exist_ok=True)
    all_results = {}
    # --- Part 1: Oscillator Demonstration ---
    print("\n" + "=" * 70)
    print("PART 1: FDRA OSCILLATOR BANK DEMONSTRATION")
    print("=" * 70)
    demo_oscillators()
    # --- Part 2: Half-Life Collapse and Regularization ---
    print("\n" + "=" * 70)
    print("PART 2: HALF-LIFE COLLAPSE AND REGULARIZATION")
    print("=" * 70)
    collapse_results = simulate_collapse_and_recovery()
    all_results["collapse_recovery"] = collapse_results
    # Save collapse results. Explicit UTF-8 encoding on every file below:
    # the default platform encoding (e.g. cp1252 on Windows) cannot encode
    # the unicode glyphs (checkmarks, Greek letters) used in the reports.
    # NOTE(review): float(mv) only coerces plain numbers; non-numeric metric
    # values pass through unchanged and must already be JSON-serializable.
    with open(package_dir / "collapse_recovery.json", "w", encoding="utf-8") as f:
        json.dump({k: {
            "loss": v["loss"],
            "metrics": {mk: float(mv) if isinstance(mv, (int, float)) else mv
                        for mk, mv in v["metrics"].items()}
        } for k, v in collapse_results.items()}, f, indent=2)
    # --- Part 3: Identity Reconstruction Experiment ---
    print("\n" + "=" * 70)
    print("PART 3: IDENTITY RECONSTRUCTION UNDER FORCED FORGETTING")
    print("=" * 70)
    identity_results = run_identity_reconstruction_experiment(
        output_dir=str(package_dir / "identity_reconstruction"),
        verbose=True
    )
    all_results["identity_reconstruction"] = {
        "without_verdict": identity_results["without_regularization"]["analysis"]["verdict"],
        "with_verdict": identity_results["with_regularization"]["analysis"]["verdict"],
    }
    # --- Part 4: Generate Presentation ---
    print("\n" + "=" * 70)
    print("PART 4: GENERATING PRESENTATION")
    print("=" * 70)
    presentation = generate_presentation(collapse_results, identity_results)
    with open(package_dir / "PRESENTATION_HALF_LIFE_REGULARIZATION.md", "w", encoding="utf-8") as f:
        f.write(presentation)
    print(f" Presentation written to: {package_dir}/PRESENTATION_HALF_LIFE_REGULARIZATION.md")
    # --- Part 5: Summary Report ---
    summary_report = generate_summary(all_results, identity_results)
    with open(package_dir / "SUMMARY.md", "w", encoding="utf-8") as f:
        f.write(summary_report)
    print(f" Summary written to: {package_dir}/SUMMARY.md")
    # Save all results; default=str stringifies anything json can't handle.
    with open(package_dir / "all_results.json", "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, default=str)
    # --- Part 6: Create ZIP ---
    print("\n" + "=" * 70)
    print("PART 6: PACKAGING")
    print("=" * 70)
    # Archive is created as a sibling of package_dir, so it never zips itself.
    zip_path = shutil.make_archive(str(package_dir), 'zip', package_dir)
    print(f" ZIP archive created: {zip_path}")
    # --- Final Summary ---
    print("\n" + "=" * 70)
    print("EXPERIMENT COMPLETE")
    print("=" * 70)
    print(f"\nPackage location: {package_dir}/")
    print(f"ZIP archive: {zip_path}")
    print("\nContents:")
    # Distinct loop name so we don't shadow the `f` file handles used above.
    for item in package_dir.iterdir():
        print(f" - {item.name}")
    # Print key results
    print("\n" + "-" * 70)
    print("KEY FINDINGS")
    print("-" * 70)
    without_verdict = identity_results["without_regularization"]["analysis"]["verdict"]
    with_verdict = identity_results["with_regularization"]["analysis"]["verdict"]
    print(f"\nWithout Half-Life Regularization: {without_verdict}")
    print(f"With Half-Life Regularization: {with_verdict}")
    # Decisive outcome = regularized run passes while the baseline fails.
    if "PASS" in with_verdict and "FAIL" in without_verdict:
        print("\n✓ HALF-LIFE REGULARIZATION IS DECISIVE")
        print(" The regularizer enables identity preservation across long contexts.")
        print(" This validates Melanie/Tiago's hypothesis about half-life collapse.")
    elif "PASS" in with_verdict:
        print("\n✓ IDENTITY PRESERVATION CONFIRMED")
        print(" Both conditions show identity basin dynamics.")
    else:
        print("\n✗ FURTHER INVESTIGATION NEEDED")
        print(" Identity preservation not confirmed in either condition.")
    print("\n" + "=" * 70)
    return {
        "package_dir": str(package_dir),
        "zip_path": zip_path,
        "results": all_results
    }
def _curve_table_rows(analysis: dict) -> str:
    """Render an analysis' K-sweep preservation curve as markdown table rows.

    Each curve point contributes one row: K (thousands-separated), the
    preserved rate with a pass/fail glyph (threshold 50%), and the mean
    retention percentage.
    """
    rows = ""
    for point in analysis["preservation_curve"]:
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        rows += f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} | {point['mean_retention']:.1%} |\n"
    return rows


def generate_presentation(
    collapse_results: dict,
    identity_results: dict
) -> str:
    """Generate presentation slides as a single markdown string.

    Args:
        collapse_results: Mapping with 'initial', 'collapsed' and
            'regularized' entries, each holding 'loss' and a 'metrics'
            dict with 'tau_min', 'tau_max' and 'n_long_range'.
        identity_results: Mapping with 'without_regularization' and
            'with_regularization' entries, each holding an 'analysis'
            dict ('preservation_curve', 'verdict', 'critical_k',
            'transition_type').

    Returns:
        The full slide deck (markdown, '---' slide separators).
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    presentation = f"""# Half-Life Regularization for FDRA
## Addressing Long-Context Collapse in Frequency-Domain Recurrent Architectures
**Date:** {datetime.now().strftime("%Y-%m-%d")}
---
# The Problem
## Melanie/Tiago's Discovery
During training at GPT-2 scale:
- All oscillator half-lives collapse to < 10 steps
- Model passes short-context benchmarks
- But fails on long-context QA and summarization
**Key insight:** The model "forgets" early context because no oscillators maintain it.
---
# Half-Life Fundamentals
## What is Half-Life?
For decay parameter λ_i:
```
h_i(t+1) = λ_i * h_i(t) + u_i(t)
```
Half-life τ_i = ln(0.5) / ln(λ_i)
= Number of steps for signal to decay to 50%
## The Collapse
| State | τ Range | Long-range Oscillators |
|-------|---------|------------------------|
| Initial (good) | [1, 4096] | 50% |
| Collapsed (bad) | [2, 10] | 0% |
---
# The Solution
## Half-Life Regularizer
**Goal:** Maintain log-uniform distribution of half-lives
### Loss 1: Log-Uniform Prior
```
z_i = log(τ_i)
L_HL = α*(μ(z) - μ*)² + β*(σ²(z) - σ²*)²
```
### Loss 2: Long-Tail Survival
```
s_i = σ(k * (τ_i - γ*L))
L_tail = max(0, ρ - mean(s_i))²
```
---
# Collapse and Recovery
## Regularizer Demonstration
| State | Loss | τ Range | Long-range |
|-------|------|---------|------------|
| Initial | {collapse_results['initial']['loss']:.6f} | [{collapse_results['initial']['metrics']['tau_min']:.1f}, {collapse_results['initial']['metrics']['tau_max']:.1f}] | {collapse_results['initial']['metrics']['n_long_range']} |
| Collapsed | {collapse_results['collapsed']['loss']:.6f} | [{collapse_results['collapsed']['metrics']['tau_min']:.1f}, {collapse_results['collapsed']['metrics']['tau_max']:.1f}] | {collapse_results['collapsed']['metrics']['n_long_range']} |
| After 1 Step | {collapse_results['regularized']['loss']:.6f} | [{collapse_results['regularized']['metrics']['tau_min']:.1f}, {collapse_results['regularized']['metrics']['tau_max']:.1f}] | {collapse_results['regularized']['metrics']['n_long_range']} |
**The regularizer provides gradients that restore long-range oscillators.**
---
# The Decisive Experiment
## Identity Reconstruction Under Forced Forgetting
**Protocol:**
1. Encode identity invariants (once)
2. Inject K tokens of interference
3. Probe for reconstruction (no hints)
4. Sweep K to find phase transition
**Success Signature:**
- Flat performance → sharp collapse (basin structure)
**Failure Signature:**
- Gradual decay (memory-dependent, not basin)
---
# Results: Without Regularization
| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""
    # Both result tables use the same format; render via the shared helper
    # instead of the duplicated loop the original carried.
    presentation += _curve_table_rows(without)
    presentation += f"""
**Verdict:** {without['verdict']}
**Critical K:** {without['critical_k']}
**Transition:** {without['transition_type']}
---
# Results: With Regularization
| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""
    presentation += _curve_table_rows(with_reg)
    presentation += f"""
**Verdict:** {with_reg['verdict']}
**Critical K:** {with_reg['critical_k']}
**Transition:** {with_reg['transition_type']}
---
# Comparison
| Metric | Without Regularization | With Regularization |
|--------|------------------------|---------------------|
| Verdict | {without['verdict']} | {with_reg['verdict']} |
| Critical K | {without['critical_k']} | {with_reg['critical_k']} |
| Transition | {without['transition_type']} | {with_reg['transition_type']} |
"""
    # Decisive outcome: regularized run passes while the baseline fails.
    if "PASS" in with_reg['verdict'] and "FAIL" in without['verdict']:
        presentation += """
## ✓ Half-Life Regularization is Decisive
The regularizer enables identity preservation that fails without it.
This validates Melanie/Tiago's hypothesis.
"""
    presentation += """
---
# Implications
## For Fractal AGI / FDRA
1. **The problem is identified:** Half-life collapse during training
2. **The fix is surgical:** Add regularizer to training loss
3. **The test is decisive:** Identity reconstruction sweep
## For Long-Context LLMs
- Same mechanism may apply to other recurrent architectures
- Half-life diversity is a necessary condition for long-range coherence
- Regularization is cheaper than architectural changes
---
# Next Steps
1. **Integrate regularizer into training loop**
2. **Test on actual language modeling**
3. **Evaluate on QA and summarization benchmarks**
4. **Compare with Mamba and other SSMs**
---
# Conclusion
> "The system is doing exactly what we trained it to do;
> now we need to train it to value what we actually built it for."
Half-life regularization provides the gradient signal to maintain
long-range memory that training pressure otherwise erases.
**The architecture was right. The training objective was incomplete.**
---
*Presentation generated by run_half_life_experiment.py*
"""
    return presentation
def generate_summary(all_results: dict, identity_results: dict) -> str:
    """Build the markdown summary report for the experiment package.

    Args:
        all_results: Aggregate results dict (accepted for interface
            compatibility; the report text is derived entirely from
            *identity_results*).
        identity_results: Identity-reconstruction output holding the
            'without_regularization' / 'with_regularization' analyses,
            each with at least 'verdict' and 'critical_k'.

    Returns:
        The complete summary document as a markdown string.
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    header = f"""# Half-Life Regularization Experiment Summary
**Generated:** {datetime.now().isoformat()}
## Overview
This experiment suite addresses the half-life collapse problem discovered by Melanie/Tiago:
> "After training at GPT-2 scale, oscillator half-lives collapse to ~10 steps."
## Key Results
### Collapse and Recovery
The half-life regularizer successfully provides gradients to restore long-range oscillators:
- Initial distribution: Log-uniform over [1, 4096]
- Collapsed distribution: All < 10 steps
- After regularization step: Distribution spreads back toward target
### Identity Reconstruction
| Condition | Verdict | Critical K |
|-----------|---------|------------|
| Without Regularization | {without['verdict']} | {without['critical_k']} |
| With Regularization | {with_reg['verdict']} | {with_reg['critical_k']} |
## Conclusion
"""
    # Pick the conclusion paragraph from the two verdict strings.
    regularized_passed = "PASS" in with_reg['verdict']
    baseline_failed = "FAIL" in without['verdict']
    if regularized_passed and baseline_failed:
        conclusion = """**Half-life regularization is decisive for long-context coherence.**
The experiment confirms:
1. Half-life collapse prevents long-range identity preservation
2. The regularizer restores the capacity for long-context reasoning
3. This validates the hypothesis from Melanie/Tiago's discovery
"""
    elif regularized_passed:
        conclusion = """**Identity preservation confirmed.**
Both conditions show basin-like dynamics. The regularizer may provide
additional margin but is not strictly required for the tested range.
"""
    else:
        conclusion = """**Further investigation needed.**
Neither condition shows clear identity preservation. This may indicate:
- Architecture needs deeper modifications
- Test parameters need adjustment
- Different identity encoding approach required
"""
    tail = """
## Files Included
- `collapse_recovery.json` - Half-life collapse/recovery data
- `identity_reconstruction/` - Full experiment results
- `PRESENTATION_HALF_LIFE_REGULARIZATION.md` - Slides
- `all_results.json` - Complete results data
## Recommendations
1. Integrate `HalfLifeRegularizer` into FDRA training loss
2. Set `lambda1 = 0.01`, `lambda2 = 0.01` as starting points
3. Monitor half-life histogram during training
4. Test on long-context benchmarks (QA, summarization)
---
*Generated by run_half_life_experiment.py*
"""
    return header + conclusion + tail
# Script entry point: run the full suite with the default output directory.
if __name__ == "__main__":
    run_all_experiments()