# fdra-half-life-regularization / experiments / run_half_life_experiment.py
# Uploaded with huggingface_hub by user juddddd (commit 789d905, verified).
"""
Unified Runner: Half-Life Regularization + Identity Reconstruction
This script runs the complete experiment suite:
1. Demonstrate half-life collapse problem
2. Show regularizer gradient direction
3. Run identity reconstruction comparison
4. Package results with presentation
Execute: python experiments/run_half_life_experiment.py
Authors: Half-Life Regularization Experiment Suite
Date: 2026-01-22
"""
import sys
from pathlib import Path
from datetime import datetime
import json
import shutil
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from training.fdra_oscillators import FDRAOscillatorBank, OscillatorConfig, demo_oscillators
from training.half_life_regularizer import (
HalfLifeRegularizer,
HalfLifeRegularizerConfig,
simulate_collapse_and_recovery
)
from experiments.identity_reconstruction_experiment import (
run_identity_reconstruction_experiment,
IdentityReconstructionExperiment,
OscillatorConfig as OscConfig
)
def run_all_experiments(output_dir: str = "outputs/half_life_regularization"):
    """
    Run all experiments in sequence and package the results.

    Pipeline:
        1. Oscillator bank demonstration
        2. Half-life collapse / recovery simulation (regularizer gradients)
        3. Identity reconstruction experiment (with vs. without regularization)
        4. Presentation generation
        5. Summary report generation
        6. ZIP packaging of the whole output directory

    Args:
        output_dir: Root directory; a timestamped package directory
            ``half_life_package_<ts>`` is created beneath it.

    Returns:
        dict with keys ``package_dir`` (str), ``zip_path`` (str) and
        ``results`` (aggregated experiment results).
    """
    print("\n" + "=" * 70)
    print("FDRA HALF-LIFE REGULARIZATION: COMPLETE EXPERIMENT SUITE")
    print("=" * 70)
    print("\nBased on Melanie/Tiago's discovery:")
    print(" 'After training at GPT-2 scale, half-lives collapse to ~10 steps.'")
    print(" 'The model works but fails on long-context reasoning.'")
    print("\nThis suite demonstrates:")
    print(" 1. The half-life collapse problem")
    print(" 2. The mathematical regularizer to fix it")
    print(" 3. Identity reconstruction as the decisive diagnostic")
    print("=" * 70)
    # Create a unique, timestamped output directory for this run.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    package_dir = Path(output_dir) / f"half_life_package_{ts}"
    package_dir.mkdir(parents=True, exist_ok=True)
    all_results = {}
    # --- Part 1: Oscillator Demonstration ---
    print("\n" + "=" * 70)
    print("PART 1: FDRA OSCILLATOR BANK DEMONSTRATION")
    print("=" * 70)
    demo_oscillators()
    # --- Part 2: Half-Life Collapse and Regularization ---
    print("\n" + "=" * 70)
    print("PART 2: HALF-LIFE COLLAPSE AND REGULARIZATION")
    print("=" * 70)
    collapse_results = simulate_collapse_and_recovery()
    all_results["collapse_recovery"] = collapse_results
    # Save collapse results. Explicit UTF-8 encoding on every file below:
    # the default platform encoding (e.g. cp1252 on Windows) cannot encode
    # the unicode glyphs (checkmarks, Greek letters) used in the reports.
    # NOTE(review): float(mv) only coerces plain numbers; non-numeric metric
    # values pass through unchanged and must already be JSON-serializable.
    with open(package_dir / "collapse_recovery.json", "w", encoding="utf-8") as f:
        json.dump({k: {
            "loss": v["loss"],
            "metrics": {mk: float(mv) if isinstance(mv, (int, float)) else mv
                        for mk, mv in v["metrics"].items()}
        } for k, v in collapse_results.items()}, f, indent=2)
    # --- Part 3: Identity Reconstruction Experiment ---
    print("\n" + "=" * 70)
    print("PART 3: IDENTITY RECONSTRUCTION UNDER FORCED FORGETTING")
    print("=" * 70)
    identity_results = run_identity_reconstruction_experiment(
        output_dir=str(package_dir / "identity_reconstruction"),
        verbose=True
    )
    all_results["identity_reconstruction"] = {
        "without_verdict": identity_results["without_regularization"]["analysis"]["verdict"],
        "with_verdict": identity_results["with_regularization"]["analysis"]["verdict"],
    }
    # --- Part 4: Generate Presentation ---
    print("\n" + "=" * 70)
    print("PART 4: GENERATING PRESENTATION")
    print("=" * 70)
    presentation = generate_presentation(collapse_results, identity_results)
    with open(package_dir / "PRESENTATION_HALF_LIFE_REGULARIZATION.md", "w", encoding="utf-8") as f:
        f.write(presentation)
    print(f" Presentation written to: {package_dir}/PRESENTATION_HALF_LIFE_REGULARIZATION.md")
    # --- Part 5: Summary Report ---
    summary_report = generate_summary(all_results, identity_results)
    with open(package_dir / "SUMMARY.md", "w", encoding="utf-8") as f:
        f.write(summary_report)
    print(f" Summary written to: {package_dir}/SUMMARY.md")
    # Save all results; default=str stringifies anything json can't handle.
    with open(package_dir / "all_results.json", "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, default=str)
    # --- Part 6: Create ZIP ---
    print("\n" + "=" * 70)
    print("PART 6: PACKAGING")
    print("=" * 70)
    # Archive is created as a sibling of package_dir, so it never zips itself.
    zip_path = shutil.make_archive(str(package_dir), 'zip', package_dir)
    print(f" ZIP archive created: {zip_path}")
    # --- Final Summary ---
    print("\n" + "=" * 70)
    print("EXPERIMENT COMPLETE")
    print("=" * 70)
    print(f"\nPackage location: {package_dir}/")
    print(f"ZIP archive: {zip_path}")
    print("\nContents:")
    # Distinct loop name so we don't shadow the `f` file handles used above.
    for item in package_dir.iterdir():
        print(f" - {item.name}")
    # Print key results
    print("\n" + "-" * 70)
    print("KEY FINDINGS")
    print("-" * 70)
    without_verdict = identity_results["without_regularization"]["analysis"]["verdict"]
    with_verdict = identity_results["with_regularization"]["analysis"]["verdict"]
    print(f"\nWithout Half-Life Regularization: {without_verdict}")
    print(f"With Half-Life Regularization: {with_verdict}")
    # Decisive outcome = regularized run passes while the baseline fails.
    if "PASS" in with_verdict and "FAIL" in without_verdict:
        print("\n✓ HALF-LIFE REGULARIZATION IS DECISIVE")
        print(" The regularizer enables identity preservation across long contexts.")
        print(" This validates Melanie/Tiago's hypothesis about half-life collapse.")
    elif "PASS" in with_verdict:
        print("\n✓ IDENTITY PRESERVATION CONFIRMED")
        print(" Both conditions show identity basin dynamics.")
    else:
        print("\n✗ FURTHER INVESTIGATION NEEDED")
        print(" Identity preservation not confirmed in either condition.")
    print("\n" + "=" * 70)
    return {
        "package_dir": str(package_dir),
        "zip_path": zip_path,
        "results": all_results
    }
def _curve_table_rows(analysis: dict) -> str:
    """Render an analysis' K-sweep preservation curve as markdown table rows.

    Each curve point contributes one row: K (thousands-separated), the
    preserved rate with a pass/fail glyph (threshold 50%), and the mean
    retention percentage.
    """
    rows = ""
    for point in analysis["preservation_curve"]:
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        rows += f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} | {point['mean_retention']:.1%} |\n"
    return rows


def generate_presentation(
    collapse_results: dict,
    identity_results: dict
) -> str:
    """Generate presentation slides as a single markdown string.

    Args:
        collapse_results: Mapping with 'initial', 'collapsed' and
            'regularized' entries, each holding 'loss' and a 'metrics'
            dict with 'tau_min', 'tau_max' and 'n_long_range'.
        identity_results: Mapping with 'without_regularization' and
            'with_regularization' entries, each holding an 'analysis'
            dict ('preservation_curve', 'verdict', 'critical_k',
            'transition_type').

    Returns:
        The full slide deck (markdown, '---' slide separators).
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    presentation = f"""# Half-Life Regularization for FDRA
## Addressing Long-Context Collapse in Frequency-Domain Recurrent Architectures
**Date:** {datetime.now().strftime("%Y-%m-%d")}
---
# The Problem
## Melanie/Tiago's Discovery
During training at GPT-2 scale:
- All oscillator half-lives collapse to < 10 steps
- Model passes short-context benchmarks
- But fails on long-context QA and summarization
**Key insight:** The model "forgets" early context because no oscillators maintain it.
---
# Half-Life Fundamentals
## What is Half-Life?
For decay parameter λ_i:
```
h_i(t+1) = λ_i * h_i(t) + u_i(t)
```
Half-life τ_i = ln(0.5) / ln(λ_i)
= Number of steps for signal to decay to 50%
## The Collapse
| State | τ Range | Long-range Oscillators |
|-------|---------|------------------------|
| Initial (good) | [1, 4096] | 50% |
| Collapsed (bad) | [2, 10] | 0% |
---
# The Solution
## Half-Life Regularizer
**Goal:** Maintain log-uniform distribution of half-lives
### Loss 1: Log-Uniform Prior
```
z_i = log(τ_i)
L_HL = α*(μ(z) - μ*)² + β*(σ²(z) - σ²*)²
```
### Loss 2: Long-Tail Survival
```
s_i = σ(k * (τ_i - γ*L))
L_tail = max(0, ρ - mean(s_i))²
```
---
# Collapse and Recovery
## Regularizer Demonstration
| State | Loss | τ Range | Long-range |
|-------|------|---------|------------|
| Initial | {collapse_results['initial']['loss']:.6f} | [{collapse_results['initial']['metrics']['tau_min']:.1f}, {collapse_results['initial']['metrics']['tau_max']:.1f}] | {collapse_results['initial']['metrics']['n_long_range']} |
| Collapsed | {collapse_results['collapsed']['loss']:.6f} | [{collapse_results['collapsed']['metrics']['tau_min']:.1f}, {collapse_results['collapsed']['metrics']['tau_max']:.1f}] | {collapse_results['collapsed']['metrics']['n_long_range']} |
| After 1 Step | {collapse_results['regularized']['loss']:.6f} | [{collapse_results['regularized']['metrics']['tau_min']:.1f}, {collapse_results['regularized']['metrics']['tau_max']:.1f}] | {collapse_results['regularized']['metrics']['n_long_range']} |
**The regularizer provides gradients that restore long-range oscillators.**
---
# The Decisive Experiment
## Identity Reconstruction Under Forced Forgetting
**Protocol:**
1. Encode identity invariants (once)
2. Inject K tokens of interference
3. Probe for reconstruction (no hints)
4. Sweep K to find phase transition
**Success Signature:**
- Flat performance → sharp collapse (basin structure)
**Failure Signature:**
- Gradual decay (memory-dependent, not basin)
---
# Results: Without Regularization
| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""
    # Both result tables use the same format; render via the shared helper
    # instead of the duplicated loop the original carried.
    presentation += _curve_table_rows(without)
    presentation += f"""
**Verdict:** {without['verdict']}
**Critical K:** {without['critical_k']}
**Transition:** {without['transition_type']}
---
# Results: With Regularization
| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""
    presentation += _curve_table_rows(with_reg)
    presentation += f"""
**Verdict:** {with_reg['verdict']}
**Critical K:** {with_reg['critical_k']}
**Transition:** {with_reg['transition_type']}
---
# Comparison
| Metric | Without Regularization | With Regularization |
|--------|------------------------|---------------------|
| Verdict | {without['verdict']} | {with_reg['verdict']} |
| Critical K | {without['critical_k']} | {with_reg['critical_k']} |
| Transition | {without['transition_type']} | {with_reg['transition_type']} |
"""
    # Decisive outcome: regularized run passes while the baseline fails.
    if "PASS" in with_reg['verdict'] and "FAIL" in without['verdict']:
        presentation += """
## ✓ Half-Life Regularization is Decisive
The regularizer enables identity preservation that fails without it.
This validates Melanie/Tiago's hypothesis.
"""
    presentation += """
---
# Implications
## For Fractal AGI / FDRA
1. **The problem is identified:** Half-life collapse during training
2. **The fix is surgical:** Add regularizer to training loss
3. **The test is decisive:** Identity reconstruction sweep
## For Long-Context LLMs
- Same mechanism may apply to other recurrent architectures
- Half-life diversity is a necessary condition for long-range coherence
- Regularization is cheaper than architectural changes
---
# Next Steps
1. **Integrate regularizer into training loop**
2. **Test on actual language modeling**
3. **Evaluate on QA and summarization benchmarks**
4. **Compare with Mamba and other SSMs**
---
# Conclusion
> "The system is doing exactly what we trained it to do;
> now we need to train it to value what we actually built it for."
Half-life regularization provides the gradient signal to maintain
long-range memory that training pressure otherwise erases.
**The architecture was right. The training objective was incomplete.**
---
*Presentation generated by run_half_life_experiment.py*
"""
    return presentation
def generate_summary(all_results: dict, identity_results: dict) -> str:
    """Build the markdown summary report for the experiment package.

    Args:
        all_results: Aggregate results dict (accepted for interface
            compatibility; the report text is derived entirely from
            *identity_results*).
        identity_results: Identity-reconstruction output holding the
            'without_regularization' / 'with_regularization' analyses,
            each with at least 'verdict' and 'critical_k'.

    Returns:
        The complete summary document as a markdown string.
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]
    header = f"""# Half-Life Regularization Experiment Summary
**Generated:** {datetime.now().isoformat()}
## Overview
This experiment suite addresses the half-life collapse problem discovered by Melanie/Tiago:
> "After training at GPT-2 scale, oscillator half-lives collapse to ~10 steps."
## Key Results
### Collapse and Recovery
The half-life regularizer successfully provides gradients to restore long-range oscillators:
- Initial distribution: Log-uniform over [1, 4096]
- Collapsed distribution: All < 10 steps
- After regularization step: Distribution spreads back toward target
### Identity Reconstruction
| Condition | Verdict | Critical K |
|-----------|---------|------------|
| Without Regularization | {without['verdict']} | {without['critical_k']} |
| With Regularization | {with_reg['verdict']} | {with_reg['critical_k']} |
## Conclusion
"""
    # Pick the conclusion paragraph from the two verdict strings.
    regularized_passed = "PASS" in with_reg['verdict']
    baseline_failed = "FAIL" in without['verdict']
    if regularized_passed and baseline_failed:
        conclusion = """**Half-life regularization is decisive for long-context coherence.**
The experiment confirms:
1. Half-life collapse prevents long-range identity preservation
2. The regularizer restores the capacity for long-context reasoning
3. This validates the hypothesis from Melanie/Tiago's discovery
"""
    elif regularized_passed:
        conclusion = """**Identity preservation confirmed.**
Both conditions show basin-like dynamics. The regularizer may provide
additional margin but is not strictly required for the tested range.
"""
    else:
        conclusion = """**Further investigation needed.**
Neither condition shows clear identity preservation. This may indicate:
- Architecture needs deeper modifications
- Test parameters need adjustment
- Different identity encoding approach required
"""
    tail = """
## Files Included
- `collapse_recovery.json` - Half-life collapse/recovery data
- `identity_reconstruction/` - Full experiment results
- `PRESENTATION_HALF_LIFE_REGULARIZATION.md` - Slides
- `all_results.json` - Complete results data
## Recommendations
1. Integrate `HalfLifeRegularizer` into FDRA training loss
2. Set `lambda1 = 0.01`, `lambda2 = 0.01` as starting points
3. Monitor half-life histogram during training
4. Test on long-context benchmarks (QA, summarization)
---
*Generated by run_half_life_experiment.py*
"""
    return header + conclusion + tail
# Script entry point: run the full suite with the default output directory.
if __name__ == "__main__":
    run_all_experiments()