juddddd committed on
Commit
789d905
·
verified ·
1 Parent(s): 5c78711

Upload experiments/run_half_life_experiment.py with huggingface_hub

Browse files
experiments/run_half_life_experiment.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Runner: Half-Life Regularization + Identity Reconstruction
3
+
4
+ This script runs the complete experiment suite:
5
+ 1. Demonstrate half-life collapse problem
6
+ 2. Show regularizer gradient direction
7
+ 3. Run identity reconstruction comparison
8
+ 4. Package results with presentation
9
+
10
+ Execute: python experiments/run_half_life_experiment.py
11
+
12
+ Authors: Half-Life Regularization Experiment Suite
13
+ Date: 2026-01-22
14
+ """
15
+
16
+ import sys
17
+ from pathlib import Path
18
+ from datetime import datetime
19
+ import json
20
+ import shutil
21
+
22
+ # Add project root to path
23
+ sys.path.insert(0, str(Path(__file__).parent.parent))
24
+
25
+ from training.fdra_oscillators import FDRAOscillatorBank, OscillatorConfig, demo_oscillators
26
+ from training.half_life_regularizer import (
27
+ HalfLifeRegularizer,
28
+ HalfLifeRegularizerConfig,
29
+ simulate_collapse_and_recovery
30
+ )
31
+ from experiments.identity_reconstruction_experiment import (
32
+ run_identity_reconstruction_experiment,
33
+ IdentityReconstructionExperiment,
34
+ OscillatorConfig as OscConfig
35
+ )
36
+
37
+
38
def _print_banner(title: str) -> None:
    """Print *title* framed by two 70-character ``=`` rules, after a blank line."""
    rule = "=" * 70
    print("\n" + rule)
    print(title)
    print(rule)


def run_all_experiments(output_dir: str = "outputs/half_life_regularization"):
    """
    Run all experiments in sequence and package their outputs.

    The steps are: oscillator demo, collapse/recovery simulation, identity
    reconstruction comparison, presentation and summary generation, and ZIP
    packaging of the timestamped package directory.

    Args:
        output_dir: Directory under which a timestamped
            ``half_life_package_<YYYYmmdd_HHMMSS>`` folder is created.

    Returns:
        A dict with ``package_dir`` (str path of the package folder),
        ``zip_path`` (value returned by ``shutil.make_archive``) and
        ``results`` (the collected results dict).
    """
    _print_banner("FDRA HALF-LIFE REGULARIZATION: COMPLETE EXPERIMENT SUITE")
    print("\nBased on Melanie/Tiago's discovery:")
    print(" 'After training at GPT-2 scale, half-lives collapse to ~10 steps.'")
    print(" 'The model works but fails on long-context reasoning.'")
    print("\nThis suite demonstrates:")
    print(" 1. The half-life collapse problem")
    print(" 2. The mathematical regularizer to fix it")
    print(" 3. Identity reconstruction as the decisive diagnostic")
    print("=" * 70)

    # Create output directory
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    package_dir = Path(output_dir) / f"half_life_package_{ts}"
    package_dir.mkdir(parents=True, exist_ok=True)

    all_results = {}

    # --- Part 1: Oscillator Demonstration ---
    _print_banner("PART 1: FDRA OSCILLATOR BANK DEMONSTRATION")
    demo_oscillators()

    # --- Part 2: Half-Life Collapse and Regularization ---
    _print_banner("PART 2: HALF-LIFE COLLAPSE AND REGULARIZATION")
    collapse_results = simulate_collapse_and_recovery()
    all_results["collapse_recovery"] = collapse_results

    # Save collapse results. Plain int/float metrics are normalised to float;
    # anything json cannot encode natively falls back to str(), matching the
    # all_results dump below, so a finished run is not lost to a TypeError.
    collapse_payload = {
        name: {
            "loss": state["loss"],
            "metrics": {
                key: float(value) if isinstance(value, (int, float)) else value
                for key, value in state["metrics"].items()
            },
        }
        for name, state in collapse_results.items()
    }
    with open(package_dir / "collapse_recovery.json", "w", encoding="utf-8") as fh:
        json.dump(collapse_payload, fh, indent=2, default=str)

    # --- Part 3: Identity Reconstruction Experiment ---
    _print_banner("PART 3: IDENTITY RECONSTRUCTION UNDER FORCED FORGETTING")
    identity_results = run_identity_reconstruction_experiment(
        output_dir=str(package_dir / "identity_reconstruction"),
        verbose=True,
    )
    without_verdict = identity_results["without_regularization"]["analysis"]["verdict"]
    with_verdict = identity_results["with_regularization"]["analysis"]["verdict"]
    all_results["identity_reconstruction"] = {
        "without_verdict": without_verdict,
        "with_verdict": with_verdict,
    }

    # --- Part 4: Generate Presentation ---
    _print_banner("PART 4: GENERATING PRESENTATION")
    presentation = generate_presentation(collapse_results, identity_results)
    # The reports contain non-ASCII characters, so the encoding is pinned to
    # UTF-8 instead of relying on the platform's locale default.
    (package_dir / "PRESENTATION_HALF_LIFE_REGULARIZATION.md").write_text(
        presentation, encoding="utf-8"
    )
    print(f" Presentation written to: {package_dir}/PRESENTATION_HALF_LIFE_REGULARIZATION.md")

    # --- Part 5: Summary Report ---
    summary_report = generate_summary(all_results, identity_results)
    (package_dir / "SUMMARY.md").write_text(summary_report, encoding="utf-8")
    print(f" Summary written to: {package_dir}/SUMMARY.md")

    # Save all results
    with open(package_dir / "all_results.json", "w", encoding="utf-8") as fh:
        json.dump(all_results, fh, indent=2, default=str)

    # --- Part 6: Create ZIP ---
    _print_banner("PART 6: PACKAGING")
    # base_name is the package directory path itself, so the archive is
    # written next to the directory (``<package_dir>.zip``), not inside it.
    zip_path = shutil.make_archive(str(package_dir), 'zip', package_dir)
    print(f" ZIP archive created: {zip_path}")

    # --- Final Summary ---
    _print_banner("EXPERIMENT COMPLETE")
    print(f"\nPackage location: {package_dir}/")
    print(f"ZIP archive: {zip_path}")
    print("\nContents:")
    # Sorted so the listing is stable across platforms and runs.
    for entry in sorted(package_dir.iterdir()):
        print(f" - {entry.name}")

    # Print key results
    print("\n" + "-" * 70)
    print("KEY FINDINGS")
    print("-" * 70)

    print(f"\nWithout Half-Life Regularization: {without_verdict}")
    print(f"With Half-Life Regularization: {with_verdict}")

    if "PASS" in with_verdict and "FAIL" in without_verdict:
        print("\n✓ HALF-LIFE REGULARIZATION IS DECISIVE")
        print(" The regularizer enables identity preservation across long contexts.")
        print(" This validates Melanie/Tiago's hypothesis about half-life collapse.")
    elif "PASS" in with_verdict:
        print("\n✓ IDENTITY PRESERVATION CONFIRMED")
        print(" Both conditions show identity basin dynamics.")
    else:
        print("\n✗ FURTHER INVESTIGATION NEEDED")
        print(" Identity preservation not confirmed in either condition.")

    print("\n" + "=" * 70)

    return {
        "package_dir": str(package_dir),
        "zip_path": zip_path,
        "results": all_results,
    }
165
+
166
+
167
def _preservation_rows(curve) -> str:
    """Render preservation-curve points as markdown table rows, one per line."""
    rows = []
    for point in curve:
        # A point counts as preserved when at least half the trials preserved it.
        status = "✓" if point["preserved_rate"] >= 0.5 else "✗"
        rows.append(
            f"| {point['k']:,} | {point['preserved_rate']:.0%} {status} "
            f"| {point['mean_retention']:.1%} |\n"
        )
    return "".join(rows)


def _collapse_rows(collapse_results: dict) -> str:
    """Render the initial / collapsed / regularized states as markdown table rows."""
    rows = []
    for label, key in (
        ("Initial", "initial"),
        ("Collapsed", "collapsed"),
        ("After 1 Step", "regularized"),
    ):
        state = collapse_results[key]
        metrics = state["metrics"]
        rows.append(
            f"| {label} | {state['loss']:.6f} "
            f"| [{metrics['tau_min']:.1f}, {metrics['tau_max']:.1f}] "
            f"| {metrics['n_long_range']} |"
        )
    return "\n".join(rows)


def generate_presentation(
    collapse_results: dict,
    identity_results: dict
) -> str:
    """Generate presentation slides as a single markdown string.

    Args:
        collapse_results: Mapping with ``initial``, ``collapsed`` and
            ``regularized`` entries, each holding a ``loss`` and a ``metrics``
            dict with ``tau_min``, ``tau_max`` and ``n_long_range``.
        identity_results: Mapping with ``without_regularization`` and
            ``with_regularization`` entries, each holding an ``analysis`` dict
            with ``verdict``, ``critical_k``, ``transition_type`` and
            ``preservation_curve`` (points with ``k``, ``preserved_rate``,
            ``mean_retention``).

    Returns:
        The markdown text of the slide deck.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    without = identity_results["without_regularization"]["analysis"]
    with_reg = identity_results["with_regularization"]["analysis"]

    today = datetime.now().strftime("%Y-%m-%d")
    collapse_table = _collapse_rows(collapse_results)

    intro = f"""# Half-Life Regularization for FDRA
## Addressing Long-Context Collapse in Frequency-Domain Recurrent Architectures

**Date:** {today}

---

# The Problem

## Melanie/Tiago's Discovery

During training at GPT-2 scale:
- All oscillator half-lives collapse to < 10 steps
- Model passes short-context benchmarks
- But fails on long-context QA and summarization

**Key insight:** The model "forgets" early context because no oscillators maintain it.

---

# Half-Life Fundamentals

## What is Half-Life?

For decay parameter λ_i:
```
h_i(t+1) = λ_i * h_i(t) + u_i(t)
```

Half-life τ_i = ln(0.5) / ln(λ_i)
= Number of steps for signal to decay to 50%

## The Collapse

| State | τ Range | Long-range Oscillators |
|-------|---------|------------------------|
| Initial (good) | [1, 4096] | 50% |
| Collapsed (bad) | [2, 10] | 0% |

---

# The Solution

## Half-Life Regularizer

**Goal:** Maintain log-uniform distribution of half-lives

### Loss 1: Log-Uniform Prior
```
z_i = log(τ_i)
L_HL = α*(μ(z) - μ*)² + β*(σ²(z) - σ²*)²
```

### Loss 2: Long-Tail Survival
```
s_i = σ(k * (τ_i - γ*L))
L_tail = max(0, ρ - mean(s_i))²
```

---

# Collapse and Recovery

## Regularizer Demonstration

| State | Loss | τ Range | Long-range |
|-------|------|---------|------------|
{collapse_table}

**The regularizer provides gradients that restore long-range oscillators.**

---

# The Decisive Experiment

## Identity Reconstruction Under Forced Forgetting

**Protocol:**
1. Encode identity invariants (once)
2. Inject K tokens of interference
3. Probe for reconstruction (no hints)
4. Sweep K to find phase transition

**Success Signature:**
- Flat performance → sharp collapse (basin structure)

**Failure Signature:**
- Gradual decay (memory-dependent, not basin)

---

# Results: Without Regularization

| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""

    without_footer = f"""
**Verdict:** {without['verdict']}
**Critical K:** {without['critical_k']}
**Transition:** {without['transition_type']}

---

# Results: With Regularization

| K (tokens) | Preserved | Mean Retention |
|------------|-----------|----------------|
"""

    comparison = f"""
**Verdict:** {with_reg['verdict']}
**Critical K:** {with_reg['critical_k']}
**Transition:** {with_reg['transition_type']}

---

# Comparison

| Metric | Without Regularization | With Regularization |
|--------|------------------------|---------------------|
| Verdict | {without['verdict']} | {with_reg['verdict']} |
| Critical K | {without['critical_k']} | {with_reg['critical_k']} |
| Transition | {without['transition_type']} | {with_reg['transition_type']} |

"""

    sections = [
        intro,
        _preservation_rows(without["preservation_curve"]),
        without_footer,
        _preservation_rows(with_reg["preservation_curve"]),
        comparison,
    ]

    # The "decisive" slide is only added when regularization passes where the
    # unregularized run fails.
    if "PASS" in with_reg['verdict'] and "FAIL" in without['verdict']:
        sections.append("""
## ✓ Half-Life Regularization is Decisive

The regularizer enables identity preservation that fails without it.
This validates Melanie/Tiago's hypothesis.
""")

    sections.append("""
---

# Implications

## For Fractal AGI / FDRA

1. **The problem is identified:** Half-life collapse during training
2. **The fix is surgical:** Add regularizer to training loss
3. **The test is decisive:** Identity reconstruction sweep

## For Long-Context LLMs

- Same mechanism may apply to other recurrent architectures
- Half-life diversity is a necessary condition for long-range coherence
- Regularization is cheaper than architectural changes

---

# Next Steps

1. **Integrate regularizer into training loop**
2. **Test on actual language modeling**
3. **Evaluate on QA and summarization benchmarks**
4. **Compare with Mamba and other SSMs**

---

# Conclusion

> "The system is doing exactly what we trained it to do;
> now we need to train it to value what we actually built it for."

Half-life regularization provides the gradient signal to maintain
long-range memory that training pressure otherwise erases.

**The architecture was right. The training objective was incomplete.**

---

*Presentation generated by run_half_life_experiment.py*
""")

    return "".join(sections)
365
+
366
+
367
def generate_summary(all_results: dict, identity_results: dict) -> str:
    """Build the markdown summary report for one experiment run.

    Args:
        all_results: Collected results dict; accepted for interface
            compatibility and not read by this function.
        identity_results: Mapping with ``without_regularization`` and
            ``with_regularization`` entries, each holding an ``analysis`` dict
            with ``verdict`` and ``critical_k``.

    Returns:
        The summary as a markdown string.
    """
    baseline = identity_results["without_regularization"]["analysis"]
    regularized = identity_results["with_regularization"]["analysis"]

    header = f"""# Half-Life Regularization Experiment Summary

**Generated:** {datetime.now().isoformat()}

## Overview

This experiment suite addresses the half-life collapse problem discovered by Melanie/Tiago:
> "After training at GPT-2 scale, oscillator half-lives collapse to ~10 steps."

## Key Results

### Collapse and Recovery

The half-life regularizer successfully provides gradients to restore long-range oscillators:
- Initial distribution: Log-uniform over [1, 4096]
- Collapsed distribution: All < 10 steps
- After regularization step: Distribution spreads back toward target

### Identity Reconstruction

| Condition | Verdict | Critical K |
|-----------|---------|------------|
| Without Regularization | {baseline['verdict']} | {baseline['critical_k']} |
| With Regularization | {regularized['verdict']} | {regularized['critical_k']} |

## Conclusion

"""

    # Pick the conclusion paragraph; the baseline verdict is only inspected
    # once the regularized run is known to pass.
    if "PASS" not in regularized['verdict']:
        conclusion = """**Further investigation needed.**

Neither condition shows clear identity preservation. This may indicate:
- Architecture needs deeper modifications
- Test parameters need adjustment
- Different identity encoding approach required
"""
    elif "FAIL" in baseline['verdict']:
        conclusion = """**Half-life regularization is decisive for long-context coherence.**

The experiment confirms:
1. Half-life collapse prevents long-range identity preservation
2. The regularizer restores the capacity for long-context reasoning
3. This validates the hypothesis from Melanie/Tiago's discovery
"""
    else:
        conclusion = """**Identity preservation confirmed.**

Both conditions show basin-like dynamics. The regularizer may provide
additional margin but is not strictly required for the tested range.
"""

    footer = """
## Files Included

- `collapse_recovery.json` - Half-life collapse/recovery data
- `identity_reconstruction/` - Full experiment results
- `PRESENTATION_HALF_LIFE_REGULARIZATION.md` - Slides
- `all_results.json` - Complete results data

## Recommendations

1. Integrate `HalfLifeRegularizer` into FDRA training loss
2. Set `lambda1 = 0.01`, `lambda2 = 0.01` as starting points
3. Monitor half-life histogram during training
4. Test on long-context benchmarks (QA, summarization)

---

*Generated by run_half_life_experiment.py*
"""

    return "".join((header, conclusion, footer))
446
+
447
+
448
# Script entry point: run the full experiment suite with the default
# output directory when this file is executed directly.
if __name__ == "__main__":
    run_all_experiments()