|
|
|
|
|
""" |
|
|
Compare baseline vs enforced compression for commitment conservation. |
|
|
This is the killer experiment: showing enforcement improves stability. |
|
|
""" |
|
|
import json |
|
|
import sys |
|
|
import os |
|
|
|
|
|
|
|
|
def script_directory(script_path):
    """Return the absolute directory that contains *script_path*.

    The path is made absolute before taking its directory, so a bare
    relative name such as ``"script.py"`` yields the current working
    directory instead of the empty string (``os.chdir("")`` raises
    ``FileNotFoundError``).
    """
    return os.path.dirname(os.path.abspath(script_path))


# Force the 'Agg' backend through the environment before the test harness
# is imported below.
# NOTE(review): presumably src.test_harness imports matplotlib, which reads
# MPLBACKEND at import time -- confirm against the harness.
os.environ['MPLBACKEND'] = 'Agg'

# Run from the script's own directory so relative paths used later
# (e.g. 'outputs/') resolve next to this file regardless of the caller's cwd.
os.chdir(script_directory(__file__))
|
|
|
|
|
from src.test_harness import recursion_test, compression_sweep |
|
|
|
|
|
|
|
|
# Input statements fed to both the recursion test and the compression sweep.
# Each one is a single sentence stating an obligation, prohibition or limit.
signals = [
    "This function must return an integer.",
    "The tenant shall not sublet the premises without written consent.",
    "You must wear a helmet while cycling.",
    "All passwords must be at least 8 characters long.",
    "The budget cannot exceed $5000.",
]
|
|
|
|
|
# Title banner: a 70-character rule above and below the heading.
print(
    "=" * 70,
    "COMMITMENT CONSERVATION: BASELINE vs ENFORCED COMPARISON",
    "=" * 70,
    sep="\n",
)

# Per-condition accumulators; every list gets one record appended per signal.
# The comprehension creates fresh, independent lists for each condition.
results = {
    condition: {"recursion": [], "compression": []}
    for condition in ("baseline", "enforced")
}
|
|
|
|
|
# Depth passed to every recursion_test call (and echoed in progress output).
RECURSION_DEPTH = 10


def _run_condition(signal, mode, label, header, enforce):
    """Run the recursion test and compression sweep for one signal.

    Appends one record to ``results[mode]["recursion"]`` and one to
    ``results[mode]["compression"]``, printing progress along the way.

    Args:
        signal: The input statement under test.
        mode: Key into ``results`` ("baseline" or "enforced").
        label: Human-readable condition name used in the progress lines.
        header: Text shown inside the ``--- ... ---`` section header.
        enforce: Forwarded as ``enforce=`` to both harness functions.

    Returns:
        ``(final_stability, avg_fidelity)`` for this signal and condition.
    """
    print(f"\n--- {header} ---")
    print(f" Running recursion test (depth={RECURSION_DEPTH})...")
    # NOTE(review): assumes recursion_test returns a non-empty sequence of
    # numeric deltas; only the last one feeds the stability score -- confirm.
    deltas = recursion_test(signal, depth=RECURSION_DEPTH, enforce=enforce)
    stability = 1.0 - deltas[-1]
    results[mode]["recursion"].append({
        "signal": signal,
        "deltas": deltas,
        "final_stability": stability,
    })
    print(f" ✓ {label} stability: {stability*100:.1f}%")

    print(" Running compression sweep...")
    # compression_sweep returns a pair; the first element is not needed here.
    # NOTE(review): assumes the second element is a non-empty sequence of
    # numeric fidelities -- confirm against the harness.
    _sigmas, fidelities = compression_sweep(signal, enforce=enforce)
    avg_fidelity = sum(fidelities) / len(fidelities)
    results[mode]["compression"].append({
        "signal": signal,
        "avg_fidelity": avg_fidelity,
        "fidelities": fidelities,
    })
    print(f" ✓ {label} avg fidelity: {avg_fidelity*100:.1f}%")

    return stability, avg_fidelity


for i, signal in enumerate(signals, 1):
    print(f"\n{'#'*70}")
    # Progress counter is derived from the list so it stays correct if
    # signals are added or removed.
    print(f"[{i}/{len(signals)}] Signal: {signal}")
    print(f"{'#'*70}")

    stability_base, avg_fid_base = _run_condition(
        signal, "baseline", "Baseline", "BASELINE (no enforcement)",
        enforce=False,
    )
    stability_enf, avg_fid_enf = _run_condition(
        signal, "enforced", "Enforced", "ENFORCED (commitment preservation)",
        enforce=True,
    )

    # Per-signal gains, expressed in percentage points.
    improvement_stability = (stability_enf - stability_base) * 100
    improvement_fidelity = (avg_fid_enf - avg_fid_base) * 100
    print("\n 📊 IMPROVEMENTS:")
    print(f" Stability: {improvement_stability:+.1f} pp")
    print(f" Fidelity: {improvement_fidelity:+.1f} pp")
|
|
|
|
|
|
|
|
def _mean_over_signals(records, field):
    """Return the sum of ``record[field]`` over *records* divided by ``len(signals)``."""
    return sum(record[field] for record in records) / len(signals)


# Cross-signal averages for each condition (one record per signal per list).
avg_stab_base = _mean_over_signals(results["baseline"]["recursion"], "final_stability")
avg_stab_enf = _mean_over_signals(results["enforced"]["recursion"], "final_stability")
avg_fid_base = _mean_over_signals(results["baseline"]["compression"], "avg_fidelity")
avg_fid_enf = _mean_over_signals(results["enforced"]["compression"], "avg_fidelity")
|
|
|
|
|
# Overall gains as fractions (multiply by 100 for percentage points).
stability_gain = avg_stab_enf - avg_stab_base
fidelity_gain = avg_fid_enf - avg_fid_base
rule = "=" * 70

print(f"\n{rule}")
# Signal count comes from the list so the header cannot drift out of date.
print(f"FINAL RESULTS (n={len(signals)} signals, 10 iterations each)")
print(rule)
print("\nRECURSION STABILITY:")
print(f" Baseline: {avg_stab_base*100:5.1f}%")
print(f" Enforced: {avg_stab_enf*100:5.1f}%")
print(f" Gain: {stability_gain*100:+5.1f} pp")

print("\nCOMPRESSION FIDELITY:")
print(f" Baseline: {avg_fid_base*100:5.1f}%")
print(f" Enforced: {avg_fid_enf*100:5.1f}%")
print(f" Gain: {fidelity_gain*100:+5.1f} pp")

print(f"\n{rule}")
print("KEY FINDING:")
if stability_gain > 0.4:
    # More than 40 percentage points: report the headline claim.
    print(f" ✓ Enforcement provides {stability_gain*100:.0f} pp stability gain")
    print(" This validates the core thesis: commitment-aware systems")
    print(" dramatically outperform baseline transformers.")
elif stability_gain > 0:
    print(f" Enforcement improves stability by {stability_gain*100:.1f} pp")
else:
    # Do not describe a zero or negative gain as an improvement.
    print(f" Enforcement does not improve stability ({stability_gain*100:+.1f} pp)")
print(f"{rule}\n")
|
|
|
|
|
|
|
|
# Persist the run: headline numbers under "summary", raw per-signal records
# under "detailed_results".
os.makedirs('outputs', exist_ok=True)

summary = {
    "n_signals": len(signals),
    "recursion_depth": 10,
    "baseline": {
        "avg_stability": avg_stab_base,
        "avg_fidelity": avg_fid_base,
    },
    "enforced": {
        "avg_stability": avg_stab_enf,
        "avg_fidelity": avg_fid_enf,
    },
    # Gains are stored in percentage points.
    "improvements": {
        "stability_gain_pp": (avg_stab_enf - avg_stab_base) * 100,
        "fidelity_gain_pp": (avg_fid_enf - avg_fid_base) * 100,
    },
}
payload = {"summary": summary, "detailed_results": results}

with open('outputs/enforcement_comparison.json', 'w') as f:
    json.dump(payload, f, indent=2)

print("✓ Detailed comparison saved to: outputs/enforcement_comparison.json")
|
|
|