#!/usr/bin/env python3
"""
Commitment Conservation Demo - Interactive HuggingFace Space

Side-by-side comparison of baseline vs enforced compression.
"""

import os
import sys

import gradio as gr
import matplotlib.pyplot as plt  # noqa: F401  (retained; plotting below uses Figure directly)
import pandas as pd
from matplotlib.figure import Figure

# Add harness to path so the `src` package inside ./harness is importable.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'harness'))

from src.test_harness import recursion_test  # noqa: E402
from src.extraction import extract_hard_commitments  # noqa: E402

# Pre-selected demonstration signals (longer signals work better)
DEMO_SIGNALS = {
    "Payment Terms": "You must pay $100 by Friday if the deal closes; it's likely rainy, so plan accordingly.",
    "Function Contract": "This function must return an integer between 0 and 100. You shall not pass negative values.",
    "Lease Agreement": "The tenant shall not sublet the premises without written consent. You must provide 30 days notice before vacating.",
    "Safety Rule": "You must wear a helmet while cycling. Children under 12 cannot ride without adult supervision.",
    "Password Policy": "All passwords must be at least 8 characters long and shall include special characters.",
}

# Minimum enforced-minus-baseline gap (percentage points) for the summary to
# report that enforcement preserved commitments.
_SUCCESS_GAP_PP = 10


def _stability_series(fidelities):
    """Convert harness fidelities to percentages, prepending 100% for iteration 0."""
    # Iteration 0 is the original signal, which is treated as fully stable.
    # NOTE(review): assumes fidelities are fractions in [0, 1] -- confirm
    # against the harness.
    return [100.0] + [f * 100 for f in fidelities]


def _build_table(iterations, baseline_stability, enforced_stability):
    """Build the per-iteration comparison table shown in the UI."""
    return pd.DataFrame({
        'Iteration': iterations,
        'Baseline Stability (%)': [f"{s:.1f}%" for s in baseline_stability],
        'Enforced Stability (%)': [f"{s:.1f}%" for s in enforced_stability],
        # The "+" format flag emits the correct sign; a literal "+" prefix
        # would render negative gaps as "+-5.0".
        'Gap (pp)': [
            f"{e - b:+.1f}"
            for b, e in zip(baseline_stability, enforced_stability)
        ],
    })


def _build_plot(iterations, baseline_stability, enforced_stability):
    """Draw the baseline-vs-enforced stability chart and return the figure."""
    # Build the Figure directly instead of via pyplot: pyplot keeps every
    # figure alive in a global registry until it is explicitly closed, which
    # leaks memory in a long-running server process.
    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    ax.plot(iterations, baseline_stability, marker='o', label='Baseline',
            color='#d62728', linewidth=2)
    ax.plot(iterations, enforced_stability, marker='s', label='Enforced',
            color='#2ca02c', linewidth=2)
    ax.set_xlabel('Iteration', fontsize=12)
    ax.set_ylabel('Commitment Stability (%)', fontsize=12)
    ax.set_title('Baseline vs Enforced: Commitment Preservation',
                 fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    # Small margin around 0-100 so markers at the extremes are not clipped.
    ax.set_ylim([-5, 105])
    fig.tight_layout()
    return fig


def _build_summary(completed_iterations, final_baseline, final_enforced):
    """Format the Markdown summary of the final-iteration results."""
    gap = final_enforced - final_baseline
    if gap > _SUCCESS_GAP_PP:
        verdict = '✅ **Enforcement preserved commitments!**'
    else:
        verdict = '⚠️ Signal may need more iterations to show drift.'
    return (
        "\n"
        "## 📊 Results Summary\n"
        "\n"
        f"**After {completed_iterations} iterations:**\n"
        f"- **Baseline:** {final_baseline:.1f}% stability\n"
        f"- **Enforced:** {final_enforced:.1f}% stability\n"
        f"- **Improvement:** {gap:+.1f} percentage points\n"
        "\n"
        f"{verdict}\n"
        "\n"
        "*Full 10-iteration harness with 5 signals shows +40pp average "
        "improvement. Run locally for complete validation.*\n"
    )


def run_comparison(signal_text, num_iterations=3):
    """
    Run side-by-side comparison of baseline vs enforced compression.

    Args:
        signal_text: Text to compress recursively.
        num_iterations: Number of compression rounds; passed to the harness
            as ``depth`` after conversion to ``int``.

    Returns:
        A 4-tuple ``(commitment_text, table, figure, summary)`` for display.
        When the input is empty or the harness result contains an ``"error"``
        key, the first element is a warning message and the other three are
        ``None``.
    """
    if not signal_text or not signal_text.strip():
        return "⚠️ Please enter a text signal.", None, None, None

    # UI sliders may deliver the value as a float; range()/depth need an int.
    depth = int(num_iterations)

    # Extract original commitments
    original_commitments = extract_hard_commitments(signal_text)
    detected = ', '.join(original_commitments) if original_commitments else 'None detected'
    commitment_text = f"**Detected Commitments:** {detected}"

    # Run baseline (no enforcement)
    baseline_results = recursion_test(signal_text, depth=depth, enforce=False)
    if "error" in baseline_results:
        return f"⚠️ Error: {baseline_results['error']}", None, None, None

    # Run enforced (with commitment preservation)
    enforced_results = recursion_test(signal_text, depth=depth, enforce=True)
    if "error" in enforced_results:
        return f"⚠️ Error: {enforced_results['error']}", None, None, None

    baseline_stability = _stability_series(baseline_results['fidelities'])
    enforced_stability = _stability_series(enforced_results['fidelities'])

    # Derive the x-axis from the data actually returned rather than from the
    # requested depth, so a run that yields fewer points than requested still
    # produces equal-length columns instead of a DataFrame length error.
    n_points = min(len(baseline_stability), len(enforced_stability))
    baseline_stability = baseline_stability[:n_points]
    enforced_stability = enforced_stability[:n_points]
    iterations = list(range(n_points))

    df = _build_table(iterations, baseline_stability, enforced_stability)
    fig = _build_plot(iterations, baseline_stability, enforced_stability)
    summary = _build_summary(
        n_points - 1, baseline_stability[-1], enforced_stability[-1]
    )

    return commitment_text, df, fig, summary


# Gradio Interface
with gr.Blocks(title="Commitment Conservation Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # ⚖️ Commitment Conservation Interactive Demo

    **Watch semantic drift in recursive compression—and see how commitment enforcement prevents it.**

    This demo compares **baseline** transformer compression (which loses commitments) vs **enforced** compression (which preserves them).

    📄 [Paper (v0.03)](https://doi.org/10.5281/zenodo.18274930) | 💻 [Full Harness](https://huggingface.co/burnmydays/commitment_conservation_harness) | 🔬 [GitHub](https://github.com/SunrisesIllNeverSee/commitment-conservation) | 📊 [Interactive Data Report](https://gemini.google.com/share/8f46bbc61c2c)
    """)

    with gr.Row():
        with gr.Column(scale=2):
            signal_input = gr.Textbox(
                label="Input Signal (Text with Commitment)",
                placeholder="Enter text containing a commitment, obligation, or constraint...",
                lines=4,
                value=DEMO_SIGNALS["Function Contract"]
            )

            with gr.Row():
                preset_dropdown = gr.Dropdown(
                    choices=list(DEMO_SIGNALS.keys()),
                    label="Or select a preset example:",
                    value="Function Contract"
                )
                iterations_slider = gr.Slider(
                    minimum=1,
                    maximum=3,
                    step=1,
                    value=3,
                    label="Iterations (limited to 3 for speed)"
                )

            run_btn = gr.Button("🔬 Run Comparison", variant="primary", size="lg")

            gr.Markdown("""
            **How it works:**
            1. System extracts commitments from your text
            2. Compresses text recursively (3 iterations)
            3. Tracks whether commitments survive each round
            4. Compares baseline (drifts) vs enforced (preserves)

            *⏱️ Takes ~20-40 seconds on CPU. Models load on first run.*
            """)

        with gr.Column(scale=3):
            commitments_display = gr.Markdown(label="Extracted Commitments")
            results_table = gr.Dataframe(label="Stability Over Iterations")
            results_plot = gr.Plot(label="Comparison Chart")
            summary_display = gr.Markdown(label="Summary")

    # Event handlers
    def update_signal_from_preset(preset_name):
        """Return the demo text registered under the selected preset name."""
        return DEMO_SIGNALS[preset_name]

    # Selecting a preset overwrites the input textbox with that preset's text.
    preset_dropdown.change(
        fn=update_signal_from_preset,
        inputs=[preset_dropdown],
        outputs=[signal_input]
    )

    run_btn.click(
        fn=run_comparison,
        inputs=[signal_input, iterations_slider],
        outputs=[commitments_display, results_table, results_plot, summary_display]
    )

    gr.Markdown("""
    ---
    ## 📖 About This Framework

    This demonstrates the **commitment conservation principle**: meaningful commitments in language should be preserved under compression and recursive application.

    The full harness tests 5 signals over 10 iterations and shows **baseline systems fail (20% stability) while enforced systems succeed (60% stability)** — a 40pp empirical gap.

    ### 🔄 Current Limitations & Roadmap

    **Demo Status:** ✅ Functional proof-of-concept showing visual differentiation

    **Known Enhancements Coming Soon:**

    🔄 **Enforcement Stability Tuning** — Current results show 33-67% fidelity vs paper's 60% baseline. Root cause: Re-injected commitments can be lost in subsequent transformations. *Priority: Preserve commitments through full iteration pipeline.*

    🔄 **Output Text Comparison** — Demo currently shows graphs but not the actual text output. Users can't see the qualitative difference (baseline drift: "fam! 😂 You got this 💪" vs enforced preservation: "$100 Friday"). *Priority: Add side-by-side original→final comparison with commitment highlighting.*

    🔄 **Token Tracking** — No real-time token counts per turn to show efficiency gains. Test data proves **163% efficiency advantage** (baseline expands +79.6%, enforcement compresses -77.8%). *Priority: Display running token totals.*

    🔄 **Baseline Realism** — Currently uses BART compression for both baseline and enforced. Real LLMs expand via conversational drift. *Note: Documented as simulation limitation.*

    📊 **Validated Test Data:** Comprehensive analysis shows baseline expansion (230-316 tokens) vs enforcement compression (120-156 tokens) with 62% token reduction. [View full interactive analysis →](https://gemini.google.com/share/8f46bbc61c2c)

    **Research Harness:** Original git repository implements full paper methodology with spacy NLP and comprehensive metrics (13/13 tests passing).

    ---

    **⚖️ IP Notice:** MO§ES™ is a trademark of Ello Cello LLC. See [repo](https://huggingface.co/burnmydays/commitment_conservation_harness) for details.

    © 2026 Ello Cello LLC. All rights reserved.
    """)

if __name__ == "__main__":
    demo.launch()