| """ | |
| Operon Chaperone — Interactive Gradio Demo | |
| ========================================== | |
| Try the Chaperone's multi-strategy cascade for recovering structured data | |
| from malformed LLM output. Paste broken JSON, pick a schema, and watch | |
| the cascade (STRICT -> EXTRACTION -> LENIENT -> REPAIR) recover it. | |
| Run locally: | |
| pip install gradio | |
| python space/app.py | |
| Deploy to HuggingFace Spaces: | |
| Copy this directory to a new HF Space with sdk=gradio. | |
| """ | |
import html
import json
import sys
from pathlib import Path

import gradio as gr
from pydantic import BaseModel
# Allow importing operon_ai from the repo root when running locally
# (this file lives one directory below the repo root, e.g. space/app.py).
_repo_root = Path(__file__).resolve().parent.parent
if str(_repo_root) not in sys.path:
    # Prepend so the local checkout wins over any installed copy.
    sys.path.insert(0, str(_repo_root))
| from operon_ai import Chaperone, FoldingStrategy, EnhancedFoldedProtein | |
| # --------------------------------------------------------------------------- | |
| # Preset schemas | |
| # --------------------------------------------------------------------------- | |
class UserProfile(BaseModel):
    """Preset folding target: a minimal user record."""

    name: str
    age: int
    email: str
class TaskItem(BaseModel):
    """Preset folding target: a single to-do item."""

    title: str
    completed: bool
    priority: int
class APIResponse(BaseModel):
    """Preset folding target: a generic API response envelope."""

    status: str
    code: int
    data: dict
class FunctionCall(BaseModel):
    """Preset folding target: an LLM tool/function invocation."""

    name: str
    arguments: dict
# Dropdown label -> Pydantic model used as the Chaperone folding target.
# The labels double as UI strings, so they list the model's fields.
SCHEMAS: dict[str, type[BaseModel]] = {
    "UserProfile (name, age, email)": UserProfile,
    "TaskItem (title, completed, priority)": TaskItem,
    "APIResponse (status, code, data)": APIResponse,
    "FunctionCall (name, arguments)": FunctionCall,
}
| # --------------------------------------------------------------------------- | |
| # Preset examples | |
| # --------------------------------------------------------------------------- | |
# Preset demo inputs keyed by dropdown label. Each entry carries the raw
# text to fold ("input") and the SCHEMAS key to validate against ("schema").
# The parenthesized name in each label is the cascade stage expected to win.
EXAMPLES: dict[str, dict[str, str]] = {
    "Perfect JSON (STRICT)": {
        "input": '{"name": "Alice", "age": 30, "email": "alice@example.com"}',
        "schema": "UserProfile (name, age, email)",
    },
    "Markdown-wrapped (EXTRACTION)": {
        "input": (
            "Here is the data you requested:\n\n"
            "```json\n"
            '{"name": "Bob", "age": 25, "email": "bob@example.com"}\n'
            "```\n\n"
            "Let me know if you need anything else!"
        ),
        "schema": "UserProfile (name, age, email)",
    },
    "Wrong types (LENIENT)": {
        "input": '{"name": "Charlie", "age": "35", "email": "charlie@example.com"}',
        "schema": "UserProfile (name, age, email)",
    },
    "Single quotes + trailing comma (REPAIR)": {
        "input": "{'name': 'Diana', 'age': 28, 'email': 'diana@example.com',}",
        "schema": "UserProfile (name, age, email)",
    },
    "Python literals (REPAIR)": {
        "input": '{"title": "Buy groceries", "completed": True, "priority": 3}',
        "schema": "TaskItem (title, completed, priority)",
    },
    "XML-tagged JSON (EXTRACTION)": {
        "input": (
            "Processing complete.\n"
            '<json>{"status": "ok", "code": 200, "data": {"key": "value"}}</json>\n'
            "End of response."
        ),
        "schema": "APIResponse (status, code, data)",
    },
    "Unquoted keys (REPAIR)": {
        "input": '{name: "Eve", age: 42, email: "eve@example.com"}',
        "schema": "UserProfile (name, age, email)",
    },
    "Function call in code block (EXTRACTION)": {
        "input": (
            "I'll call the weather function for you:\n\n"
            "```\n"
            '{"name": "get_weather", "arguments": {"city": "Tokyo", "units": "celsius"}}\n'
            "```"
        ),
        "schema": "FunctionCall (name, arguments)",
    },
    "Completely invalid (ALL FAIL)": {
        "input": "This is not JSON at all, just plain text about the weather.",
        "schema": "UserProfile (name, age, email)",
    },
}
| # --------------------------------------------------------------------------- | |
| # Core logic | |
| # --------------------------------------------------------------------------- | |
# Display label and badge color (hex) for each cascade strategy, in the
# order the Chaperone tries them.
STRATEGY_LABELS = {
    FoldingStrategy.STRICT: ("STRICT", "#22c55e"),  # green
    FoldingStrategy.EXTRACTION: ("EXTRACTION", "#3b82f6"),  # blue
    FoldingStrategy.LENIENT: ("LENIENT", "#f59e0b"),  # amber
    FoldingStrategy.REPAIR: ("REPAIR", "#ef4444"),  # red
}
| def _format_confidence_bar(confidence: float) -> str: | |
| pct = int(confidence * 100) | |
| color = "#22c55e" if pct >= 90 else "#f59e0b" if pct >= 70 else "#ef4444" | |
| return ( | |
| f'<div style="display:flex;align-items:center;gap:8px;">' | |
| f'<div style="flex:1;background:#e5e7eb;border-radius:4px;height:20px;max-width:200px;">' | |
| f'<div style="width:{pct}%;background:{color};height:100%;border-radius:4px;"></div>' | |
| f'</div>' | |
| f'<span style="font-weight:600;">{pct}%</span>' | |
| f'</div>' | |
| ) | |
def _format_strategy_badge(strategy: FoldingStrategy | None) -> str:
    """Return a small colored HTML pill naming *strategy*.

    A grey "NONE" pill is produced when *strategy* is None (no strategy
    succeeded or none was attempted).
    """
    if strategy is None:
        label, color = "NONE", "#6b7280"
    else:
        label, color = STRATEGY_LABELS[strategy]
    return (
        f'<span style="background:{color};color:white;padding:2px 8px;'
        f'border-radius:4px;font-size:0.85em;">{label}</span>'
    )
def run_chaperone(raw_input: str, schema_name: str) -> tuple[str, str, str, str]:
    """Run the Chaperone cascade and return formatted results.

    Args:
        raw_input: Raw (possibly malformed) LLM text to fold.
        schema_name: Key into SCHEMAS selecting the target Pydantic model.

    Returns:
        Tuple of (result_html, parsed_json, cascade_trace, coercions):
        an HTML summary card, the recovered JSON pretty-printed (empty on
        failure), a markdown table tracing each strategy attempt, and a
        markdown list of the coercions/repairs applied.
    """
    if not raw_input.strip():
        return "Enter some text to fold.", "", "", ""
    schema_cls = SCHEMAS.get(schema_name)
    if schema_cls is None:
        return f"Unknown schema: {schema_name}", "", "", ""
    chap = Chaperone(silent=True)
    result: EnhancedFoldedProtein = chap.fold_enhanced(raw_input, schema_cls)
    # --- Result summary ---
    if result.valid:
        badge = _format_strategy_badge(result.strategy_used)
        conf_bar = _format_confidence_bar(result.confidence)
        result_html = (
            f'<div style="padding:12px;border-radius:8px;border:1px solid #22c55e;background:#f0fdf4;">'
            f'<div style="display:flex;align-items:center;gap:12px;margin-bottom:8px;">'
            f'<span style="font-size:1.2em;font-weight:700;color:#16a34a;">Folded Successfully</span>'
            f'{badge}'
            f'</div>'
            f'<div style="margin-top:8px;"><strong>Confidence:</strong> {conf_bar}</div>'
            f'</div>'
        )
    else:
        # Escape the trace before interpolating into HTML: parser errors
        # routinely contain <, >, and & which gr.HTML would otherwise
        # interpret as markup (or worse, echo attacker-controlled tags).
        safe_trace = html.escape(str(result.error_trace))
        result_html = (
            f'<div style="padding:12px;border-radius:8px;border:1px solid #ef4444;background:#fef2f2;">'
            f'<span style="font-size:1.2em;font-weight:700;color:#dc2626;">All strategies failed</span>'
            f'<p style="margin-top:8px;color:#7f1d1d;">{safe_trace}</p>'
            f'</div>'
        )
    # --- Parsed output ---
    if result.valid and result.structure is not None:
        parsed_json = json.dumps(result.structure.model_dump(), indent=2)
    else:
        parsed_json = ""
    # --- Cascade trace ---
    trace_rows = []
    for attempt in result.attempts:
        status = "Pass" if attempt.success else "Fail"
        status_color = "#22c55e" if attempt.success else "#ef4444"
        # Truncate long errors and escape '|' so an error message cannot
        # break the markdown table layout it is rendered inside.
        error_text = attempt.error[:80].replace("|", "\\|") if attempt.error else "-"
        trace_rows.append(
            f"| {_format_strategy_badge(attempt.strategy)} | "
            f'<span style="color:{status_color};font-weight:600;">{status}</span> | '
            f"{attempt.duration_ms:.1f} ms | "
            f"`{error_text}` |"
        )
    if trace_rows:
        cascade_trace = (
            "| Strategy | Result | Duration | Error |\n"
            "|----------|--------|----------|-------|\n"
            + "\n".join(trace_rows)
        )
    else:
        cascade_trace = "No attempts recorded (input may be empty)."
    # --- Coercions ---
    if result.coercions_applied:
        coercions = "\n".join(f"- `{c}`" for c in result.coercions_applied)
    else:
        coercions = "No coercions or repairs needed." if result.valid else "N/A (folding failed)."
    return result_html, parsed_json, cascade_trace, coercions
def load_example(example_name: str) -> tuple[str, str]:
    """Return (raw input text, schema dropdown value) for a preset example.

    Unknown names — including the "(custom)" sentinel choice — reset the
    input to empty and select the first schema.
    """
    try:
        example = EXAMPLES[example_name]
    except KeyError:
        return "", next(iter(SCHEMAS))
    return example["input"], example["schema"]
| # --------------------------------------------------------------------------- | |
| # BFCL benchmark results | |
| # --------------------------------------------------------------------------- | |
# Static markdown rendered in the "BFCL Results" tab (via gr.Markdown).
BFCL_RESULTS_MD = """
## BFCL v4 Benchmark Results

The Chaperone cascade improves function-call accuracy by recovering valid
structured output from malformed LLM responses. These results were obtained
by wrapping base models in **prompting mode** (no native function calling)
with Operon's Chaperone decode pipeline.

### Non-Live (Synthetic Test Cases)

| Model | Overall | Simple | Multiple | Parallel | Parallel Multiple | Irrelevance |
|-------|---------|--------|----------|----------|-------------------|-------------|
| GPT-4o-mini + Chaperone | **88.73%** | 79.42% | 94.00% | 92.00% | 89.50% | 87.92% |
| Gemini-2.5-Flash + Chaperone | **88.65%** | 78.08% | 92.50% | 95.50% | 88.50% | 93.33% |

### Live (Real-World API Calls)

| Model | Overall | Simple | Multiple | Parallel | Parallel Multiple | Irrelevance | Relevance |
|-------|---------|--------|----------|----------|-------------------|-------------|-----------|
| Gemini-2.5-Flash + Chaperone | **78.31%** | 87.60% | 75.97% | 81.25% | 79.17% | 87.78% | 62.50% |
| GPT-4o-mini + Chaperone | **76.98%** | 80.23% | 76.16% | 93.75% | 66.67% | 78.85% | 93.75% |

### How It Works

The Chaperone wraps any LLM's text output in a **4-stage cascade**:

1. **STRICT** -- Direct `json.loads()` parse. No modifications. Confidence: 100%.
2. **EXTRACTION** -- Find JSON inside markdown blocks, XML tags, or bare objects. Confidence: 90%.
3. **LENIENT** -- Extract JSON and coerce types (e.g., `"42"` to `42`). Confidence: ~80%.
4. **REPAIR** -- Fix trailing commas, single quotes, Python literals (`None`/`True`/`False`), unquoted keys. Confidence: ~70%.

Each strategy is tried in order. The first one that produces valid output (passes Pydantic validation) wins.

[View on GitHub](https://github.com/coredipper/operon) | [BFCL Leaderboard](https://gorilla.cs.berkeley.edu/leaderboard.html)
"""
| # --------------------------------------------------------------------------- | |
| # Gradio UI | |
| # --------------------------------------------------------------------------- | |
def build_app() -> gr.Blocks:
    """Assemble the Gradio UI: an interactive demo tab plus a results tab.

    Returns:
        The constructed (unlaunched) gr.Blocks application.
    """
    # NOTE(review): a theme (e.g. gr.themes.Soft()) would belong here on the
    # gr.Blocks(...) constructor, not on launch() — confirm against Gradio docs.
    with gr.Blocks(title="Operon Chaperone Demo") as app:
        # Page header with cascade summary and project links.
        gr.Markdown(
            "# Operon Chaperone\n"
            "Recover structured data from malformed LLM output through a "
            "multi-strategy cascade: **STRICT** > **EXTRACTION** > **LENIENT** > **REPAIR**.\n\n"
            "[GitHub](https://github.com/coredipper/operon) | "
            "[Paper](https://github.com/coredipper/operon/tree/main/article)"
        )
        with gr.Tabs():
            # ---- Tab 1: Interactive Demo ----
            with gr.TabItem("Try It"):
                with gr.Row():
                    # "(custom)" is a sentinel meaning no preset is loaded.
                    example_dropdown = gr.Dropdown(
                        choices=["(custom)"] + list(EXAMPLES.keys()),
                        value="(custom)",
                        label="Load Example",
                        scale=2,
                    )
                    schema_dropdown = gr.Dropdown(
                        choices=list(SCHEMAS.keys()),
                        value=list(SCHEMAS.keys())[0],
                        label="Target Schema",
                        scale=2,
                    )
                raw_input = gr.Textbox(
                    label="Raw LLM Output",
                    placeholder='Paste malformed JSON here, e.g. {\'name\': "Alice", \'age\': 30,}',
                    lines=6,
                )
                fold_btn = gr.Button("Fold", variant="primary", size="lg")
                result_html = gr.HTML(label="Result")
                with gr.Row():
                    parsed_output = gr.Code(
                        label="Parsed Output",
                        language="json",
                        lines=8,
                    )
                with gr.Row():
                    with gr.Column():
                        cascade_trace = gr.Markdown(label="Cascade Trace")
                    with gr.Column():
                        coercions_md = gr.Markdown(label="Repairs / Coercions Applied")
                # Wire events: both the button click and pressing Enter in the
                # textbox run the cascade; changing the example dropdown
                # repopulates the input and schema fields.
                fold_btn.click(
                    fn=run_chaperone,
                    inputs=[raw_input, schema_dropdown],
                    outputs=[result_html, parsed_output, cascade_trace, coercions_md],
                )
                raw_input.submit(
                    fn=run_chaperone,
                    inputs=[raw_input, schema_dropdown],
                    outputs=[result_html, parsed_output, cascade_trace, coercions_md],
                )
                example_dropdown.change(
                    fn=load_example,
                    inputs=[example_dropdown],
                    outputs=[raw_input, schema_dropdown],
                )
            # ---- Tab 2: BFCL Results ----
            with gr.TabItem("BFCL Results"):
                gr.Markdown(BFCL_RESULTS_MD)
    return app
| if __name__ == "__main__": | |
| app = build_app() | |
| app.launch(theme=gr.themes.Soft()) | |