""" Operon Chaperone — Interactive Gradio Demo ========================================== Try the Chaperone's multi-strategy cascade for recovering structured data from malformed LLM output. Paste broken JSON, pick a schema, and watch the cascade (STRICT -> EXTRACTION -> LENIENT -> REPAIR) recover it. Run locally: pip install gradio python space/app.py Deploy to HuggingFace Spaces: Copy this directory to a new HF Space with sdk=gradio. """ import json import sys from pathlib import Path import gradio as gr from pydantic import BaseModel # Allow importing operon_ai from the repo root when running locally _repo_root = Path(__file__).resolve().parent.parent if str(_repo_root) not in sys.path: sys.path.insert(0, str(_repo_root)) from operon_ai import Chaperone, FoldingStrategy, EnhancedFoldedProtein # --------------------------------------------------------------------------- # Preset schemas # --------------------------------------------------------------------------- class UserProfile(BaseModel): name: str age: int email: str class TaskItem(BaseModel): title: str completed: bool priority: int class APIResponse(BaseModel): status: str code: int data: dict class FunctionCall(BaseModel): name: str arguments: dict SCHEMAS: dict[str, type[BaseModel]] = { "UserProfile (name, age, email)": UserProfile, "TaskItem (title, completed, priority)": TaskItem, "APIResponse (status, code, data)": APIResponse, "FunctionCall (name, arguments)": FunctionCall, } # --------------------------------------------------------------------------- # Preset examples # --------------------------------------------------------------------------- EXAMPLES: dict[str, dict[str, str]] = { "Perfect JSON (STRICT)": { "input": '{"name": "Alice", "age": 30, "email": "alice@example.com"}', "schema": "UserProfile (name, age, email)", }, "Markdown-wrapped (EXTRACTION)": { "input": ( "Here is the data you requested:\n\n" "```json\n" '{"name": "Bob", "age": 25, "email": "bob@example.com"}\n' "```\n\n" "Let me know if you need anything else!" ), "schema": "UserProfile (name, age, email)", }, "Wrong types (LENIENT)": { "input": '{"name": "Charlie", "age": "35", "email": "charlie@example.com"}', "schema": "UserProfile (name, age, email)", }, "Single quotes + trailing comma (REPAIR)": { "input": "{'name': 'Diana', 'age': 28, 'email': 'diana@example.com',}", "schema": "UserProfile (name, age, email)", }, "Python literals (REPAIR)": { "input": '{"title": "Buy groceries", "completed": True, "priority": 3}', "schema": "TaskItem (title, completed, priority)", }, "XML-tagged JSON (EXTRACTION)": { "input": ( "Processing complete.\n" '{"status": "ok", "code": 200, "data": {"key": "value"}}\n' "End of response." ), "schema": "APIResponse (status, code, data)", }, "Unquoted keys (REPAIR)": { "input": '{name: "Eve", age: 42, email: "eve@example.com"}', "schema": "UserProfile (name, age, email)", }, "Function call in code block (EXTRACTION)": { "input": ( "I'll call the weather function for you:\n\n" "```\n" '{"name": "get_weather", "arguments": {"city": "Tokyo", "units": "celsius"}}\n' "```" ), "schema": "FunctionCall (name, arguments)", }, "Completely invalid (ALL FAIL)": { "input": "This is not JSON at all, just plain text about the weather.", "schema": "UserProfile (name, age, email)", }, } # --------------------------------------------------------------------------- # Core logic # --------------------------------------------------------------------------- STRATEGY_LABELS = { FoldingStrategy.STRICT: ("STRICT", "#22c55e"), # green FoldingStrategy.EXTRACTION: ("EXTRACTION", "#3b82f6"), # blue FoldingStrategy.LENIENT: ("LENIENT", "#f59e0b"), # amber FoldingStrategy.REPAIR: ("REPAIR", "#ef4444"), # red } def _format_confidence_bar(confidence: float) -> str: pct = int(confidence * 100) color = "#22c55e" if pct >= 90 else "#f59e0b" if pct >= 70 else "#ef4444" return ( f'
' f'
' f'
' f'
' f'{pct}%' f'
' ) def _format_strategy_badge(strategy: FoldingStrategy | None) -> str: if strategy is None: return 'NONE' label, color = STRATEGY_LABELS[strategy] return f'{label}' def run_chaperone(raw_input: str, schema_name: str) -> tuple[str, str, str, str]: """Run the Chaperone cascade and return formatted results. Returns (result_html, parsed_json, cascade_trace, coercions). """ if not raw_input.strip(): return "Enter some text to fold.", "", "", "" schema_cls = SCHEMAS.get(schema_name) if schema_cls is None: return f"Unknown schema: {schema_name}", "", "", "" chap = Chaperone(silent=True) result: EnhancedFoldedProtein = chap.fold_enhanced(raw_input, schema_cls) # --- Result summary --- if result.valid: badge = _format_strategy_badge(result.strategy_used) conf_bar = _format_confidence_bar(result.confidence) result_html = ( f'
' f'
' f'Folded Successfully' f'{badge}' f'
' f'
Confidence: {conf_bar}
' f'
' ) else: result_html = ( f'
' f'All strategies failed' f'

{result.error_trace}

' f'
' ) # --- Parsed output --- if result.valid and result.structure is not None: parsed_json = json.dumps(result.structure.model_dump(), indent=2) else: parsed_json = "" # --- Cascade trace --- trace_rows = [] for attempt in result.attempts: status = "Pass" if attempt.success else "Fail" status_color = "#22c55e" if attempt.success else "#ef4444" error_text = attempt.error[:80] if attempt.error else "-" trace_rows.append( f"| {_format_strategy_badge(attempt.strategy)} | " f'{status} | ' f"{attempt.duration_ms:.1f} ms | " f"`{error_text}` |" ) if trace_rows: cascade_trace = ( "| Strategy | Result | Duration | Error |\n" "|----------|--------|----------|-------|\n" + "\n".join(trace_rows) ) else: cascade_trace = "No attempts recorded (input may be empty)." # --- Coercions --- if result.coercions_applied: coercions = "\n".join(f"- `{c}`" for c in result.coercions_applied) else: coercions = "No coercions or repairs needed." if result.valid else "N/A (folding failed)." return result_html, parsed_json, cascade_trace, coercions def load_example(example_name: str) -> tuple[str, str]: """Load a preset example into the input fields.""" if example_name in EXAMPLES: ex = EXAMPLES[example_name] return ex["input"], ex["schema"] return "", list(SCHEMAS.keys())[0] # --------------------------------------------------------------------------- # BFCL benchmark results # --------------------------------------------------------------------------- BFCL_RESULTS_MD = """ ## BFCL v4 Benchmark Results The Chaperone cascade improves function-call accuracy by recovering valid structured output from malformed LLM responses. These results were obtained by wrapping base models in **prompting mode** (no native function calling) with Operon's Chaperone decode pipeline. ### Non-Live (Synthetic Test Cases) | Model | Overall | Simple | Multiple | Parallel | Parallel Multiple | Irrelevance | |-------|---------|--------|----------|----------|-------------------|-------------| | GPT-4o-mini + Chaperone | **88.73%** | 79.42% | 94.00% | 92.00% | 89.50% | 87.92% | | Gemini-2.5-Flash + Chaperone | **88.65%** | 78.08% | 92.50% | 95.50% | 88.50% | 93.33% | ### Live (Real-World API Calls) | Model | Overall | Simple | Multiple | Parallel | Parallel Multiple | Irrelevance | Relevance | |-------|---------|--------|----------|----------|-------------------|-------------|-----------| | Gemini-2.5-Flash + Chaperone | **78.31%** | 87.60% | 75.97% | 81.25% | 79.17% | 87.78% | 62.50% | | GPT-4o-mini + Chaperone | **76.98%** | 80.23% | 76.16% | 93.75% | 66.67% | 78.85% | 93.75% | ### How It Works The Chaperone wraps any LLM's text output in a **4-stage cascade**: 1. **STRICT** -- Direct `json.loads()` parse. No modifications. Confidence: 100%. 2. **EXTRACTION** -- Find JSON inside markdown blocks, XML tags, or bare objects. Confidence: 90%. 3. **LENIENT** -- Extract JSON and coerce types (e.g., `"42"` to `42`). Confidence: ~80%. 4. **REPAIR** -- Fix trailing commas, single quotes, Python literals (`None`/`True`/`False`), unquoted keys. Confidence: ~70%. Each strategy is tried in order. The first one that produces valid output (passes Pydantic validation) wins. [View on GitHub](https://github.com/coredipper/operon) | [BFCL Leaderboard](https://gorilla.cs.berkeley.edu/leaderboard.html) """ # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- def build_app() -> gr.Blocks: with gr.Blocks(title="Operon Chaperone Demo") as app: gr.Markdown( "# Operon Chaperone\n" "Recover structured data from malformed LLM output through a " "multi-strategy cascade: **STRICT** > **EXTRACTION** > **LENIENT** > **REPAIR**.\n\n" "[GitHub](https://github.com/coredipper/operon) | " "[Paper](https://github.com/coredipper/operon/tree/main/article)" ) with gr.Tabs(): # ---- Tab 1: Interactive Demo ---- with gr.TabItem("Try It"): with gr.Row(): example_dropdown = gr.Dropdown( choices=["(custom)"] + list(EXAMPLES.keys()), value="(custom)", label="Load Example", scale=2, ) schema_dropdown = gr.Dropdown( choices=list(SCHEMAS.keys()), value=list(SCHEMAS.keys())[0], label="Target Schema", scale=2, ) raw_input = gr.Textbox( label="Raw LLM Output", placeholder='Paste malformed JSON here, e.g. {\'name\': "Alice", \'age\': 30,}', lines=6, ) fold_btn = gr.Button("Fold", variant="primary", size="lg") result_html = gr.HTML(label="Result") with gr.Row(): parsed_output = gr.Code( label="Parsed Output", language="json", lines=8, ) with gr.Row(): with gr.Column(): cascade_trace = gr.Markdown(label="Cascade Trace") with gr.Column(): coercions_md = gr.Markdown(label="Repairs / Coercions Applied") # Wire events fold_btn.click( fn=run_chaperone, inputs=[raw_input, schema_dropdown], outputs=[result_html, parsed_output, cascade_trace, coercions_md], ) raw_input.submit( fn=run_chaperone, inputs=[raw_input, schema_dropdown], outputs=[result_html, parsed_output, cascade_trace, coercions_md], ) example_dropdown.change( fn=load_example, inputs=[example_dropdown], outputs=[raw_input, schema_dropdown], ) # ---- Tab 2: BFCL Results ---- with gr.TabItem("BFCL Results"): gr.Markdown(BFCL_RESULTS_MD) return app if __name__ == "__main__": app = build_app() app.launch(theme=gr.themes.Soft())