import urllib.parse import gradio as gr import requests MODAL_ENDPOINT = "https://no-name13--lean-proof-agent-fastapi-app.modal.run" # ── Example theorems ────────────────────────────────────────────────────────── # Lead with add_comm_nat (induction / multi-step) so the agent loop is visible. # Keep zero_add as the simple contrast. False theorem always last. EXAMPLES = [ ( "theorem add_comm_nat : ∀ n m : Nat, n + m = m + n", "∀ n m · n + m = m + n — addition is commutative", "provable", ), ( "theorem reverse_reverse : ∀ (α : Type) (l : List α), l.reverse.reverse = l", "∀ α l · reverse(reverse l) = l — reversing a list twice is identity", "provable", ), ( "theorem zero_add : ∀ n : Nat, 0 + n = n", "∀ n · 0 + n = n — zero is the left identity for addition", "provable", ), ( "theorem currying : ∀ P Q R : Prop, (P ∧ Q → R) ↔ (P → Q → R)", "∀ P Q R · (P∧Q→R) ↔ (P→Q→R) — currying", "provable", ), ( "theorem de_morgan_and : ∀ P Q : Prop, ¬(P ∧ Q) ↔ ¬P ∨ ¬Q", "∀ P Q · ¬(P∧Q) ↔ ¬P∨¬Q — De Morgan's law", "provable", ), # FALSE theorem — the stuck detector concludes "not provable" within a few steps. ( "theorem cannot_prove : ∀ n : Nat, n + 1 = n", "∀ n · n+1 = n — FALSE: the agent correctly cannot prove this ✗", "unprovable", ), ] EXAMPLE_LOOKUP = {lean.strip(): (desc, kind) for lean, desc, kind in EXAMPLES} TACTIC_EXPLANATIONS = { "intro": "introduces a universally quantified variable or hypothesis into the local context", "induction": "applies structural induction — splits into base case (zero) and inductive step (succ)", "simp": "simplifies the goal using a library of known equalities and lemmas", "rfl": "closes the goal when both sides are definitionally equal", "omega": "decision procedure for linear arithmetic — automatically solves goals about integers/naturals", "rw": "rewrites the goal using an equation", "exact": "closes the goal by providing an exact proof term", "apply": "applies a lemma whose conclusion matches the goal", "constructor": "splits a conjunction (∧) or iff (↔) goal into its two parts", "cases": "case-splits on a hypothesis or value", "norm_num": "solves numeric goals like 2 + 2 = 4 or 3 ∣ 6", "contradiction": "closes the goal when context contains P and ¬P (a direct contradiction)", "assumption": "closes the goal when it matches a hypothesis in the local context exactly", "tauto": "closes propositional tautologies automatically", } # ── Dark theme — mathematical / formal-proof terminal aesthetic ─────────────── # Wraps in try/except so a theme API mismatch never breaks the app. CUSTOM_CSS = """ /* ── Base ───────────────────────────────────────────────── */ body, .gradio-container { background: #0d1117 !important; color: #c9d1d9 !important; } .contain, .gap, .row { background: #0d1117 !important; } /* ── Panels ─────────────────────────────────────────────── */ .block.padded, .block { background: #161b22 !important; border: 1px solid #30363d !important; border-radius: 8px !important; } .form { background: #161b22 !important; } /* ── All text — catch-all first, then specifics ──────────── */ * { color: #c9d1d9; } /* ── Headings ────────────────────────────────────────────── */ h1, h2, h3, h4 { color: #e6edf3 !important; } h1 { border-bottom: 1px solid #21262d; padding-bottom: 6px; } /* ── Labels (component titles, slider/checkbox labels) ────── */ label, .label-wrap, .label-wrap span, span.text-gray-500, span.text-sm, .block > .label-wrap > span { color: #b0bec5 !important; } /* ── Body text, lists, paragraphs ───────────────────────── */ p, li, em, span { color: #c9d1d9 !important; } a { color: #58a6ff !important; } strong, b { color: #e6edf3 !important; } /* ── Slider ──────────────────────────────────────────────── */ input[type=range] { accent-color: #2ea043 !important; } .range-input span, .range-input output, input[type=range] + span, .slider-container span { color: #c9d1d9 !important; } /* ── Checkbox ────────────────────────────────────────────── */ input[type=checkbox] { accent-color: #2ea043 !important; } .checkbox-label, .checkbox-label span { color: #c9d1d9 !important; } /* ── Inputs ──────────────────────────────────────────────── */ textarea, input[type=text], input[type=number] { background: #0d1117 !important; color: #e6edf3 !important; border: 1px solid #30363d !important; border-radius: 6px !important; font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace !important; } textarea::placeholder { color: #484f58 !important; } /* ── Primary button ──────────────────────────────────────── */ button.primary { background: #1a3a1a !important; border: 1px solid #2ea043 !important; color: #3fb950 !important; font-weight: bold !important; letter-spacing: 0.05em !important; transition: all 0.15s ease !important; } button.primary:hover { background: #2ea043 !important; color: #fff !important; box-shadow: 0 0 10px #2ea04355 !important; } /* ── Secondary / example buttons ────────────────────────── */ button.secondary, button[variant=secondary] { background: #21262d !important; border: 1px solid #30363d !important; color: #c9d1d9 !important; font-size: 0.8em !important; transition: all 0.12s ease !important; } button.secondary:hover { border-color: #3fb950 !important; color: #3fb950 !important; } /* ── Code blocks ─────────────────────────────────────────── */ code { background: #161b22 !important; color: #79c0ff !important; border: 1px solid #30363d !important; border-radius: 4px !important; padding: 2px 5px !important; } pre { background: #161b22 !important; border: 1px solid #30363d !important; border-radius: 6px !important; } pre code { color: #c9d1d9 !important; border: none !important; padding: 0 !important; } /* ── Markdown output ─────────────────────────────────────── */ .output-markdown, .output-markdown * { color: #c9d1d9 !important; } .output-markdown h1, .output-markdown h2, .output-markdown h3 { color: #e6edf3 !important; } .output-markdown strong, .output-markdown b { color: #e6edf3 !important; } .output-markdown a { color: #58a6ff !important; } .output-markdown code { color: #79c0ff !important; } .output-markdown hr { border-color: #30363d !important; } /* ── Description italic below theorem input ──────────────── */ .prose em, .prose i, em, i { color: #8fb8d8 !important; } """ try: _theme = gr.themes.Base( primary_hue=gr.themes.colors.green, neutral_hue=gr.themes.colors.zinc, font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"], font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"], ) except Exception: _theme = None # ── Verdict banner — three states, dark-theme colours ──────────────────────── def build_verdict_banner(success: bool, stuck: bool, english_desc: str = "") -> str: font = "font-family:'JetBrains Mono','Fira Code',ui-monospace,monospace;" if success: desc_html = ( f'
' f'{english_desc}
' ) if english_desc else "" return ( f'
' f'
' f'✓ FORMALLY VERIFIED
' f'{desc_html}' f'
' f'The AI proposed proof tactics. Lean 4\'s formal kernel ' f'checked every logical step against its axioms and accepted the proof.

' f'This cannot be faked. ' f'Unlike a chatbot saying "yes, that\'s true," Lean\'s kernel rejects any gap in reasoning ' f'— no exceptions, no hallucinations. The proof below is machine-checked mathematics.' f'
' ) if stuck: desc_html = ( f'
' f'{english_desc}
' ) if english_desc else "" return ( f'
' f'
' f'⚠ NOT PROVABLE AS STATED
' f'{desc_html}' f'
' f'The agent detected it was stuck: the same goal state recurred with no progress.

' f'This is a deliberate conclusion, not a failure. ' f'The claim as written cannot be proven — it may be mathematically false, ' f'or require axioms and tactics outside the current mode. ' f'Recognising when something is unprovable is part of what a formal proof agent does.' f'
' ) return ( f'
' f'
' f'✗ SEARCH INCOMPLETE
' f'
' f'The agent exhausted its step budget without completing the proof. ' f'Partial progress is shown below — try increasing the step limit or picking a simpler theorem.' f'
' ) # ── SVG proof tree — DO NOT MODIFY ─────────────────────────────────────────── def build_proof_tree_svg(steps: list, tactics: list, success: bool, stuck: bool = False, claim: str = "") -> str: if not steps: return "" NODE_W, NODE_H = 300, 44 STEP_H = 140 SVG_W = 820 CX = SVG_W // 2 n = len(steps) title_h = 72 if claim else 52 SVG_H = title_h + (n + (1 if (success or stuck) else 0)) * STEP_H + NODE_H + 20 def esc(s): return str(s).replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"') def trunc(s, maxn=36): s = str(s).replace('\n', ' ').strip() return s[:maxn] + '…' if len(s) > maxn else s out = [] out.append(f'') out.append(f'') out.append('') for mid, col in [('ahg', '#00e639'), ('ahr', '#f38ba8'), ('ahgr', '#6c7086'), ('aha', '#e6a817')]: out.append( f'' f'' ) out.append('') if success: title, tc = "Proof Tree — ✓ Verified", "#00e639" elif stuck: title, tc = "Proof Tree — ⚠ Concluded: Not Provable", "#e6a817" else: title, tc = "Proof Tree — ✗ Incomplete", "#f38ba8" out.append( f'{esc(title)}' ) if claim: out.append( f'' f'Claim: {esc(trunc(claim, 74))}' ) legend_y = title_h - 10 for lx, col, label in [(20, '#00e639', 'successful path'), (168, '#f38ba8', 'failed attempt')]: out.append(f'') out.append( f'{label}' ) for i, step in enumerate(steps): cy = title_h + i * STEP_H + NODE_H // 2 chosen = step.get('chosen', '') candidates = step.get('candidates', []) status = step.get('status', '') goal = step.get('goal', '') failed = [c for c in candidates if c != chosen] on_path = bool(chosen) and status != 'all_failed' if on_path and success: bg, border = '#001a00', '#00e639' elif status == 'all_failed': bg, border = '#1a0000', '#f38ba8' else: bg, border = '#050f05', '#2a5a2a' nx, ny = CX - NODE_W // 2, cy - NODE_H // 2 out.append( f'' ) out.append( f'' f'{esc(trunc(f"Step {i}: {goal}"))}' ) for j, fc in enumerate(failed): bx = CX + NODE_W // 2 + 55 + j * 75 by = cy + STEP_H // 3 out.append( f'' ) mx = (CX + NODE_W // 2 + bx) // 2 + 3 my = (cy + by) // 2 - 3 out.append( f'' f'{esc(trunc(fc, 18))}' ) out.append(f'') out.append( f'' ) next_cy = title_h + (i + 1) * STEP_H + NODE_H // 2 if on_path: color, mid = ('#00e639', 'ahg') if success else ('#6c7086', 'ahgr') out.append( f'' ) ly = (cy + NODE_H // 2 + next_cy - NODE_H // 2) // 2 out.append( f'' f'{esc(trunc(chosen, 28))}' ) elif status == 'all_failed': out.append( f'' ) if success or stuck: cy = title_h + n * STEP_H + NODE_H // 2 nx, ny = CX - NODE_W // 2, cy - NODE_H // 2 if success: node_fill, node_stroke = '#002800', '#00e639' node_text, node_col = '✓ QED — Goals accomplished!', '#00ff41' else: node_fill, node_stroke = '#201000', '#e6a817' node_text, node_col = '⚠ Concluded: not provable as stated', '#f5c842' out.append( f'' ) out.append( f'' f'{esc(node_text)}' ) out.append('') return f'
{"".join(out)}
' # ── Step walkthrough ────────────────────────────────────────────────────────── def explain_tactic(tactic: str) -> str: for key, explanation in TACTIC_EXPLANATIONS.items(): if tactic.strip().startswith(key): return f"*`{key}` — {explanation}*" return "" def make_playground_url(theorem_stmt: str, tactics: list) -> str: proof_lines = [f"{theorem_stmt} := by"] + [ f" {line}" for t in tactics for line in t.strip().split("\n") ] code = "\n".join(proof_lines) return "https://live.lean-lang.org/#code=" + urllib.parse.quote(code) def format_steps(steps: list, tactics: list, stuck: bool = False, theorem_stmt: str = "") -> str: if not steps: return "" out = ["### Agent loop: propose → verify → learn\n"] for s in steps: goal = s['goal'] chosen = s.get('chosen', '') candidates = s.get('candidates', []) status = s.get('status', '') error = s.get('error', '') step_num = s['step'] out.append(f"---\n**Step {step_num}** — current goal:") out.append(f"```\n{goal}\n```") if not candidates: out.append("⚠️ *LLM endpoint warming up — no candidates available at this step*") else: rejected = [c for c in candidates if c != chosen] out.append(f"\U0001f916 **LLM proposed:** `{'`, `'.join(candidates)}`") for r in rejected: out.append(f"- ❌ `{r}` — Lean kernel rejected") if error and rejected: short_err = error.replace('\n', ' ')[:120] out.append(f"- \U0001f4e2 *Kernel error fed back to agent:* `{short_err}`") if chosen and status != 'all_failed': exp = explain_tactic(chosen) out.append(f"- ✅ `{chosen}` — Lean kernel accepted") if exp: out.append(f" {exp}") out.append(f"- *Result:* `{status}`") elif status == 'all_failed': out.append("- ❌ All candidates rejected — feeding errors back, trying next step") out.append("") if stuck: out.append( "---\n⚠️ **Search concluded** — the same goal state recurred with no progress.\n" "The claim could not be proven. It may be mathematically false, " "or require axioms and tactics outside the current mode." ) elif tactics: proof_lines = ["by"] + [f" {line}" for t in tactics for line in t.strip().split("\n")] proof_block = "\n".join(proof_lines) out.append("---\n### Complete proof\n") out.append("```lean4") out.append(proof_block) out.append("```") out.append("\n---\n### Verify it yourself") if theorem_stmt: url = make_playground_url(theorem_stmt, tactics) out.append( f"[**▶ Open in Lean 4 web playground ↗**]({url})\n\n" "The proof is pre-filled and ready to run. " "The kernel outputs **\"Goals accomplished!\"** or rejects it if anything is wrong. " "**It cannot be convinced. It cannot be fooled.**" ) else: out.append( "Paste the proof above into [live.lean-lang.org ↗](https://live.lean-lang.org/). " "The kernel outputs **\"Goals accomplished!\"** — or rejects it if anything is wrong." ) return "\n".join(out) # ── Main proof handler ──────────────────────────────────────────────────────── def prove_theorem(theorem: str, max_steps: int, use_fallbacks: bool): if not theorem.strip(): yield "Please enter a theorem statement.", "", "" return yield "⏳ Sending to proof agent on Modal…", "", "" lookup = EXAMPLE_LOOKUP.get(theorem.strip()) english_desc = lookup[0] if lookup else "" try: resp = requests.post( f"{MODAL_ENDPOINT}/prove", json={ "theorem": theorem, "max_steps": max_steps, "use_fallbacks": use_fallbacks, "show_reasoning": True, # always run live loop, skip cache read }, timeout=280, ) data = resp.json() except requests.exceptions.Timeout: yield "❌ Request timed out. Try a simpler theorem or fewer max steps.", "", "" return except Exception as e: yield f"❌ Error contacting proof agent: {e}", "", "" return warmup_note = "" msg = data.get("message", "") if "warming up" in msg.lower() or "unavailable" in msg.lower(): warmup_note = "⚠️ LLM endpoint warming up (cold start) — proof attempted with fallback tactics only.\n\n" success = data["success"] stuck = data.get("stuck", False) banner = build_verdict_banner(success, stuck, english_desc) details = format_steps( data["steps"], data["tactics"], stuck=stuck, theorem_stmt=theorem.strip() ) svg_html = build_proof_tree_svg( data["steps"], data["tactics"], success, stuck=stuck, claim=english_desc ) yield warmup_note + banner, details, svg_html # ── UI ──────────────────────────────────────────────────────────────────────── _blocks_kwargs: dict = dict(title="Q.E.D", css=CUSTOM_CSS) if _theme is not None: _blocks_kwargs["theme"] = _theme with gr.Blocks(**_blocks_kwargs) as demo: gr.Markdown(""" # ⊢ Q.E.D **∀ theorem → ∃ proof** — LLM-guided formal verification, powered by Modal. Enter a theorem in Lean 4 syntax (∀ ∃ ¬ ∧ ∨ → ↔ ℕ ℤ α all supported). The agent proposes tactics, Lean's kernel verifies each step, and kernel errors feed back into the next proposal. Watch it **prove** a true theorem — or **correctly conclude** a false one is unprovable. """) with gr.Row(): with gr.Column(scale=2): theorem_input = gr.Textbox( label="Theorem statement", placeholder="theorem my_thm : ∀ n : Nat, 0 + n = n", value=EXAMPLES[0][0], lines=3, ) desc_display = gr.Markdown( value=f"*{EXAMPLES[0][1]}*", label="", ) with gr.Row(): max_steps = gr.Slider(5, 30, value=20, step=1, label="Max steps") use_fallbacks = gr.Checkbox(value=True, label="Use fallback tactics (ω, simp…)") prove_btn = gr.Button("⊢ Prove", variant="primary") with gr.Column(scale=1): gr.Markdown("**Examples** — click to load\n\n*∀ provable ones first, then a false one ↓*") for lean_stmt, english_desc, kind in EXAMPLES: btn = gr.Button(english_desc, size="sm") btn.click( fn=lambda lean=lean_stmt, d=english_desc: (lean, f"*{d}*"), outputs=[theorem_input, desc_display], ) banner_out = gr.HTML(label="Verdict") steps_out = gr.Markdown(label="Agent loop walkthrough") tree_out = gr.HTML(label="Proof search tree") prove_btn.click( fn=prove_theorem, inputs=[theorem_input, max_steps, use_fallbacks], outputs=[banner_out, steps_out, tree_out], ) demo.queue() demo.launch()