import urllib.parse
import gradio as gr
import requests
MODAL_ENDPOINT = "https://no-name13--lean-proof-agent-fastapi-app.modal.run"
# ── Example theorems ──────────────────────────────────────────────────────────
# Lead with add_comm_nat (induction / multi-step) so the agent loop is visible.
# Keep zero_add as the simple contrast. False theorem always last.
EXAMPLES = [
(
"theorem add_comm_nat : ∀ n m : Nat, n + m = m + n",
"∀ n m · n + m = m + n — addition is commutative",
"provable",
),
(
"theorem reverse_reverse : ∀ (α : Type) (l : List α), l.reverse.reverse = l",
"∀ α l · reverse(reverse l) = l — reversing a list twice is identity",
"provable",
),
(
"theorem zero_add : ∀ n : Nat, 0 + n = n",
"∀ n · 0 + n = n — zero is the left identity for addition",
"provable",
),
(
"theorem currying : ∀ P Q R : Prop, (P ∧ Q → R) ↔ (P → Q → R)",
"∀ P Q R · (P∧Q→R) ↔ (P→Q→R) — currying",
"provable",
),
(
"theorem de_morgan_and : ∀ P Q : Prop, ¬(P ∧ Q) ↔ ¬P ∨ ¬Q",
"∀ P Q · ¬(P∧Q) ↔ ¬P∨¬Q — De Morgan's law",
"provable",
),
# FALSE theorem — the stuck detector concludes "not provable" within a few steps.
(
"theorem cannot_prove : ∀ n : Nat, n + 1 = n",
"∀ n · n+1 = n — FALSE: the agent correctly cannot prove this ✗",
"unprovable",
),
]
EXAMPLE_LOOKUP = {lean.strip(): (desc, kind) for lean, desc, kind in EXAMPLES}
TACTIC_EXPLANATIONS = {
"intro": "introduces a universally quantified variable or hypothesis into the local context",
"induction": "applies structural induction — splits into base case (zero) and inductive step (succ)",
"simp": "simplifies the goal using a library of known equalities and lemmas",
"rfl": "closes the goal when both sides are definitionally equal",
"omega": "decision procedure for linear arithmetic — automatically solves goals about integers/naturals",
"rw": "rewrites the goal using an equation",
"exact": "closes the goal by providing an exact proof term",
"apply": "applies a lemma whose conclusion matches the goal",
"constructor": "splits a conjunction (∧) or iff (↔) goal into its two parts",
"cases": "case-splits on a hypothesis or value",
"norm_num": "solves numeric goals like 2 + 2 = 4 or 3 ∣ 6",
"contradiction": "closes the goal when context contains P and ¬P (a direct contradiction)",
"assumption": "closes the goal when it matches a hypothesis in the local context exactly",
"tauto": "closes propositional tautologies automatically",
}
# ── Dark theme — mathematical / formal-proof terminal aesthetic ───────────────
# Wraps in try/except so a theme API mismatch never breaks the app.
CUSTOM_CSS = """
/* ── Base ───────────────────────────────────────────────── */
body, .gradio-container { background: #0d1117 !important; color: #c9d1d9 !important; }
.contain, .gap, .row { background: #0d1117 !important; }
/* ── Panels ─────────────────────────────────────────────── */
.block.padded, .block {
background: #161b22 !important;
border: 1px solid #30363d !important;
border-radius: 8px !important;
}
.form { background: #161b22 !important; }
/* ── All text — catch-all first, then specifics ──────────── */
* { color: #c9d1d9; }
/* ── Headings ────────────────────────────────────────────── */
h1, h2, h3, h4 { color: #e6edf3 !important; }
h1 { border-bottom: 1px solid #21262d; padding-bottom: 6px; }
/* ── Labels (component titles, slider/checkbox labels) ────── */
label, .label-wrap, .label-wrap span,
span.text-gray-500, span.text-sm,
.block > .label-wrap > span { color: #b0bec5 !important; }
/* ── Body text, lists, paragraphs ───────────────────────── */
p, li, em, span { color: #c9d1d9 !important; }
a { color: #58a6ff !important; }
strong, b { color: #e6edf3 !important; }
/* ── Slider ──────────────────────────────────────────────── */
input[type=range] { accent-color: #2ea043 !important; }
.range-input span, .range-input output,
input[type=range] + span, .slider-container span { color: #c9d1d9 !important; }
/* ── Checkbox ────────────────────────────────────────────── */
input[type=checkbox] { accent-color: #2ea043 !important; }
.checkbox-label, .checkbox-label span { color: #c9d1d9 !important; }
/* ── Inputs ──────────────────────────────────────────────── */
textarea, input[type=text], input[type=number] {
background: #0d1117 !important;
color: #e6edf3 !important;
border: 1px solid #30363d !important;
border-radius: 6px !important;
font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace !important;
}
textarea::placeholder { color: #484f58 !important; }
/* ── Primary button ──────────────────────────────────────── */
button.primary {
background: #1a3a1a !important;
border: 1px solid #2ea043 !important;
color: #3fb950 !important;
font-weight: bold !important;
letter-spacing: 0.05em !important;
transition: all 0.15s ease !important;
}
button.primary:hover {
background: #2ea043 !important;
color: #fff !important;
box-shadow: 0 0 10px #2ea04355 !important;
}
/* ── Secondary / example buttons ────────────────────────── */
button.secondary, button[variant=secondary] {
background: #21262d !important;
border: 1px solid #30363d !important;
color: #c9d1d9 !important;
font-size: 0.8em !important;
transition: all 0.12s ease !important;
}
button.secondary:hover { border-color: #3fb950 !important; color: #3fb950 !important; }
/* ── Code blocks ─────────────────────────────────────────── */
code {
background: #161b22 !important;
color: #79c0ff !important;
border: 1px solid #30363d !important;
border-radius: 4px !important;
padding: 2px 5px !important;
}
pre {
background: #161b22 !important;
border: 1px solid #30363d !important;
border-radius: 6px !important;
}
pre code { color: #c9d1d9 !important; border: none !important; padding: 0 !important; }
/* ── Markdown output ─────────────────────────────────────── */
.output-markdown, .output-markdown * { color: #c9d1d9 !important; }
.output-markdown h1, .output-markdown h2,
.output-markdown h3 { color: #e6edf3 !important; }
.output-markdown strong, .output-markdown b { color: #e6edf3 !important; }
.output-markdown a { color: #58a6ff !important; }
.output-markdown code { color: #79c0ff !important; }
.output-markdown hr { border-color: #30363d !important; }
/* ── Description italic below theorem input ──────────────── */
.prose em, .prose i, em, i { color: #8fb8d8 !important; }
"""
try:
_theme = gr.themes.Base(
primary_hue=gr.themes.colors.green,
neutral_hue=gr.themes.colors.zinc,
font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
)
except Exception:
_theme = None
# ── Verdict banner — three states, dark-theme colours ────────────────────────
def build_verdict_banner(success: bool, stuck: bool, english_desc: str = "") -> str:
font = "font-family:'JetBrains Mono','Fira Code',ui-monospace,monospace;"
if success:
desc_html = (
f'
'
f'{english_desc}
'
) if english_desc else ""
return (
f'
'
f'
'
f'✓ FORMALLY VERIFIED
'
f'{desc_html}'
f'
'
f'The AI proposed proof tactics. Lean 4\'s formal kernel '
f'checked every logical step against its axioms and accepted the proof.
'
f'This cannot be faked. '
f'Unlike a chatbot saying "yes, that\'s true," Lean\'s kernel rejects any gap in reasoning '
f'— no exceptions, no hallucinations. The proof below is machine-checked mathematics.'
f'
'
)
if stuck:
desc_html = (
f'
'
f'{english_desc}
'
) if english_desc else ""
return (
f'
'
f'
'
f'⚠ NOT PROVABLE AS STATED
'
f'{desc_html}'
f'
'
f'The agent detected it was stuck: the same goal state recurred with no progress.
'
f'This is a deliberate conclusion, not a failure. '
f'The claim as written cannot be proven — it may be mathematically false, '
f'or require axioms and tactics outside the current mode. '
f'Recognising when something is unprovable is part of what a formal proof agent does.'
f'
'
)
return (
f'
'
f'
'
f'✗ SEARCH INCOMPLETE
'
f'
'
f'The agent exhausted its step budget without completing the proof. '
f'Partial progress is shown below — try increasing the step limit or picking a simpler theorem.'
f'
'
)
# ── SVG proof tree — DO NOT MODIFY ───────────────────────────────────────────
def build_proof_tree_svg(steps: list, tactics: list, success: bool,
stuck: bool = False, claim: str = "") -> str:
if not steps:
return ""
NODE_W, NODE_H = 300, 44
STEP_H = 140
SVG_W = 820
CX = SVG_W // 2
n = len(steps)
title_h = 72 if claim else 52
SVG_H = title_h + (n + (1 if (success or stuck) else 0)) * STEP_H + NODE_H + 20
def esc(s):
return str(s).replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"')
def trunc(s, maxn=36):
s = str(s).replace('\n', ' ').strip()
return s[:maxn] + '…' if len(s) > maxn else s
out = []
out.append(f'')
return f'
{"".join(out)}
'
# ── Step walkthrough ──────────────────────────────────────────────────────────
def explain_tactic(tactic: str) -> str:
for key, explanation in TACTIC_EXPLANATIONS.items():
if tactic.strip().startswith(key):
return f"*`{key}` — {explanation}*"
return ""
def make_playground_url(theorem_stmt: str, tactics: list) -> str:
proof_lines = [f"{theorem_stmt} := by"] + [
f" {line}" for t in tactics for line in t.strip().split("\n")
]
code = "\n".join(proof_lines)
return "https://live.lean-lang.org/#code=" + urllib.parse.quote(code)
def format_steps(steps: list, tactics: list, stuck: bool = False,
theorem_stmt: str = "") -> str:
if not steps:
return ""
out = ["### Agent loop: propose → verify → learn\n"]
for s in steps:
goal = s['goal']
chosen = s.get('chosen', '')
candidates = s.get('candidates', [])
status = s.get('status', '')
error = s.get('error', '')
step_num = s['step']
out.append(f"---\n**Step {step_num}** — current goal:")
out.append(f"```\n{goal}\n```")
if not candidates:
out.append("⚠️ *LLM endpoint warming up — no candidates available at this step*")
else:
rejected = [c for c in candidates if c != chosen]
out.append(f"\U0001f916 **LLM proposed:** `{'`, `'.join(candidates)}`")
for r in rejected:
out.append(f"- ❌ `{r}` — Lean kernel rejected")
if error and rejected:
short_err = error.replace('\n', ' ')[:120]
out.append(f"- \U0001f4e2 *Kernel error fed back to agent:* `{short_err}`")
if chosen and status != 'all_failed':
exp = explain_tactic(chosen)
out.append(f"- ✅ `{chosen}` — Lean kernel accepted")
if exp:
out.append(f" {exp}")
out.append(f"- *Result:* `{status}`")
elif status == 'all_failed':
out.append("- ❌ All candidates rejected — feeding errors back, trying next step")
out.append("")
if stuck:
out.append(
"---\n⚠️ **Search concluded** — the same goal state recurred with no progress.\n"
"The claim could not be proven. It may be mathematically false, "
"or require axioms and tactics outside the current mode."
)
elif tactics:
proof_lines = ["by"] + [f" {line}" for t in tactics for line in t.strip().split("\n")]
proof_block = "\n".join(proof_lines)
out.append("---\n### Complete proof\n")
out.append("```lean4")
out.append(proof_block)
out.append("```")
out.append("\n---\n### Verify it yourself")
if theorem_stmt:
url = make_playground_url(theorem_stmt, tactics)
out.append(
f"[**▶ Open in Lean 4 web playground ↗**]({url})\n\n"
"The proof is pre-filled and ready to run. "
"The kernel outputs **\"Goals accomplished!\"** or rejects it if anything is wrong. "
"**It cannot be convinced. It cannot be fooled.**"
)
else:
out.append(
"Paste the proof above into [live.lean-lang.org ↗](https://live.lean-lang.org/). "
"The kernel outputs **\"Goals accomplished!\"** — or rejects it if anything is wrong."
)
return "\n".join(out)
# ── Main proof handler ────────────────────────────────────────────────────────
def prove_theorem(theorem: str, max_steps: int, use_fallbacks: bool):
if not theorem.strip():
yield "Please enter a theorem statement.", "", ""
return
yield "⏳ Sending to proof agent on Modal…", "", ""
lookup = EXAMPLE_LOOKUP.get(theorem.strip())
english_desc = lookup[0] if lookup else ""
try:
resp = requests.post(
f"{MODAL_ENDPOINT}/prove",
json={
"theorem": theorem,
"max_steps": max_steps,
"use_fallbacks": use_fallbacks,
"show_reasoning": True, # always run live loop, skip cache read
},
timeout=280,
)
data = resp.json()
except requests.exceptions.Timeout:
yield "❌ Request timed out. Try a simpler theorem or fewer max steps.", "", ""
return
except Exception as e:
yield f"❌ Error contacting proof agent: {e}", "", ""
return
warmup_note = ""
msg = data.get("message", "")
if "warming up" in msg.lower() or "unavailable" in msg.lower():
warmup_note = "⚠️ LLM endpoint warming up (cold start) — proof attempted with fallback tactics only.\n\n"
success = data["success"]
stuck = data.get("stuck", False)
banner = build_verdict_banner(success, stuck, english_desc)
details = format_steps(
data["steps"], data["tactics"],
stuck=stuck, theorem_stmt=theorem.strip()
)
svg_html = build_proof_tree_svg(
data["steps"], data["tactics"], success, stuck=stuck, claim=english_desc
)
yield warmup_note + banner, details, svg_html
# ── UI ────────────────────────────────────────────────────────────────────────
_blocks_kwargs: dict = dict(title="Q.E.D", css=CUSTOM_CSS)
if _theme is not None:
_blocks_kwargs["theme"] = _theme
with gr.Blocks(**_blocks_kwargs) as demo:
gr.Markdown("""
# ⊢ Q.E.D
**∀ theorem → ∃ proof** — LLM-guided formal verification, powered by Modal.
Enter a theorem in Lean 4 syntax (∀ ∃ ¬ ∧ ∨ → ↔ ℕ ℤ α all supported).
The agent proposes tactics, Lean's kernel verifies each step, and kernel errors feed back into the next proposal.
Watch it **prove** a true theorem — or **correctly conclude** a false one is unprovable.
""")
with gr.Row():
with gr.Column(scale=2):
theorem_input = gr.Textbox(
label="Theorem statement",
placeholder="theorem my_thm : ∀ n : Nat, 0 + n = n",
value=EXAMPLES[0][0],
lines=3,
)
desc_display = gr.Markdown(
value=f"*{EXAMPLES[0][1]}*",
label="",
)
with gr.Row():
max_steps = gr.Slider(5, 30, value=20, step=1, label="Max steps")
use_fallbacks = gr.Checkbox(value=True, label="Use fallback tactics (ω, simp…)")
prove_btn = gr.Button("⊢ Prove", variant="primary")
with gr.Column(scale=1):
gr.Markdown("**Examples** — click to load\n\n*∀ provable ones first, then a false one ↓*")
for lean_stmt, english_desc, kind in EXAMPLES:
btn = gr.Button(english_desc, size="sm")
btn.click(
fn=lambda lean=lean_stmt, d=english_desc: (lean, f"*{d}*"),
outputs=[theorem_input, desc_display],
)
banner_out = gr.HTML(label="Verdict")
steps_out = gr.Markdown(label="Agent loop walkthrough")
tree_out = gr.HTML(label="Proof search tree")
prove_btn.click(
fn=prove_theorem,
inputs=[theorem_input, max_steps, use_fallbacks],
outputs=[banner_out, steps_out, tree_out],
)
demo.queue()
demo.launch()