QED / front.py
SPP
Q.E.D β€” initial submission
ed428ff
Raw
History Blame Contribute Delete
25.5 kB
import urllib.parse
import gradio as gr
import requests
MODAL_ENDPOINT = "https://no-name13--lean-proof-agent-fastapi-app.modal.run"
# ── Example theorems ──────────────────────────────────────────────────────────
# Lead with add_comm_nat (induction / multi-step) so the agent loop is visible.
# Keep zero_add as the simple contrast. False theorem always last.
EXAMPLES = [
(
"theorem add_comm_nat : βˆ€ n m : Nat, n + m = m + n",
"βˆ€ n m Β· n + m = m + n β€” addition is commutative",
"provable",
),
(
"theorem reverse_reverse : βˆ€ (Ξ± : Type) (l : List Ξ±), l.reverse.reverse = l",
"βˆ€ Ξ± l Β· reverse(reverse l) = l β€” reversing a list twice is identity",
"provable",
),
(
"theorem zero_add : βˆ€ n : Nat, 0 + n = n",
"βˆ€ n Β· 0 + n = n β€” zero is the left identity for addition",
"provable",
),
(
"theorem currying : βˆ€ P Q R : Prop, (P ∧ Q β†’ R) ↔ (P β†’ Q β†’ R)",
"βˆ€ P Q R Β· (P∧Qβ†’R) ↔ (Pβ†’Qβ†’R) β€” currying",
"provable",
),
(
"theorem de_morgan_and : βˆ€ P Q : Prop, Β¬(P ∧ Q) ↔ Β¬P ∨ Β¬Q",
"βˆ€ P Q Β· Β¬(P∧Q) ↔ Β¬P∨¬Q β€” De Morgan's law",
"provable",
),
# FALSE theorem β€” the stuck detector concludes "not provable" within a few steps.
(
"theorem cannot_prove : βˆ€ n : Nat, n + 1 = n",
"βˆ€ n Β· n+1 = n β€” FALSE: the agent correctly cannot prove this βœ—",
"unprovable",
),
]
EXAMPLE_LOOKUP = {lean.strip(): (desc, kind) for lean, desc, kind in EXAMPLES}
TACTIC_EXPLANATIONS = {
"intro": "introduces a universally quantified variable or hypothesis into the local context",
"induction": "applies structural induction β€” splits into base case (zero) and inductive step (succ)",
"simp": "simplifies the goal using a library of known equalities and lemmas",
"rfl": "closes the goal when both sides are definitionally equal",
"omega": "decision procedure for linear arithmetic β€” automatically solves goals about integers/naturals",
"rw": "rewrites the goal using an equation",
"exact": "closes the goal by providing an exact proof term",
"apply": "applies a lemma whose conclusion matches the goal",
"constructor": "splits a conjunction (∧) or iff (↔) goal into its two parts",
"cases": "case-splits on a hypothesis or value",
"norm_num": "solves numeric goals like 2 + 2 = 4 or 3 ∣ 6",
"contradiction": "closes the goal when context contains P and Β¬P (a direct contradiction)",
"assumption": "closes the goal when it matches a hypothesis in the local context exactly",
"tauto": "closes propositional tautologies automatically",
}
# ── Dark theme β€” mathematical / formal-proof terminal aesthetic ───────────────
# Wraps in try/except so a theme API mismatch never breaks the app.
CUSTOM_CSS = """
/* ── Base ───────────────────────────────────────────────── */
body, .gradio-container { background: #0d1117 !important; color: #c9d1d9 !important; }
.contain, .gap, .row { background: #0d1117 !important; }
/* ── Panels ─────────────────────────────────────────────── */
.block.padded, .block {
background: #161b22 !important;
border: 1px solid #30363d !important;
border-radius: 8px !important;
}
.form { background: #161b22 !important; }
/* ── All text β€” catch-all first, then specifics ──────────── */
* { color: #c9d1d9; }
/* ── Headings ────────────────────────────────────────────── */
h1, h2, h3, h4 { color: #e6edf3 !important; }
h1 { border-bottom: 1px solid #21262d; padding-bottom: 6px; }
/* ── Labels (component titles, slider/checkbox labels) ────── */
label, .label-wrap, .label-wrap span,
span.text-gray-500, span.text-sm,
.block > .label-wrap > span { color: #b0bec5 !important; }
/* ── Body text, lists, paragraphs ───────────────────────── */
p, li, em, span { color: #c9d1d9 !important; }
a { color: #58a6ff !important; }
strong, b { color: #e6edf3 !important; }
/* ── Slider ──────────────────────────────────────────────── */
input[type=range] { accent-color: #2ea043 !important; }
.range-input span, .range-input output,
input[type=range] + span, .slider-container span { color: #c9d1d9 !important; }
/* ── Checkbox ────────────────────────────────────────────── */
input[type=checkbox] { accent-color: #2ea043 !important; }
.checkbox-label, .checkbox-label span { color: #c9d1d9 !important; }
/* ── Inputs ──────────────────────────────────────────────── */
textarea, input[type=text], input[type=number] {
background: #0d1117 !important;
color: #e6edf3 !important;
border: 1px solid #30363d !important;
border-radius: 6px !important;
font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace !important;
}
textarea::placeholder { color: #484f58 !important; }
/* ── Primary button ──────────────────────────────────────── */
button.primary {
background: #1a3a1a !important;
border: 1px solid #2ea043 !important;
color: #3fb950 !important;
font-weight: bold !important;
letter-spacing: 0.05em !important;
transition: all 0.15s ease !important;
}
button.primary:hover {
background: #2ea043 !important;
color: #fff !important;
box-shadow: 0 0 10px #2ea04355 !important;
}
/* ── Secondary / example buttons ────────────────────────── */
button.secondary, button[variant=secondary] {
background: #21262d !important;
border: 1px solid #30363d !important;
color: #c9d1d9 !important;
font-size: 0.8em !important;
transition: all 0.12s ease !important;
}
button.secondary:hover { border-color: #3fb950 !important; color: #3fb950 !important; }
/* ── Code blocks ─────────────────────────────────────────── */
code {
background: #161b22 !important;
color: #79c0ff !important;
border: 1px solid #30363d !important;
border-radius: 4px !important;
padding: 2px 5px !important;
}
pre {
background: #161b22 !important;
border: 1px solid #30363d !important;
border-radius: 6px !important;
}
pre code { color: #c9d1d9 !important; border: none !important; padding: 0 !important; }
/* ── Markdown output ─────────────────────────────────────── */
.output-markdown, .output-markdown * { color: #c9d1d9 !important; }
.output-markdown h1, .output-markdown h2,
.output-markdown h3 { color: #e6edf3 !important; }
.output-markdown strong, .output-markdown b { color: #e6edf3 !important; }
.output-markdown a { color: #58a6ff !important; }
.output-markdown code { color: #79c0ff !important; }
.output-markdown hr { border-color: #30363d !important; }
/* ── Description italic below theorem input ──────────────── */
.prose em, .prose i, em, i { color: #8fb8d8 !important; }
"""
try:
_theme = gr.themes.Base(
primary_hue=gr.themes.colors.green,
neutral_hue=gr.themes.colors.zinc,
font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
)
except Exception:
_theme = None
# ── Verdict banner β€” three states, dark-theme colours ────────────────────────
def build_verdict_banner(success: bool, stuck: bool, english_desc: str = "") -> str:
font = "font-family:'JetBrains Mono','Fira Code',ui-monospace,monospace;"
if success:
desc_html = (
f'<div style="font-size:1.0em;color:#7ee787;margin:6px 0 10px;">'
f'<em>{english_desc}</em></div>'
) if english_desc else ""
return (
f'<div style="background:#0d2818;border:2px solid #2ea043;border-radius:8px;'
f'padding:16px 22px;margin:10px 0;{font}">'
f'<div style="font-size:1.8em;font-weight:bold;color:#3fb950;letter-spacing:2px;margin-bottom:6px;">'
f'&#10003; FORMALLY VERIFIED</div>'
f'{desc_html}'
f'<div style="font-size:0.82em;color:#7ee787;line-height:1.75;'
f'border-top:1px solid #1a4a1a;padding-top:10px;margin-top:8px;">'
f'The AI proposed proof tactics. <strong style="color:#a0e8a0;">Lean 4\'s formal kernel</strong> '
f'checked every logical step against its axioms and accepted the proof.<br><br>'
f'<strong style="color:#3fb950;">This cannot be faked.</strong> '
f'Unlike a chatbot saying "yes, that\'s true," Lean\'s kernel rejects any gap in reasoning '
f'β€” no exceptions, no hallucinations. The proof below is machine-checked mathematics.'
f'</div></div>'
)
if stuck:
desc_html = (
f'<div style="font-size:1.0em;color:#e3b341;margin:6px 0 10px;">'
f'<em>{english_desc}</em></div>'
) if english_desc else ""
return (
f'<div style="background:#1c1400;border:2px solid #d29922;border-radius:8px;'
f'padding:16px 22px;margin:10px 0;{font}">'
f'<div style="font-size:1.7em;font-weight:bold;color:#e3b341;letter-spacing:1px;margin-bottom:6px;">'
f'&#9888; NOT PROVABLE AS STATED</div>'
f'{desc_html}'
f'<div style="font-size:0.82em;color:#c9a227;line-height:1.75;'
f'border-top:1px solid #3a2800;padding-top:10px;margin-top:8px;">'
f'The agent detected it was stuck: the same goal state recurred with no progress.<br><br>'
f'<strong style="color:#e3b341;">This is a deliberate conclusion, not a failure.</strong> '
f'The claim as written cannot be proven β€” it may be mathematically false, '
f'or require axioms and tactics outside the current mode. '
f'Recognising when something is unprovable is part of what a formal proof agent does.'
f'</div></div>'
)
return (
f'<div style="background:#1c0a0a;border:2px solid #c62828;border-radius:8px;'
f'padding:16px 22px;margin:10px 0;{font}">'
f'<div style="font-size:1.6em;font-weight:bold;color:#f85149;margin-bottom:8px;">'
f'&#10007; SEARCH INCOMPLETE</div>'
f'<div style="font-size:0.82em;color:#c9d1d9;line-height:1.7;">'
f'The agent exhausted its step budget without completing the proof. '
f'Partial progress is shown below β€” try increasing the step limit or picking a simpler theorem.'
f'</div></div>'
)
# ── SVG proof tree β€” DO NOT MODIFY ───────────────────────────────────────────
def build_proof_tree_svg(steps: list, tactics: list, success: bool,
stuck: bool = False, claim: str = "") -> str:
if not steps:
return ""
NODE_W, NODE_H = 300, 44
STEP_H = 140
SVG_W = 820
CX = SVG_W // 2
n = len(steps)
title_h = 72 if claim else 52
SVG_H = title_h + (n + (1 if (success or stuck) else 0)) * STEP_H + NODE_H + 20
def esc(s):
return str(s).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
def trunc(s, maxn=36):
s = str(s).replace('\n', ' ').strip()
return s[:maxn] + '…' if len(s) > maxn else s
out = []
out.append(f'<svg xmlns="http://www.w3.org/2000/svg" width="{SVG_W}" height="{SVG_H}">')
out.append(f'<rect width="{SVG_W}" height="{SVG_H}" fill="#030c04" rx="8"/>')
out.append('<defs>')
for mid, col in [('ahg', '#00e639'), ('ahr', '#f38ba8'), ('ahgr', '#6c7086'), ('aha', '#e6a817')]:
out.append(
f'<marker id="{mid}" markerWidth="8" markerHeight="6" refX="7" refY="3" orient="auto">'
f'<polygon points="0 0,8 3,0 6" fill="{col}"/></marker>'
)
out.append('</defs>')
if success:
title, tc = "Proof Tree β€” βœ“ Verified", "#00e639"
elif stuck:
title, tc = "Proof Tree β€” ⚠ Concluded: Not Provable", "#e6a817"
else:
title, tc = "Proof Tree β€” βœ— Incomplete", "#f38ba8"
out.append(
f'<text x="{CX}" y="28" text-anchor="middle" '
f'font-family="monospace" font-size="14" font-weight="bold" fill="{tc}">{esc(title)}</text>'
)
if claim:
out.append(
f'<text x="{CX}" y="47" text-anchor="middle" '
f'font-family="monospace" font-size="10" fill="#70b870">'
f'Claim: {esc(trunc(claim, 74))}</text>'
)
legend_y = title_h - 10
for lx, col, label in [(20, '#00e639', 'successful path'), (168, '#f38ba8', 'failed attempt')]:
out.append(f'<line x1="{lx}" y1="{legend_y}" x2="{lx+20}" y2="{legend_y}" stroke="{col}" stroke-width="2"/>')
out.append(
f'<text x="{lx+24}" y="{legend_y+4}" font-family="monospace" font-size="9" fill="{col}">{label}</text>'
)
for i, step in enumerate(steps):
cy = title_h + i * STEP_H + NODE_H // 2
chosen = step.get('chosen', '')
candidates = step.get('candidates', [])
status = step.get('status', '')
goal = step.get('goal', '')
failed = [c for c in candidates if c != chosen]
on_path = bool(chosen) and status != 'all_failed'
if on_path and success:
bg, border = '#001a00', '#00e639'
elif status == 'all_failed':
bg, border = '#1a0000', '#f38ba8'
else:
bg, border = '#050f05', '#2a5a2a'
nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
out.append(
f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
f'rx="5" fill="{bg}" stroke="{border}" stroke-width="1.5"/>'
)
out.append(
f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
f'font-family="monospace" font-size="10" fill="#98c898">'
f'{esc(trunc(f"Step {i}: {goal}"))}</text>'
)
for j, fc in enumerate(failed):
bx = CX + NODE_W // 2 + 55 + j * 75
by = cy + STEP_H // 3
out.append(
f'<line x1="{CX+NODE_W//2}" y1="{cy}" x2="{bx}" y2="{by}" '
f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3" marker-end="url(#ahr)"/>'
)
mx = (CX + NODE_W // 2 + bx) // 2 + 3
my = (cy + by) // 2 - 3
out.append(
f'<text x="{mx}" y="{my}" font-family="monospace" font-size="8" fill="#f38ba8">'
f'{esc(trunc(fc, 18))}</text>'
)
out.append(f'<circle cx="{bx}" cy="{by}" r="7" fill="#1a0000" stroke="#f38ba8" stroke-width="1"/>')
out.append(
f'<text x="{bx}" y="{by+4}" text-anchor="middle" '
f'font-family="monospace" font-size="9" fill="#f38ba8">βœ—</text>'
)
next_cy = title_h + (i + 1) * STEP_H + NODE_H // 2
if on_path:
color, mid = ('#00e639', 'ahg') if success else ('#6c7086', 'ahgr')
out.append(
f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{next_cy-NODE_H//2}" '
f'stroke="{color}" stroke-width="2" marker-end="url(#{mid})"/>'
)
ly = (cy + NODE_H // 2 + next_cy - NODE_H // 2) // 2
out.append(
f'<text x="{CX+6}" y="{ly}" font-family="monospace" font-size="9" fill="{color}">'
f'{esc(trunc(chosen, 28))}</text>'
)
elif status == 'all_failed':
out.append(
f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{cy+NODE_H//2+28}" '
f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3"/>'
)
if success or stuck:
cy = title_h + n * STEP_H + NODE_H // 2
nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
if success:
node_fill, node_stroke = '#002800', '#00e639'
node_text, node_col = 'βœ“ QED β€” Goals accomplished!', '#00ff41'
else:
node_fill, node_stroke = '#201000', '#e6a817'
node_text, node_col = '⚠ Concluded: not provable as stated', '#f5c842'
out.append(
f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
f'rx="5" fill="{node_fill}" stroke="{node_stroke}" stroke-width="2"/>'
)
out.append(
f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
f'font-family="monospace" font-size="11" font-weight="bold" fill="{node_col}">'
f'{esc(node_text)}</text>'
)
out.append('</svg>')
return f'<div style="overflow-x:auto;padding:8px">{"".join(out)}</div>'
# ── Step walkthrough ──────────────────────────────────────────────────────────
def explain_tactic(tactic: str) -> str:
for key, explanation in TACTIC_EXPLANATIONS.items():
if tactic.strip().startswith(key):
return f"*`{key}` β€” {explanation}*"
return ""
def make_playground_url(theorem_stmt: str, tactics: list) -> str:
proof_lines = [f"{theorem_stmt} := by"] + [
f" {line}" for t in tactics for line in t.strip().split("\n")
]
code = "\n".join(proof_lines)
return "https://live.lean-lang.org/#code=" + urllib.parse.quote(code)
def format_steps(steps: list, tactics: list, stuck: bool = False,
theorem_stmt: str = "") -> str:
if not steps:
return ""
out = ["### Agent loop: propose β†’ verify β†’ learn\n"]
for s in steps:
goal = s['goal']
chosen = s.get('chosen', '')
candidates = s.get('candidates', [])
status = s.get('status', '')
error = s.get('error', '')
step_num = s['step']
out.append(f"---\n**Step {step_num}** β€” current goal:")
out.append(f"```\n{goal}\n```")
if not candidates:
out.append("⚠️ *LLM endpoint warming up β€” no candidates available at this step*")
else:
rejected = [c for c in candidates if c != chosen]
out.append(f"\U0001f916 **LLM proposed:** `{'`, `'.join(candidates)}`")
for r in rejected:
out.append(f"- ❌ `{r}` β€” Lean kernel rejected")
if error and rejected:
short_err = error.replace('\n', ' ')[:120]
out.append(f"- \U0001f4e2 *Kernel error fed back to agent:* `{short_err}`")
if chosen and status != 'all_failed':
exp = explain_tactic(chosen)
out.append(f"- βœ… `{chosen}` β€” Lean kernel accepted")
if exp:
out.append(f" {exp}")
out.append(f"- *Result:* `{status}`")
elif status == 'all_failed':
out.append("- ❌ All candidates rejected β€” feeding errors back, trying next step")
out.append("")
if stuck:
out.append(
"---\n⚠️ **Search concluded** β€” the same goal state recurred with no progress.\n"
"The claim could not be proven. It may be mathematically false, "
"or require axioms and tactics outside the current mode."
)
elif tactics:
proof_lines = ["by"] + [f" {line}" for t in tactics for line in t.strip().split("\n")]
proof_block = "\n".join(proof_lines)
out.append("---\n### Complete proof\n")
out.append("```lean4")
out.append(proof_block)
out.append("```")
out.append("\n---\n### Verify it yourself")
if theorem_stmt:
url = make_playground_url(theorem_stmt, tactics)
out.append(
f"[**β–Ά Open in Lean 4 web playground β†—**]({url})\n\n"
"The proof is pre-filled and ready to run. "
"The kernel outputs **\"Goals accomplished!\"** or rejects it if anything is wrong. "
"**It cannot be convinced. It cannot be fooled.**"
)
else:
out.append(
"Paste the proof above into [live.lean-lang.org β†—](https://live.lean-lang.org/). "
"The kernel outputs **\"Goals accomplished!\"** β€” or rejects it if anything is wrong."
)
return "\n".join(out)
# ── Main proof handler ────────────────────────────────────────────────────────
def prove_theorem(theorem: str, max_steps: int, use_fallbacks: bool):
if not theorem.strip():
yield "Please enter a theorem statement.", "", ""
return
yield "⏳ Sending to proof agent on Modal…", "", ""
lookup = EXAMPLE_LOOKUP.get(theorem.strip())
english_desc = lookup[0] if lookup else ""
try:
resp = requests.post(
f"{MODAL_ENDPOINT}/prove",
json={
"theorem": theorem,
"max_steps": max_steps,
"use_fallbacks": use_fallbacks,
"show_reasoning": True, # always run live loop, skip cache read
},
timeout=280,
)
data = resp.json()
except requests.exceptions.Timeout:
yield "❌ Request timed out. Try a simpler theorem or fewer max steps.", "", ""
return
except Exception as e:
yield f"❌ Error contacting proof agent: {e}", "", ""
return
warmup_note = ""
msg = data.get("message", "")
if "warming up" in msg.lower() or "unavailable" in msg.lower():
warmup_note = "⚠️ LLM endpoint warming up (cold start) β€” proof attempted with fallback tactics only.\n\n"
success = data["success"]
stuck = data.get("stuck", False)
banner = build_verdict_banner(success, stuck, english_desc)
details = format_steps(
data["steps"], data["tactics"],
stuck=stuck, theorem_stmt=theorem.strip()
)
svg_html = build_proof_tree_svg(
data["steps"], data["tactics"], success, stuck=stuck, claim=english_desc
)
yield warmup_note + banner, details, svg_html
# ── UI ────────────────────────────────────────────────────────────────────────
_blocks_kwargs: dict = dict(title="Q.E.D", css=CUSTOM_CSS)
if _theme is not None:
_blocks_kwargs["theme"] = _theme
with gr.Blocks(**_blocks_kwargs) as demo:
gr.Markdown("""
# ⊒ Q.E.D
**βˆ€ theorem β†’ βˆƒ proof** β€” LLM-guided formal verification, powered by Modal.
Enter a theorem in Lean 4 syntax (βˆ€ βˆƒ Β¬ ∧ ∨ β†’ ↔ β„• β„€ Ξ± all supported).
The agent proposes tactics, Lean's kernel verifies each step, and kernel errors feed back into the next proposal.
Watch it **prove** a true theorem β€” or **correctly conclude** a false one is unprovable.
""")
with gr.Row():
with gr.Column(scale=2):
theorem_input = gr.Textbox(
label="Theorem statement",
placeholder="theorem my_thm : βˆ€ n : Nat, 0 + n = n",
value=EXAMPLES[0][0],
lines=3,
)
desc_display = gr.Markdown(
value=f"*{EXAMPLES[0][1]}*",
label="",
)
with gr.Row():
max_steps = gr.Slider(5, 30, value=20, step=1, label="Max steps")
use_fallbacks = gr.Checkbox(value=True, label="Use fallback tactics (Ο‰, simp…)")
prove_btn = gr.Button("⊒ Prove", variant="primary")
with gr.Column(scale=1):
gr.Markdown("**Examples** β€” click to load\n\n*βˆ€ provable ones first, then a false one ↓*")
for lean_stmt, english_desc, kind in EXAMPLES:
btn = gr.Button(english_desc, size="sm")
btn.click(
fn=lambda lean=lean_stmt, d=english_desc: (lean, f"*{d}*"),
outputs=[theorem_input, desc_display],
)
banner_out = gr.HTML(label="Verdict")
steps_out = gr.Markdown(label="Agent loop walkthrough")
tree_out = gr.HTML(label="Proof search tree")
prove_btn.click(
fn=prove_theorem,
inputs=[theorem_input, max_steps, use_fallbacks],
outputs=[banner_out, steps_out, tree_out],
)
demo.queue()
demo.launch()