Spaces:

build-small-hackathon
/

QED

Running

QED / front.py

SPP

Q.E.D — initial submission

ed428ff 14 days ago

25.5 kB

	import urllib.parse
	import gradio as gr
	import requests

	# Base URL of the remote proof-agent service. prove_theorem() POSTs its
	# requests to f"{MODAL_ENDPOINT}/prove".
	MODAL_ENDPOINT = "https://no-name13--lean-proof-agent-fastapi-app.modal.run"

	# ── Example theorems ──────────────────────────────────────────────────────────
	# Every entry is a 3-tuple: (Lean 4 statement, English description, kind),
	# with kind being "provable" or "unprovable".
	# Ordering is deliberate: add_comm_nat (induction / multi-step) leads so the
	# agent loop is visible, zero_add stays as the simple contrast, and the false
	# theorem always comes last.
	EXAMPLES = [
	    (
	        "theorem add_comm_nat : ∀ n m : Nat, n + m = m + n",
	        "∀ n m · n + m = m + n — addition is commutative",
	        "provable",
	    ),
	    (
	        "theorem reverse_reverse : ∀ (α : Type) (l : List α), l.reverse.reverse = l",
	        "∀ α l · reverse(reverse l) = l — reversing a list twice is identity",
	        "provable",
	    ),
	    (
	        "theorem zero_add : ∀ n : Nat, 0 + n = n",
	        "∀ n · 0 + n = n — zero is the left identity for addition",
	        "provable",
	    ),
	    (
	        "theorem currying : ∀ P Q R : Prop, (P ∧ Q → R) ↔ (P → Q → R)",
	        "∀ P Q R · (P∧Q→R) ↔ (P→Q→R) — currying",
	        "provable",
	    ),
	    (
	        "theorem de_morgan_and : ∀ P Q : Prop, ¬(P ∧ Q) ↔ ¬P ∨ ¬Q",
	        "∀ P Q · ¬(P∧Q) ↔ ¬P∨¬Q — De Morgan's law",
	        "provable",
	    ),
	    # FALSE theorem — the stuck detector concludes "not provable" within a few steps.
	    (
	        "theorem cannot_prove : ∀ n : Nat, n + 1 = n",
	        "∀ n · n+1 = n — FALSE: the agent correctly cannot prove this ✗",
	        "unprovable",
	    ),
	]

	# Reverse index: stripped Lean statement -> (English description, kind).
	EXAMPLE_LOOKUP = dict(
	    (statement.strip(), (blurb, verdict)) for statement, blurb, verdict in EXAMPLES
	)

	# Tactic keyword -> one-line plain-English gloss shown in the step walkthrough.
	# explain_tactic() returns the first key that prefixes the tactic text, so the
	# insertion order below is kept exactly as it was.
	TACTIC_EXPLANATIONS = {
	    # Context / structure
	    "intro": "introduces a universally quantified variable or hypothesis into the local context",
	    "induction": "applies structural induction — splits into base case (zero) and inductive step (succ)",
	    # Simplification and closing tactics
	    "simp": "simplifies the goal using a library of known equalities and lemmas",
	    "rfl": "closes the goal when both sides are definitionally equal",
	    "omega": "decision procedure for linear arithmetic — automatically solves goals about integers/naturals",
	    "rw": "rewrites the goal using an equation",
	    "exact": "closes the goal by providing an exact proof term",
	    "apply": "applies a lemma whose conclusion matches the goal",
	    # Case analysis
	    "constructor": "splits a conjunction (∧) or iff (↔) goal into its two parts",
	    "cases": "case-splits on a hypothesis or value",
	    # Automation
	    "norm_num": "solves numeric goals like 2 + 2 = 4 or 3 ∣ 6",
	    "contradiction": "closes the goal when context contains P and ¬P (a direct contradiction)",
	    "assumption": "closes the goal when it matches a hypothesis in the local context exactly",
	    "tauto": "closes propositional tautologies automatically",
	}

	# ── Dark theme — mathematical / formal-proof terminal aesthetic ───────────────
	# CUSTOM_CSS is a plain stylesheet string; it is passed to gr.Blocks via the
	# css= keyword where the UI is built further down.
	# The try/except safeguard ("a theme API mismatch never breaks the app")
	# applies to the gr.themes.Base(...) construction that follows this string,
	# not to the CSS itself.
	CUSTOM_CSS = """
	/* ── Base ───────────────────────────────────────────────── */
	body, .gradio-container { background: #0d1117 !important; color: #c9d1d9 !important; }
	.contain, .gap, .row { background: #0d1117 !important; }

	/* ── Panels ─────────────────────────────────────────────── */
	.block.padded, .block {
	background: #161b22 !important;
	border: 1px solid #30363d !important;
	border-radius: 8px !important;
	}
	.form { background: #161b22 !important; }

	/* ── All text — catch-all first, then specifics ──────────── */
	* { color: #c9d1d9; }

	/* ── Headings ────────────────────────────────────────────── */
	h1, h2, h3, h4 { color: #e6edf3 !important; }
	h1 { border-bottom: 1px solid #21262d; padding-bottom: 6px; }

	/* ── Labels (component titles, slider/checkbox labels) ────── */
	label, .label-wrap, .label-wrap span,
	span.text-gray-500, span.text-sm,
	.block > .label-wrap > span { color: #b0bec5 !important; }

	/* ── Body text, lists, paragraphs ───────────────────────── */
	p, li, em, span { color: #c9d1d9 !important; }
	a { color: #58a6ff !important; }
	strong, b { color: #e6edf3 !important; }

	/* ── Slider ──────────────────────────────────────────────── */
	input[type=range] { accent-color: #2ea043 !important; }
	.range-input span, .range-input output,
	input[type=range] + span, .slider-container span { color: #c9d1d9 !important; }

	/* ── Checkbox ────────────────────────────────────────────── */
	input[type=checkbox] { accent-color: #2ea043 !important; }
	.checkbox-label, .checkbox-label span { color: #c9d1d9 !important; }

	/* ── Inputs ──────────────────────────────────────────────── */
	textarea, input[type=text], input[type=number] {
	background: #0d1117 !important;
	color: #e6edf3 !important;
	border: 1px solid #30363d !important;
	border-radius: 6px !important;
	font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace !important;
	}
	textarea::placeholder { color: #484f58 !important; }

	/* ── Primary button ──────────────────────────────────────── */
	button.primary {
	background: #1a3a1a !important;
	border: 1px solid #2ea043 !important;
	color: #3fb950 !important;
	font-weight: bold !important;
	letter-spacing: 0.05em !important;
	transition: all 0.15s ease !important;
	}
	button.primary:hover {
	background: #2ea043 !important;
	color: #fff !important;
	box-shadow: 0 0 10px #2ea04355 !important;
	}

	/* ── Secondary / example buttons ────────────────────────── */
	button.secondary, button[variant=secondary] {
	background: #21262d !important;
	border: 1px solid #30363d !important;
	color: #c9d1d9 !important;
	font-size: 0.8em !important;
	transition: all 0.12s ease !important;
	}
	button.secondary:hover { border-color: #3fb950 !important; color: #3fb950 !important; }

	/* ── Code blocks ─────────────────────────────────────────── */
	code {
	background: #161b22 !important;
	color: #79c0ff !important;
	border: 1px solid #30363d !important;
	border-radius: 4px !important;
	padding: 2px 5px !important;
	}
	pre {
	background: #161b22 !important;
	border: 1px solid #30363d !important;
	border-radius: 6px !important;
	}
	pre code { color: #c9d1d9 !important; border: none !important; padding: 0 !important; }

	/* ── Markdown output ─────────────────────────────────────── */
	.output-markdown, .output-markdown * { color: #c9d1d9 !important; }
	.output-markdown h1, .output-markdown h2,
	.output-markdown h3 { color: #e6edf3 !important; }
	.output-markdown strong, .output-markdown b { color: #e6edf3 !important; }
	.output-markdown a { color: #58a6ff !important; }
	.output-markdown code { color: #79c0ff !important; }
	.output-markdown hr { border-color: #30363d !important; }

	/* ── Description italic below theorem input ──────────────── */
	.prose em, .prose i, em, i { color: #8fb8d8 !important; }
	"""

	def _make_theme():
	    """Build the green-on-dark Gradio theme, or return None if that fails.

	    Construction is best-effort: any exception raised while building the theme
	    (for example a theme API mismatch) is swallowed so the app can still start
	    without a custom theme.
	    """
	    try:
	        return gr.themes.Base(
	            primary_hue=gr.themes.colors.green,
	            neutral_hue=gr.themes.colors.zinc,
	            font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
	            font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
	        )
	    except Exception:
	        return None


	# None means "no custom theme available"; consumers must check before using it.
	_theme = _make_theme()


	# ── Verdict banner — three states, dark-theme colours ────────────────────────

	def build_verdict_banner(success: bool, stuck: bool, english_desc: str = "") -> str:
	    """Return the HTML verdict banner for a finished proof attempt.

	    Exactly one of three banners is produced:

	    * green "FORMALLY VERIFIED" when ``success`` is true (this wins even if
	      ``stuck`` is also true),
	    * amber "NOT PROVABLE AS STATED" when only ``stuck`` is true,
	    * red "SEARCH INCOMPLETE" when neither flag is set.

	    ``english_desc`` is rendered in italics under the headline of the green
	    and amber banners when it is non-empty; the red banner never shows it.
	    The description is inserted into the markup as-is.
	    """
	    mono = "font-family:'JetBrains Mono','Fira Code',ui-monospace,monospace;"

	    def caption(colour):
	        # Optional plain-English restatement, tinted to match the banner.
	        if not english_desc:
	            return ""
	        return (
	            f'<div style="font-size:1.0em;color:{colour};margin:6px 0 10px;">'
	            f'<em>{english_desc}</em></div>'
	        )

	    if not (success or stuck):
	        pieces = [
	            '<div style="background:#1c0a0a;border:2px solid #c62828;border-radius:8px;',
	            'padding:16px 22px;margin:10px 0;', mono, '">',
	            '<div style="font-size:1.6em;font-weight:bold;color:#f85149;margin-bottom:8px;">',
	            '✗ SEARCH INCOMPLETE</div>',
	            '<div style="font-size:0.82em;color:#c9d1d9;line-height:1.7;">',
	            'The agent exhausted its step budget without completing the proof. ',
	            'Partial progress is shown below — try increasing the step limit or picking a simpler theorem.',
	            '</div></div>',
	        ]
	    elif success:
	        pieces = [
	            '<div style="background:#0d2818;border:2px solid #2ea043;border-radius:8px;',
	            'padding:16px 22px;margin:10px 0;', mono, '">',
	            '<div style="font-size:1.8em;font-weight:bold;color:#3fb950;letter-spacing:2px;margin-bottom:6px;">',
	            '✓ FORMALLY VERIFIED</div>',
	            caption('#7ee787'),
	            '<div style="font-size:0.82em;color:#7ee787;line-height:1.75;',
	            'border-top:1px solid #1a4a1a;padding-top:10px;margin-top:8px;">',
	            'The AI proposed proof tactics. <strong style="color:#a0e8a0;">Lean 4\'s formal kernel</strong> ',
	            'checked every logical step against its axioms and accepted the proof.<br><br>',
	            '<strong style="color:#3fb950;">This cannot be faked.</strong> ',
	            'Unlike a chatbot saying "yes, that\'s true," Lean\'s kernel rejects any gap in reasoning ',
	            '— no exceptions, no hallucinations. The proof below is machine-checked mathematics.',
	            '</div></div>',
	        ]
	    else:
	        pieces = [
	            '<div style="background:#1c1400;border:2px solid #d29922;border-radius:8px;',
	            'padding:16px 22px;margin:10px 0;', mono, '">',
	            '<div style="font-size:1.7em;font-weight:bold;color:#e3b341;letter-spacing:1px;margin-bottom:6px;">',
	            '⚠ NOT PROVABLE AS STATED</div>',
	            caption('#e3b341'),
	            '<div style="font-size:0.82em;color:#c9a227;line-height:1.75;',
	            'border-top:1px solid #3a2800;padding-top:10px;margin-top:8px;">',
	            'The agent detected it was stuck: the same goal state recurred with no progress.<br><br>',
	            '<strong style="color:#e3b341;">This is a deliberate conclusion, not a failure.</strong> ',
	            'The claim as written cannot be proven — it may be mathematically false, ',
	            'or require axioms and tactics outside the current mode. ',
	            'Recognising when something is unprovable is part of what a formal proof agent does.',
	            '</div></div>',
	        ]

	    return "".join(pieces)


	# ── SVG proof tree — DO NOT MODIFY ───────────────────────────────────────────

	def build_proof_tree_svg(steps: list, tactics: list, success: bool,
	                         stuck: bool = False, claim: str = "") -> str:
	    """Render the proof search as an inline SVG wrapped in a scrollable div.

	    Each entry of ``steps`` is a dict; the keys ``goal``, ``chosen``,
	    ``candidates`` and ``status`` are read with ``.get`` and default to empty
	    values.  One node is drawn per step; candidates other than the chosen
	    tactic are drawn as dashed side branches ending in a cross; a step whose
	    status is ``'all_failed'`` is drawn as a dead end.  When ``success`` or
	    ``stuck`` is true a terminal node is appended below the last step.

	    Args:
	        steps: step dicts in execution order.
	        tactics: accepted for interface compatibility; not read here.
	        success: whether the proof was verified (green styling, QED node).
	        stuck: whether the search concluded "not provable" (amber node).
	        claim: optional description printed under the title.

	    Returns:
	        An HTML fragment containing the SVG, or "" when ``steps`` is empty.

	    All dynamic text (goals, tactic names, claim) is XML-escaped before it is
	    embedded, so text containing ``<``, ``>``, ``&`` or quotes cannot break
	    the markup.
	    """
	    # Function-scope import keeps this block self-contained.
	    import html

	    if not steps:
	        return ""

	    NODE_W, NODE_H = 300, 44
	    STEP_H = 140
	    SVG_W = 820
	    CX = SVG_W // 2

	    n = len(steps)
	    title_h = 72 if claim else 52
	    # One extra row is reserved for the terminal node when there is one.
	    SVG_H = title_h + (n + (1 if (success or stuck) else 0)) * STEP_H + NODE_H + 20

	    def esc(s):
	        # Escape markup-significant characters; a self-replacing chain would
	        # leave "<" and "&" in goals such as "a < b" and corrupt the SVG.
	        return html.escape(str(s), quote=True)

	    def trunc(s, maxn=36):
	        # Collapse to one line and cut with an ellipsis so labels fit a node.
	        s = str(s).replace('\n', ' ').strip()
	        return s[:maxn] + '…' if len(s) > maxn else s

	    out = []
	    out.append(f'<svg xmlns="http://www.w3.org/2000/svg" width="{SVG_W}" height="{SVG_H}">')
	    out.append(f'<rect width="{SVG_W}" height="{SVG_H}" fill="#030c04" rx="8"/>')
	    out.append('<defs>')
	    # Arrowhead markers: green (verified path), red (failed), grey, amber.
	    for mid, col in [('ahg', '#00e639'), ('ahr', '#f38ba8'), ('ahgr', '#6c7086'), ('aha', '#e6a817')]:
	        out.append(
	            f'<marker id="{mid}" markerWidth="8" markerHeight="6" refX="7" refY="3" orient="auto">'
	            f'<polygon points="0 0,8 3,0 6" fill="{col}"/></marker>'
	        )
	    out.append('</defs>')

	    if success:
	        title, tc = "Proof Tree — ✓ Verified", "#00e639"
	    elif stuck:
	        title, tc = "Proof Tree — ⚠ Concluded: Not Provable", "#e6a817"
	    else:
	        title, tc = "Proof Tree — ✗ Incomplete", "#f38ba8"

	    out.append(
	        f'<text x="{CX}" y="28" text-anchor="middle" '
	        f'font-family="monospace" font-size="14" font-weight="bold" fill="{tc}">{esc(title)}</text>'
	    )

	    if claim:
	        out.append(
	            f'<text x="{CX}" y="47" text-anchor="middle" '
	            f'font-family="monospace" font-size="10" fill="#70b870">'
	            f'Claim: {esc(trunc(claim, 74))}</text>'
	        )

	    legend_y = title_h - 10
	    for lx, col, label in [(20, '#00e639', 'successful path'), (168, '#f38ba8', 'failed attempt')]:
	        out.append(f'<line x1="{lx}" y1="{legend_y}" x2="{lx+20}" y2="{legend_y}" stroke="{col}" stroke-width="2"/>')
	        out.append(
	            f'<text x="{lx+24}" y="{legend_y+4}" font-family="monospace" font-size="9" fill="{col}">{label}</text>'
	        )

	    for i, step in enumerate(steps):
	        cy = title_h + i * STEP_H + NODE_H // 2
	        chosen = step.get('chosen', '')
	        candidates = step.get('candidates', [])
	        status = step.get('status', '')
	        goal = step.get('goal', '')
	        failed = [c for c in candidates if c != chosen]

	        # A step is on the main path when a tactic was accepted for it.
	        on_path = bool(chosen) and status != 'all_failed'
	        if on_path and success:
	            bg, border = '#001a00', '#00e639'
	        elif status == 'all_failed':
	            bg, border = '#1a0000', '#f38ba8'
	        else:
	            bg, border = '#050f05', '#2a5a2a'

	        nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
	        out.append(
	            f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
	            f'rx="5" fill="{bg}" stroke="{border}" stroke-width="1.5"/>'
	        )
	        out.append(
	            f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
	            f'font-family="monospace" font-size="10" fill="#98c898">'
	            f'{esc(trunc(f"Step {i}: {goal}"))}</text>'
	        )

	        # Rejected candidates fan out to the right of the node.
	        for j, fc in enumerate(failed):
	            bx = CX + NODE_W // 2 + 55 + j * 75
	            by = cy + STEP_H // 3
	            out.append(
	                f'<line x1="{CX+NODE_W//2}" y1="{cy}" x2="{bx}" y2="{by}" '
	                f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3" marker-end="url(#ahr)"/>'
	            )
	            mx = (CX + NODE_W // 2 + bx) // 2 + 3
	            my = (cy + by) // 2 - 3
	            out.append(
	                f'<text x="{mx}" y="{my}" font-family="monospace" font-size="8" fill="#f38ba8">'
	                f'{esc(trunc(fc, 18))}</text>'
	            )
	            out.append(f'<circle cx="{bx}" cy="{by}" r="7" fill="#1a0000" stroke="#f38ba8" stroke-width="1"/>')
	            out.append(
	                f'<text x="{bx}" y="{by+4}" text-anchor="middle" '
	                f'font-family="monospace" font-size="9" fill="#f38ba8">✗</text>'
	            )

	        next_cy = title_h + (i + 1) * STEP_H + NODE_H // 2
	        if on_path:
	            # Arrow to the next row, labelled with the accepted tactic.
	            color, mid = ('#00e639', 'ahg') if success else ('#6c7086', 'ahgr')
	            out.append(
	                f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{next_cy-NODE_H//2}" '
	                f'stroke="{color}" stroke-width="2" marker-end="url(#{mid})"/>'
	            )
	            ly = (cy + NODE_H // 2 + next_cy - NODE_H // 2) // 2
	            out.append(
	                f'<text x="{CX+6}" y="{ly}" font-family="monospace" font-size="9" fill="{color}">'
	                f'{esc(trunc(chosen, 28))}</text>'
	            )
	        elif status == 'all_failed':
	            # Short dashed stub marks a dead end.
	            out.append(
	                f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{cy+NODE_H//2+28}" '
	                f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3"/>'
	            )

	    if success or stuck:
	        cy = title_h + n * STEP_H + NODE_H // 2
	        nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
	        if success:
	            node_fill, node_stroke = '#002800', '#00e639'
	            node_text, node_col = '✓ QED — Goals accomplished!', '#00ff41'
	        else:
	            node_fill, node_stroke = '#201000', '#e6a817'
	            node_text, node_col = '⚠ Concluded: not provable as stated', '#f5c842'
	        out.append(
	            f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
	            f'rx="5" fill="{node_fill}" stroke="{node_stroke}" stroke-width="2"/>'
	        )
	        out.append(
	            f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
	            f'font-family="monospace" font-size="11" font-weight="bold" fill="{node_col}">'
	            f'{esc(node_text)}</text>'
	        )

	    out.append('</svg>')
	    return f'<div style="overflow-x:auto;padding:8px">{"".join(out)}</div>'


	# ── Step walkthrough ──────────────────────────────────────────────────────────

	def explain_tactic(tactic: str) -> str:
	    """Return a one-line gloss for ``tactic``, or "" when none is known.

	    The tactic text is stripped and compared against the keys of
	    TACTIC_EXPLANATIONS in their stored order; the first key that is a prefix
	    of the text wins.  The result has the form "`key` — explanation".
	    """
	    head = tactic.strip()
	    hit = next((name for name in TACTIC_EXPLANATIONS if head.startswith(name)), None)
	    if hit is None:
	        return ""
	    return f"`{hit}` — {TACTIC_EXPLANATIONS[hit]}"


	def make_playground_url(theorem_stmt: str, tactics: list) -> str:
	    """Build a live.lean-lang.org link with the proof pre-filled.

	    The code is ``"<theorem_stmt> := by"`` followed by every line of every
	    tactic (each tactic stripped, then split on newlines), each prefixed with
	    one space.  The joined text is percent-encoded and appended after
	    ``#code=``.
	    """
	    source_lines = [f"{theorem_stmt} := by"]
	    for tactic in tactics:
	        for piece in tactic.strip().split("\n"):
	            source_lines.append(f" {piece}")
	    encoded = urllib.parse.quote("\n".join(source_lines))
	    return "https://live.lean-lang.org/#code=" + encoded


	def format_steps(steps: list, tactics: list, stuck: bool = False,
	theorem_stmt: str = "") -> str:
	"""Render the agent's step log as a Markdown walkthrough.

	Args:
	    steps: step dicts. 'goal' and 'step' are indexed directly and must be
	        present; 'chosen', 'candidates', 'status' and 'error' are read with
	        .get and default to empty values.
	    tactics: tactic strings used to print the proof block.
	    stuck: when true, a "search concluded" note is appended and the proof
	        block is skipped.
	    theorem_stmt: when non-empty, used to build a pre-filled playground
	        link via make_playground_url.

	Returns:
	    The Markdown text, or "" when ``steps`` is empty.

	NOTE(review): the "Complete proof" section is emitted whenever ``tactics``
	is non-empty and ``stuck`` is false. This function receives no success
	flag, so callers should only pass tactics for a verified proof.
	"""
	if not steps:
	return ""
	out = ["### Agent loop: propose → verify → learn\n"]
	for s in steps:
	# 'goal' and 'step' raise KeyError when missing; the other keys are optional.
	goal = s['goal']
	chosen = s.get('chosen', '')
	candidates = s.get('candidates', [])
	status = s.get('status', '')
	error = s.get('error', '')
	step_num = s['step']

	out.append(f"---\nStep {step_num} — current goal:")
	out.append(f"```\n{goal}\n```")

	if not candidates:
	out.append("⚠️ LLM endpoint warming up — no candidates available at this step")
	else:
	# Every candidate other than the chosen one is reported as rejected.
	rejected = [c for c in candidates if c != chosen]
	out.append(f"\U0001f916 LLM proposed: `{'`, `'.join(candidates)}`")
	for r in rejected:
	out.append(f"- ❌ `{r}` — Lean kernel rejected")
	if error and rejected:
	# Flatten the error to one line and cap it at 120 characters.
	short_err = error.replace('\n', ' ')[:120]
	out.append(f"- \U0001f4e2 Kernel error fed back to agent: `{short_err}`")
	if chosen and status != 'all_failed':
	exp = explain_tactic(chosen)
	out.append(f"- ✅ `{chosen}` — Lean kernel accepted")
	if exp:
	out.append(f" {exp}")
	out.append(f"- Result: `{status}`")
	elif status == 'all_failed':
	out.append("- ❌ All candidates rejected — feeding errors back, trying next step")

	out.append("")

	# Closing section: the stuck note takes precedence over the proof block.
	if stuck:
	out.append(
	"---\n⚠️ Search concluded — the same goal state recurred with no progress.\n"
	"The claim could not be proven. It may be mathematically false, "
	"or require axioms and tactics outside the current mode."
	)
	elif tactics:
	# Same line layout as make_playground_url: "by", then each tactic line
	# prefixed with one space.
	proof_lines = ["by"] + [f" {line}" for t in tactics for line in t.strip().split("\n")]
	proof_block = "\n".join(proof_lines)
	out.append("---\n### Complete proof\n")
	out.append("```lean4")
	out.append(proof_block)
	out.append("```")
	out.append("\n---\n### Verify it yourself")
	if theorem_stmt:
	url = make_playground_url(theorem_stmt, tactics)
	out.append(
	f"[▶ Open in Lean 4 web playground ↗]({url})\n\n"
	"The proof is pre-filled and ready to run. "
	"The kernel outputs \"Goals accomplished!\" or rejects it if anything is wrong. "
	"It cannot be convinced. It cannot be fooled."
	)
	else:
	# Without a statement no link can be built; ask the reader to paste manually.
	out.append(
	"Paste the proof above into [live.lean-lang.org ↗](https://live.lean-lang.org/). "
	"The kernel outputs \"Goals accomplished!\" — or rejects it if anything is wrong."
	)

	return "\n".join(out)


	# ── Main proof handler ────────────────────────────────────────────────────────

	def prove_theorem(theorem: str, max_steps: int, use_fallbacks: bool):
	    """Gradio handler: send the theorem to the proof agent and render the result.

	    This is a generator. Every yielded value is a 3-tuple
	    ``(banner_html, walkthrough_markdown, tree_html)``:

	    1. a prompt to enter a statement when the input is empty (then it stops),
	    2. otherwise a "sending" placeholder, followed by
	    3. either an error message or the rendered verdict.

	    Args:
	        theorem: Lean 4 theorem statement; ``None`` is treated as empty.
	        max_steps: step budget forwarded to the service.
	        use_fallbacks: whether the service may use fallback tactics.

	    Network failures, timeouts, undecodable bodies and bodies without a
	    ``success`` field are reported as a message in the first slot, not raised.
	    """
	    # Treat None like an empty string so .strip() cannot raise.
	    statement = (theorem or "").strip()
	    if not statement:
	        yield "Please enter a theorem statement.", "", ""
	        return

	    yield "⏳ Sending to proof agent on Modal…", "", ""

	    lookup = EXAMPLE_LOOKUP.get(statement)
	    english_desc = lookup[0] if lookup else ""

	    try:
	        resp = requests.post(
	            f"{MODAL_ENDPOINT}/prove",
	            json={
	                "theorem": theorem,
	                "max_steps": max_steps,
	                "use_fallbacks": use_fallbacks,
	                "show_reasoning": True,  # always run live loop, skip cache read
	            },
	            timeout=280,
	        )
	        data = resp.json()
	    except requests.exceptions.Timeout:
	        yield "❌ Request timed out. Try a simpler theorem or fewer max steps.", "", ""
	        return
	    except Exception as e:
	        # UI boundary: any transport or decoding failure becomes a message.
	        yield f"❌ Error contacting proof agent: {e}", "", ""
	        return

	    # An error payload (for example a framework error body) has no verdict;
	    # report it here so it cannot fail later with a KeyError.
	    if not isinstance(data, dict) or "success" not in data:
	        yield (
	            f"❌ Proof agent returned an unexpected response (HTTP {resp.status_code}).",
	            "",
	            "",
	        )
	        return

	    # "message" may be absent or null; normalise before the substring checks.
	    msg = str(data.get("message") or "").lower()
	    warmup_note = ""
	    if "warming up" in msg or "unavailable" in msg:
	        warmup_note = "⚠️ LLM endpoint warming up (cold start) — proof attempted with fallback tactics only.\n\n"

	    success = bool(data["success"])
	    stuck = bool(data.get("stuck", False))
	    steps = data.get("steps") or []
	    tactics = data.get("tactics") or []

	    banner = build_verdict_banner(success, stuck, english_desc)
	    # format_steps prints a "Complete proof" section for any non-empty tactic
	    # list, so pass tactics only when the proof was actually verified.
	    details = format_steps(
	        steps, tactics if success else [],
	        stuck=stuck, theorem_stmt=statement
	    )
	    svg_html = build_proof_tree_svg(
	        steps, tactics, success, stuck=stuck, claim=english_desc
	    )
	    yield warmup_note + banner, details, svg_html


	# ── UI ────────────────────────────────────────────────────────────────────────

	# Keyword arguments for gr.Blocks. "theme" is added only when theme
	# construction succeeded, so a None theme is never passed.
	_blocks_kwargs: dict = dict(title="Q.E.D", css=CUSTOM_CSS)
	if _theme is not None:
	_blocks_kwargs["theme"] = _theme

	# Page layout: intro text, input column and example buttons, then the three
	# output panes filled by prove_theorem.
	with gr.Blocks(**_blocks_kwargs) as demo:
	gr.Markdown("""
	# ⊢ Q.E.D
	∀ theorem → ∃ proof — LLM-guided formal verification, powered by Modal.

	Enter a theorem in Lean 4 syntax (∀ ∃ ¬ ∧ ∨ → ↔ ℕ ℤ α all supported).
	The agent proposes tactics, Lean's kernel verifies each step, and kernel errors feed back into the next proposal.
	Watch it prove a true theorem — or correctly conclude a false one is unprovable.
	""")

	with gr.Row():
	with gr.Column(scale=2):
	# The first example is pre-loaded, together with its description.
	theorem_input = gr.Textbox(
	label="Theorem statement",
	placeholder="theorem my_thm : ∀ n : Nat, 0 + n = n",
	value=EXAMPLES[0][0],
	lines=3,
	)
	desc_display = gr.Markdown(
	value=f"{EXAMPLES[0][1]}",
	label="",
	)
	with gr.Row():
	max_steps = gr.Slider(5, 30, value=20, step=1, label="Max steps")
	use_fallbacks = gr.Checkbox(value=True, label="Use fallback tactics (ω, simp…)")
	prove_btn = gr.Button("⊢ Prove", variant="primary")

	with gr.Column(scale=1):
	gr.Markdown("Examples — click to load\n\n∀ provable ones first, then a false one ↓")
	# One button per example; `kind` is unpacked but not used here.
	for lean_stmt, english_desc, kind in EXAMPLES:
	btn = gr.Button(english_desc, size="sm")
	# Default arguments bind this iteration's values, so each button loads
	# its own example instead of the loop's last one.
	btn.click(
	fn=lambda lean=lean_stmt, d=english_desc: (lean, f"{d}"),
	outputs=[theorem_input, desc_display],
	)

	# Output panes, in the order of the 3-tuples yielded by prove_theorem.
	banner_out = gr.HTML(label="Verdict")
	steps_out = gr.Markdown(label="Agent loop walkthrough")
	tree_out = gr.HTML(label="Proof search tree")

	prove_btn.click(
	fn=prove_theorem,
	inputs=[theorem_input, max_steps, use_fallbacks],
	outputs=[banner_out, steps_out, tree_out],
	)

	# queue() is called before launch(); presumably needed so the generator
	# handler's intermediate yields reach the page — confirm against the Gradio
	# version in use.
	demo.queue()
	demo.launch()