Spaces:

build-small-hackathon
/

QED

Running

File size: 25,506 Bytes

ed428ff

import urllib.parse
import gradio as gr
import requests

# Base URL of the remote proof-agent service. prove_theorem() POSTs the
# theorem to f"{MODAL_ENDPOINT}/prove" and renders the JSON it gets back.
MODAL_ENDPOINT = "https://no-name13--lean-proof-agent-fastapi-app.modal.run"

# ── Example theorems ──────────────────────────────────────────────────────────
# Each entry is a 3-tuple:
#   (Lean 4 theorem statement, English description, kind)
# The statement is what gets loaded into the input box, the description is used
# as the example button's label and as the caption under the input, and kind is
# the tag "provable" / "unprovable".
#
# Ordering is intentional:
#   * Lead with add_comm_nat (induction / multi-step) so the agent loop is visible.
#   * Keep zero_add as the simple contrast.
#   * The false theorem always goes last.
EXAMPLES = [
    (
        "theorem add_comm_nat : ∀ n m : Nat, n + m = m + n",
        "∀ n m · n + m = m + n — addition is commutative",
        "provable",
    ),
    (
        "theorem reverse_reverse : ∀ (α : Type) (l : List α), l.reverse.reverse = l",
        "∀ α l · reverse(reverse l) = l — reversing a list twice is identity",
        "provable",
    ),
    (
        "theorem zero_add : ∀ n : Nat, 0 + n = n",
        "∀ n · 0 + n = n — zero is the left identity for addition",
        "provable",
    ),
    (
        "theorem currying : ∀ P Q R : Prop, (P ∧ Q → R) ↔ (P → Q → R)",
        "∀ P Q R · (P∧Q→R) ↔ (P→Q→R) — currying",
        "provable",
    ),
    (
        "theorem de_morgan_and : ∀ P Q : Prop, ¬(P ∧ Q) ↔ ¬P ∨ ¬Q",
        "∀ P Q · ¬(P∧Q) ↔ ¬P∨¬Q — De Morgan's law",
        "provable",
    ),
    # FALSE theorem — the stuck detector concludes "not provable" within a few steps.
    (
        "theorem cannot_prove : ∀ n : Nat, n + 1 = n",
        "∀ n · n+1 = n — FALSE: the agent correctly cannot prove this ✗",
        "unprovable",
    ),
]

# Reverse index: stripped Lean statement -> (description, kind). prove_theorem()
# looks the submitted text up here, so the description is only found when the
# user submits an example statement unchanged (modulo surrounding whitespace).
EXAMPLE_LOOKUP = {lean.strip(): (desc, kind) for lean, desc, kind in EXAMPLES}

# One-line, plain-English gloss for each tactic name the walkthrough can
# annotate. explain_tactic() walks this dict in insertion order and returns the
# first key that is a *prefix* of the tactic text, so:
#   * a key also matches longer tactic names that begin with it, and
#   * if one key is ever a prefix of another, the one listed first wins.
TACTIC_EXPLANATIONS = {
    "intro": "introduces a universally quantified variable or hypothesis into the local context",
    "induction": "applies structural induction — splits into base case (zero) and inductive step (succ)",
    "simp": "simplifies the goal using a library of known equalities and lemmas",
    "rfl": "closes the goal when both sides are definitionally equal",
    "omega": "decision procedure for linear arithmetic — automatically solves goals about integers/naturals",
    "rw": "rewrites the goal using an equation",
    "exact": "closes the goal by providing an exact proof term",
    "apply": "applies a lemma whose conclusion matches the goal",
    "constructor": "splits a conjunction (∧) or iff (↔) goal into its two parts",
    "cases": "case-splits on a hypothesis or value",
    "norm_num": "solves numeric goals like 2 + 2 = 4 or 3 ∣ 6",
    "contradiction": "closes the goal when context contains P and ¬P (a direct contradiction)",
    "assumption": "closes the goal when it matches a hypothesis in the local context exactly",
    "tauto": "closes propositional tautologies automatically",
}

# ── Dark theme — mathematical / formal-proof terminal aesthetic ───────────────
# CUSTOM_CSS is handed to gr.Blocks(css=...) in the UI section. The companion
# gr.themes.Base object is built right after this string, inside a try/except,
# so a theme API mismatch never breaks the app.
CUSTOM_CSS = """
/* ── Base ───────────────────────────────────────────────── */
body, .gradio-container { background: #0d1117 !important; color: #c9d1d9 !important; }
.contain, .gap, .row { background: #0d1117 !important; }

/* ── Panels ─────────────────────────────────────────────── */
.block.padded, .block {
    background: #161b22 !important;
    border: 1px solid #30363d !important;
    border-radius: 8px !important;
}
.form { background: #161b22 !important; }

/* ── All text — catch-all first, then specifics ──────────── */
* { color: #c9d1d9; }

/* ── Headings ────────────────────────────────────────────── */
h1, h2, h3, h4 { color: #e6edf3 !important; }
h1 { border-bottom: 1px solid #21262d; padding-bottom: 6px; }

/* ── Labels (component titles, slider/checkbox labels) ────── */
label, .label-wrap, .label-wrap span,
span.text-gray-500, span.text-sm,
.block > .label-wrap > span { color: #b0bec5 !important; }

/* ── Body text, lists, paragraphs ───────────────────────── */
p, li, em, span { color: #c9d1d9 !important; }
a { color: #58a6ff !important; }
strong, b { color: #e6edf3 !important; }

/* ── Slider ──────────────────────────────────────────────── */
input[type=range] { accent-color: #2ea043 !important; }
.range-input span, .range-input output,
input[type=range] + span, .slider-container span { color: #c9d1d9 !important; }

/* ── Checkbox ────────────────────────────────────────────── */
input[type=checkbox] { accent-color: #2ea043 !important; }
.checkbox-label, .checkbox-label span { color: #c9d1d9 !important; }

/* ── Inputs ──────────────────────────────────────────────── */
textarea, input[type=text], input[type=number] {
    background: #0d1117 !important;
    color: #e6edf3 !important;
    border: 1px solid #30363d !important;
    border-radius: 6px !important;
    font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace !important;
}
textarea::placeholder { color: #484f58 !important; }

/* ── Primary button ──────────────────────────────────────── */
button.primary {
    background: #1a3a1a !important;
    border: 1px solid #2ea043 !important;
    color: #3fb950 !important;
    font-weight: bold !important;
    letter-spacing: 0.05em !important;
    transition: all 0.15s ease !important;
}
button.primary:hover {
    background: #2ea043 !important;
    color: #fff !important;
    box-shadow: 0 0 10px #2ea04355 !important;
}

/* ── Secondary / example buttons ────────────────────────── */
button.secondary, button[variant=secondary] {
    background: #21262d !important;
    border: 1px solid #30363d !important;
    color: #c9d1d9 !important;
    font-size: 0.8em !important;
    transition: all 0.12s ease !important;
}
button.secondary:hover { border-color: #3fb950 !important; color: #3fb950 !important; }

/* ── Code blocks ─────────────────────────────────────────── */
code {
    background: #161b22 !important;
    color: #79c0ff !important;
    border: 1px solid #30363d !important;
    border-radius: 4px !important;
    padding: 2px 5px !important;
}
pre {
    background: #161b22 !important;
    border: 1px solid #30363d !important;
    border-radius: 6px !important;
}
pre code { color: #c9d1d9 !important; border: none !important; padding: 0 !important; }

/* ── Markdown output ─────────────────────────────────────── */
.output-markdown, .output-markdown * { color: #c9d1d9 !important; }
.output-markdown h1, .output-markdown h2,
.output-markdown h3 { color: #e6edf3 !important; }
.output-markdown strong, .output-markdown b { color: #e6edf3 !important; }
.output-markdown a { color: #58a6ff !important; }
.output-markdown code { color: #79c0ff !important; }
.output-markdown hr { border-color: #30363d !important; }

/* ── Description italic below theorem input ──────────────── */
.prose em, .prose i, em, i { color: #8fb8d8 !important; }
"""

# Build the Gradio theme defensively: if constructing it raises for any reason
# (e.g. a gr.themes API mismatch), fall back to None. The UI section only adds
# the `theme` argument to gr.Blocks when _theme is not None, so the app still
# starts, styled by CUSTOM_CSS alone.
try:
    _theme = gr.themes.Base(
        primary_hue=gr.themes.colors.green,
        neutral_hue=gr.themes.colors.zinc,
        font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
        font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
    )
except Exception:
    # Deliberate best-effort: theming is cosmetic, so swallow the error.
    _theme = None


# ── Verdict banner — three states, dark-theme colours ────────────────────────

def build_verdict_banner(success: bool, stuck: bool, english_desc: str = "") -> str:
    """Return the HTML verdict banner shown above the proof walkthrough.

    Exactly one of three banners is produced, checked in this order:

    * ``success`` truthy -> green "FORMALLY VERIFIED"
    * ``stuck`` truthy   -> amber "NOT PROVABLE AS STATED"
    * otherwise          -> red "SEARCH INCOMPLETE"

    Args:
        success: Whether the proof was accepted. Takes precedence over ``stuck``.
        stuck: Whether the agent concluded it was stuck.
        english_desc: Optional plain-English restatement of the claim. It is
            inserted verbatim (no HTML escaping) as an italic caption in the
            first two banners and ignored by the third.

    Returns:
        A single-line HTML fragment (nested ``<div>`` elements with inline styles).
    """
    mono = "font-family:'JetBrains Mono','Fira Code',ui-monospace,monospace;"

    def frame(background: str, edge: str, inner: str) -> str:
        # Outer rounded box; only the fill and border colour differ per verdict.
        return (
            f'<div style="background:{background};border:2px solid {edge};border-radius:8px;'
            f'padding:16px 22px;margin:10px 0;{mono}">{inner}</div>'
        )

    def caption(colour: str) -> str:
        # Italic restatement of the claim; empty when no description is known.
        if not english_desc:
            return ""
        return (
            f'<div style="font-size:1.0em;color:{colour};margin:6px 0 10px;">'
            f'<em>{english_desc}</em></div>'
        )

    if success:
        verified = "".join((
            '<div style="font-size:1.8em;font-weight:bold;color:#3fb950;letter-spacing:2px;margin-bottom:6px;">',
            '&#10003; FORMALLY VERIFIED</div>',
            caption('#7ee787'),
            '<div style="font-size:0.82em;color:#7ee787;line-height:1.75;',
            'border-top:1px solid #1a4a1a;padding-top:10px;margin-top:8px;">',
            'The AI proposed proof tactics. <strong style="color:#a0e8a0;">Lean 4\'s formal kernel</strong> ',
            'checked every logical step against its axioms and accepted the proof.<br><br>',
            '<strong style="color:#3fb950;">This cannot be faked.</strong> ',
            'Unlike a chatbot saying "yes, that\'s true," Lean\'s kernel rejects any gap in reasoning ',
            '— no exceptions, no hallucinations. The proof below is machine-checked mathematics.',
            '</div>',
        ))
        return frame('#0d2818', '#2ea043', verified)

    if stuck:
        unprovable = "".join((
            '<div style="font-size:1.7em;font-weight:bold;color:#e3b341;letter-spacing:1px;margin-bottom:6px;">',
            '&#9888; NOT PROVABLE AS STATED</div>',
            caption('#e3b341'),
            '<div style="font-size:0.82em;color:#c9a227;line-height:1.75;',
            'border-top:1px solid #3a2800;padding-top:10px;margin-top:8px;">',
            'The agent detected it was stuck: the same goal state recurred with no progress.<br><br>',
            '<strong style="color:#e3b341;">This is a deliberate conclusion, not a failure.</strong> ',
            'The claim as written cannot be proven — it may be mathematically false, ',
            'or require axioms and tactics outside the current mode. ',
            'Recognising when something is unprovable is part of what a formal proof agent does.',
            '</div>',
        ))
        return frame('#1c1400', '#d29922', unprovable)

    # Neither verified nor stuck: the step budget ran out.
    incomplete = "".join((
        '<div style="font-size:1.6em;font-weight:bold;color:#f85149;margin-bottom:8px;">',
        '&#10007; SEARCH INCOMPLETE</div>',
        '<div style="font-size:0.82em;color:#c9d1d9;line-height:1.7;">',
        'The agent exhausted its step budget without completing the proof. ',
        'Partial progress is shown below — try increasing the step limit or picking a simpler theorem.',
        '</div>',
    ))
    return frame('#1c0a0a', '#c62828', incomplete)


# ── SVG proof tree — DO NOT MODIFY ───────────────────────────────────────────

def build_proof_tree_svg(steps: list, tactics: list, success: bool,
                         stuck: bool = False, claim: str = "") -> str:
    """Render the proof search as an inline SVG wrapped in a scrollable <div>.

    The drawing is a single vertical column with one box per step. Candidates
    that were not chosen fan out to the right of their step as dashed red
    branches ending in a ✗ marker. When the run ended in ``success`` or
    ``stuck`` a final verdict box is appended below the last step.

    Args:
        steps: Step records (dicts). Only the keys 'chosen', 'candidates',
            'status' and 'goal' are read, all via ``.get`` with empty defaults.
        tactics: Accepted for signature compatibility; not referenced in the body.
        success: Selects the green "Verified" title, path colours and QED node.
        stuck: Selects the amber "Not Provable" title and terminal node when
            ``success`` is falsy.
        claim: Optional text shown under the title (truncated to 74 characters).

    Returns:
        "" when ``steps`` is empty, otherwise
        ``<div style="overflow-x:auto;padding:8px"><svg …>…</svg></div>``.
    """
    if not steps:
        return ""

    # Layout constants, in SVG user units (pixels).
    NODE_W, NODE_H = 300, 44
    STEP_H = 140          # vertical distance between consecutive step boxes
    SVG_W = 820
    CX = SVG_W // 2       # x of the central column

    n = len(steps)
    title_h = 72 if claim else 52   # extra room for the "Claim:" line
    # One STEP_H slot per step, plus one more when a terminal verdict node is drawn.
    SVG_H = title_h + (n + (1 if (success or stuck) else 0)) * STEP_H + NODE_H + 20

    def esc(s):
        # Minimal XML escaping for text placed inside the SVG.
        return str(s).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')

    def trunc(s, maxn=36):
        # Flatten newlines, trim, and cut to maxn characters plus an ellipsis.
        s = str(s).replace('\n', ' ').strip()
        return s[:maxn] + '…' if len(s) > maxn else s

    out = []
    out.append(f'<svg xmlns="http://www.w3.org/2000/svg" width="{SVG_W}" height="{SVG_H}">')
    out.append(f'<rect width="{SVG_W}" height="{SVG_H}" fill="#030c04" rx="8"/>')
    # Arrowhead markers, one per line colour: green, red, grey, amber.
    # ('aha' is defined here but no line below references it.)
    out.append('<defs>')
    for mid, col in [('ahg', '#00e639'), ('ahr', '#f38ba8'), ('ahgr', '#6c7086'), ('aha', '#e6a817')]:
        out.append(
            f'<marker id="{mid}" markerWidth="8" markerHeight="6" refX="7" refY="3" orient="auto">'
            f'<polygon points="0 0,8 3,0 6" fill="{col}"/></marker>'
        )
    out.append('</defs>')

    # Title text and colour follow the verdict; success wins over stuck.
    if success:
        title, tc = "Proof Tree — ✓ Verified", "#00e639"
    elif stuck:
        title, tc = "Proof Tree — ⚠ Concluded: Not Provable", "#e6a817"
    else:
        title, tc = "Proof Tree — ✗ Incomplete", "#f38ba8"

    out.append(
        f'<text x="{CX}" y="28" text-anchor="middle" '
        f'font-family="monospace" font-size="14" font-weight="bold" fill="{tc}">{esc(title)}</text>'
    )

    if claim:
        out.append(
            f'<text x="{CX}" y="47" text-anchor="middle" '
            f'font-family="monospace" font-size="10" fill="#70b870">'
            f'Claim: {esc(trunc(claim, 74))}</text>'
        )

    # Two-entry legend in the top-left corner, just above the first node.
    legend_y = title_h - 10
    for lx, col, label in [(20, '#00e639', 'successful path'), (168, '#f38ba8', 'failed attempt')]:
        out.append(f'<line x1="{lx}" y1="{legend_y}" x2="{lx+20}" y2="{legend_y}" stroke="{col}" stroke-width="2"/>')
        out.append(
            f'<text x="{lx+24}" y="{legend_y+4}" font-family="monospace" font-size="9" fill="{col}">{label}</text>'
        )

    for i, step in enumerate(steps):
        cy = title_h + i * STEP_H + NODE_H // 2   # vertical centre of this step's box
        chosen = step.get('chosen', '')
        candidates = step.get('candidates', [])
        status = step.get('status', '')
        goal = step.get('goal', '')
        # Every candidate other than the chosen one is drawn as a failed branch.
        failed = [c for c in candidates if c != chosen]

        # A step is on the main path when a tactic was chosen and the step was
        # not flagged as 'all_failed'.
        on_path = bool(chosen) and status != 'all_failed'
        if on_path and success:
            bg, border = '#001a00', '#00e639'
        elif status == 'all_failed':
            bg, border = '#1a0000', '#f38ba8'
        else:
            bg, border = '#050f05', '#2a5a2a'

        nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
        out.append(
            f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
            f'rx="5" fill="{bg}" stroke="{border}" stroke-width="1.5"/>'
        )
        # The label uses the enumerate index i, not any 'step' field of the record.
        out.append(
            f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
            f'font-family="monospace" font-size="10" fill="#98c898">'
            f'{esc(trunc(f"Step {i}: {goal}"))}</text>'
        )

        # Failed branches: dashed line from the box's right edge to a ✗ circle,
        # spaced 75 units apart horizontally.
        # NOTE(review): bx = 615 + 75*j, so from the 4th failed candidate on the
        # marker lies past SVG_W (820) — confirm whether that is acceptable.
        for j, fc in enumerate(failed):
            bx = CX + NODE_W // 2 + 55 + j * 75
            by = cy + STEP_H // 3
            out.append(
                f'<line x1="{CX+NODE_W//2}" y1="{cy}" x2="{bx}" y2="{by}" '
                f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3" marker-end="url(#ahr)"/>'
            )
            # Tactic label near the midpoint of the branch line.
            mx = (CX + NODE_W // 2 + bx) // 2 + 3
            my = (cy + by) // 2 - 3
            out.append(
                f'<text x="{mx}" y="{my}" font-family="monospace" font-size="8" fill="#f38ba8">'
                f'{esc(trunc(fc, 18))}</text>'
            )
            out.append(f'<circle cx="{bx}" cy="{by}" r="7" fill="#1a0000" stroke="#f38ba8" stroke-width="1"/>')
            out.append(
                f'<text x="{bx}" y="{by+4}" text-anchor="middle" '
                f'font-family="monospace" font-size="9" fill="#f38ba8">✗</text>'
            )

        next_cy = title_h + (i + 1) * STEP_H + NODE_H // 2
        if on_path:
            # Arrow down to the next slot, labelled with the chosen tactic:
            # green when the whole proof succeeded, grey otherwise.
            color, mid = ('#00e639', 'ahg') if success else ('#6c7086', 'ahgr')
            out.append(
                f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{next_cy-NODE_H//2}" '
                f'stroke="{color}" stroke-width="2" marker-end="url(#{mid})"/>'
            )
            ly = (cy + NODE_H // 2 + next_cy - NODE_H // 2) // 2
            out.append(
                f'<text x="{CX+6}" y="{ly}" font-family="monospace" font-size="9" fill="{color}">'
                f'{esc(trunc(chosen, 28))}</text>'
            )
        elif status == 'all_failed':
            # Short dashed stub (no arrowhead) under a step where nothing worked.
            out.append(
                f'<line x1="{CX}" y1="{cy+NODE_H//2}" x2="{CX}" y2="{cy+NODE_H//2+28}" '
                f'stroke="#f38ba8" stroke-width="1.5" stroke-dasharray="4,3"/>'
            )

    # Terminal verdict node, occupying the slot after the last step.
    if success or stuck:
        cy = title_h + n * STEP_H + NODE_H // 2
        nx, ny = CX - NODE_W // 2, cy - NODE_H // 2
        if success:
            node_fill, node_stroke = '#002800', '#00e639'
            node_text, node_col = '✓ QED — Goals accomplished!', '#00ff41'
        else:
            node_fill, node_stroke = '#201000', '#e6a817'
            node_text, node_col = '⚠ Concluded: not provable as stated', '#f5c842'
        out.append(
            f'<rect x="{nx}" y="{ny}" width="{NODE_W}" height="{NODE_H}" '
            f'rx="5" fill="{node_fill}" stroke="{node_stroke}" stroke-width="2"/>'
        )
        out.append(
            f'<text x="{CX}" y="{cy+5}" text-anchor="middle" '
            f'font-family="monospace" font-size="11" font-weight="bold" fill="{node_col}">'
            f'{esc(node_text)}</text>'
        )

    out.append('</svg>')
    return f'<div style="overflow-x:auto;padding:8px">{"".join(out)}</div>'


# ── Step walkthrough ──────────────────────────────────────────────────────────

def explain_tactic(tactic: str) -> str:
    """Return an italic Markdown gloss for *tactic*, or "" if none applies.

    The whitespace-stripped tactic text is compared against the keys of
    TACTIC_EXPLANATIONS in insertion order; the first key that is a prefix of
    the text supplies the explanation.
    """
    text = tactic.strip()
    name = next((k for k in TACTIC_EXPLANATIONS if text.startswith(k)), None)
    if name is None:
        return ""
    return f"*`{name}` — {TACTIC_EXPLANATIONS[name]}*"


def make_playground_url(theorem_stmt: str, tactics: list) -> str:
    """Build a live.lean-lang.org link with the proof pre-filled.

    The source is ``<theorem_stmt> := by`` followed by every line of every
    tactic (each tactic stripped, then split on newlines) indented by two
    spaces. The whole text is percent-encoded into the ``#code=`` fragment.
    """
    source = [f"{theorem_stmt} := by"]
    for tactic in tactics:
        for row in tactic.strip().split("\n"):
            source.append(f"  {row}")
    encoded = urllib.parse.quote("\n".join(source))
    return f"https://live.lean-lang.org/#code={encoded}"


def format_steps(steps: list, tactics: list, stuck: bool = False,
                 theorem_stmt: str = "") -> str:
    """Render the agent's step log as a Markdown walkthrough.

    Args:
        steps: Step records (dicts). The keys read are 'goal', 'chosen',
            'candidates', 'status', 'error' and 'step'; every one is optional.
            A record without 'step' is numbered by its position in the list
            (0-based, matching the labels in the proof-tree SVG).
        tactics: Tactics of the finished proof. Any non-empty list is rendered
            under a "Complete proof" heading, so callers should pass it only
            when the proof really is complete. Ignored when ``stuck`` is true.
        stuck: When true, a "Search concluded" note replaces the proof section.
        theorem_stmt: If given, a pre-filled playground link is added to the
            proof section; otherwise a generic "paste it yourself" hint is shown.

    Returns:
        Markdown text, or "" when ``steps`` is empty.
    """
    if not steps:
        return ""
    out = ["### Agent loop: propose → verify → learn\n"]
    for idx, s in enumerate(steps):
        # Read every field leniently: a record that lacks 'goal' or 'step'
        # should degrade the display, not abort the whole response.
        goal = s.get('goal', '')
        chosen = s.get('chosen', '')
        candidates = s.get('candidates') or []
        status = s.get('status', '')
        error = s.get('error') or ''
        step_num = s.get('step')
        if step_num is None:
            step_num = idx

        out.append(f"---\n**Step {step_num}** — current goal:")
        out.append(f"```\n{goal}\n```")

        if not candidates:
            out.append("⚠️ *LLM endpoint warming up — no candidates available at this step*")
        else:
            rejected = [c for c in candidates if c != chosen]
            out.append(f"\U0001f916 **LLM proposed:** `{'`, `'.join(candidates)}`")
            for r in rejected:
                out.append(f"- ❌ `{r}` — Lean kernel rejected")
            if error and rejected:
                # Keep the feedback to one short line.
                short_err = error.replace('\n', ' ')[:120]
                out.append(f"- \U0001f4e2 *Kernel error fed back to agent:* `{short_err}`")
            if chosen and status != 'all_failed':
                exp = explain_tactic(chosen)
                out.append(f"- ✅ `{chosen}` — Lean kernel accepted")
                if exp:
                    out.append(f"  {exp}")
                out.append(f"- *Result:* `{status}`")
            elif status == 'all_failed':
                out.append("- ❌ All candidates rejected — feeding errors back, trying next step")

        out.append("")

    if stuck:
        out.append(
            "---\n⚠️ **Search concluded** — the same goal state recurred with no progress.\n"
            "The claim could not be proven. It may be mathematically false, "
            "or require axioms and tactics outside the current mode."
        )
    elif tactics:
        # Same layout make_playground_url uses: "by" then each tactic line indented.
        proof_lines = ["by"] + [f"  {line}" for t in tactics for line in t.strip().split("\n")]
        proof_block = "\n".join(proof_lines)
        out.append("---\n### Complete proof\n")
        out.append("```lean4")
        out.append(proof_block)
        out.append("```")
        out.append("\n---\n### Verify it yourself")
        if theorem_stmt:
            url = make_playground_url(theorem_stmt, tactics)
            out.append(
                f"[**▶ Open in Lean 4 web playground ↗**]({url})\n\n"
                "The proof is pre-filled and ready to run. "
                "The kernel outputs **\"Goals accomplished!\"** or rejects it if anything is wrong. "
                "**It cannot be convinced. It cannot be fooled.**"
            )
        else:
            out.append(
                "Paste the proof above into [live.lean-lang.org ↗](https://live.lean-lang.org/). "
                "The kernel outputs **\"Goals accomplished!\"** — or rejects it if anything is wrong."
            )

    return "\n".join(out)


# ── Main proof handler ────────────────────────────────────────────────────────

def prove_theorem(theorem: str, max_steps: int, use_fallbacks: bool):
    """Gradio handler: send the theorem to the proof agent and render the result.

    This is a generator. It first yields a status line, then exactly one final
    tuple; every yielded value is ``(banner_html, steps_markdown, tree_html)``.

    Args:
        theorem: Lean 4 theorem statement typed by the user.
        max_steps: Step budget forwarded to the agent.
        use_fallbacks: Whether the agent may use its fallback tactics.

    Yields:
        A prompt to enter a theorem (blank input), the "sending" status, and
        finally either an error message or the rendered verdict, walkthrough
        and proof tree.
    """
    statement = (theorem or "").strip()
    if not statement:
        yield "Please enter a theorem statement.", "", ""
        return

    yield "⏳ Sending to proof agent on Modal…", "", ""

    # Only unmodified example statements have a plain-English description.
    lookup = EXAMPLE_LOOKUP.get(statement)
    english_desc = lookup[0] if lookup else ""

    try:
        resp = requests.post(
            f"{MODAL_ENDPOINT}/prove",
            json={
                "theorem": theorem,
                "max_steps": max_steps,
                "use_fallbacks": use_fallbacks,
                "show_reasoning": True,   # always run live loop, skip cache read
            },
            timeout=280,
        )
        data = resp.json()
    except requests.exceptions.Timeout:
        yield "❌ Request timed out. Try a simpler theorem or fewer max steps.", "", ""
        return
    except Exception as e:
        # Covers connection failures and bodies that are not valid JSON.
        yield f"❌ Error contacting proof agent: {e}", "", ""
        return

    # The body parsed as JSON but carries no verdict (e.g. an error payload):
    # report it instead of failing on a missing key below.
    if not isinstance(data, dict) or "success" not in data:
        detail = ""
        if isinstance(data, dict):
            # NOTE(review): 'detail' is the usual FastAPI error field — an
            # assumption based on the endpoint name; confirm against the service.
            detail = data.get("detail") or data.get("message") or ""
        suffix = f": {str(detail)[:200]}" if detail else ""
        yield f"❌ Unexpected response from proof agent (HTTP {resp.status_code}){suffix}", "", ""
        return

    warmup_note = ""
    # `or ""` also covers an explicit JSON null for "message".
    msg = str(data.get("message") or "").lower()
    if "warming up" in msg or "unavailable" in msg:
        warmup_note = "⚠️ LLM endpoint warming up (cold start) — proof attempted with fallback tactics only.\n\n"

    success = data["success"]
    stuck = data.get("stuck", False)
    steps = data.get("steps") or []
    tactics = data.get("tactics") or []

    banner = build_verdict_banner(success, stuck, english_desc)
    # format_steps renders any non-empty tactic list under a "Complete proof"
    # heading with a "ready to run" playground link, so hand it the tactics
    # only when the proof was actually accepted. Partial progress remains
    # visible step by step in the walkthrough.
    details = format_steps(
        steps, tactics if success else [],
        stuck=stuck, theorem_stmt=statement
    )
    svg_html = build_proof_tree_svg(
        steps, tactics, success, stuck=stuck, claim=english_desc
    )
    yield warmup_note + banner, details, svg_html


# ── UI ────────────────────────────────────────────────────────────────────────

# Pass `theme` only when the theme object was built successfully; otherwise the
# app runs with Gradio's default theme plus CUSTOM_CSS.
_blocks_kwargs: dict = dict(title="Q.E.D", css=CUSTOM_CSS)
if _theme is not None:
    _blocks_kwargs["theme"] = _theme

with gr.Blocks(**_blocks_kwargs) as demo:
    # Page header / intro text.
    gr.Markdown("""
# ⊢ Q.E.D
**∀ theorem → ∃ proof** — LLM-guided formal verification, powered by Modal.

Enter a theorem in Lean 4 syntax (∀ ∃ ¬ ∧ ∨ → ↔ ℕ ℤ α all supported).
The agent proposes tactics, Lean's kernel verifies each step, and kernel errors feed back into the next proposal.
Watch it **prove** a true theorem — or **correctly conclude** a false one is unprovable.
""")

    with gr.Row():
        # Left column (wider): theorem input, its description, and run controls.
        with gr.Column(scale=2):
            # Pre-filled with the first example so the page is runnable on load.
            theorem_input = gr.Textbox(
                label="Theorem statement",
                placeholder="theorem my_thm : ∀ n : Nat, 0 + n = n",
                value=EXAMPLES[0][0],
                lines=3,
            )
            # Italic caption; only the example buttons below ever update it.
            desc_display = gr.Markdown(
                value=f"*{EXAMPLES[0][1]}*",
                label="",
            )
            with gr.Row():
                max_steps = gr.Slider(5, 30, value=20, step=1, label="Max steps")
                use_fallbacks = gr.Checkbox(value=True, label="Use fallback tactics (ω, simp…)")
            prove_btn = gr.Button("⊢ Prove", variant="primary")

        # Right column: one button per example, labelled with its description.
        with gr.Column(scale=1):
            gr.Markdown("**Examples** — click to load\n\n*∀ provable ones first, then a false one ↓*")
            # `kind` is unpacked but not used here.
            for lean_stmt, english_desc, kind in EXAMPLES:
                btn = gr.Button(english_desc, size="sm")
                # Default arguments freeze this iteration's values; a plain
                # closure would see only the last example (late binding).
                # The handler takes no inputs and fills both the text box and
                # the caption.
                btn.click(
                    fn=lambda lean=lean_stmt, d=english_desc: (lean, f"*{d}*"),
                    outputs=[theorem_input, desc_display],
                )

    # Result area, top to bottom: verdict banner, step walkthrough, proof tree.
    banner_out = gr.HTML(label="Verdict")
    steps_out = gr.Markdown(label="Agent loop walkthrough")
    tree_out = gr.HTML(label="Proof search tree")

    # prove_theorem is a generator yielding (banner, walkthrough, tree) tuples,
    # in the same order as `outputs`.
    prove_btn.click(
        fn=prove_theorem,
        inputs=[theorem_input, max_steps, use_fallbacks],
        outputs=[banner_out, steps_out, tree_out],
    )

# NOTE(review): queue() is presumably what lets the generator's intermediate
# status yield reach the browser — confirm against the Gradio version in use.
# Both calls run at import time (there is no __main__ guard).
demo.queue()
demo.launch()