Lean4-helper / app.py
p4r5kpftnp-cmd
Minimalist UI pass: remove emojis and decorative chrome
6e69422
Raw
History Blame Contribute Delete
21.5 kB
import io
import os
import sys
import tempfile
import threading
import gradio as gr
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
from langgraph_agent import LangGraphAgent
from lean_verifier import LeanEnvironment
from retriever import MathLibRetriever
# ---------------------------------------------------------------------------
# Cached heavyweight components.
#
# `LeanEnvironment` spawns a Lean REPL and loads Mathlib (5-15s cold).
# `MathLibRetriever` loads the ~200MB FAISS index + cross-encoder model.
# Without caching, every solve_proof call paid this cost. We build them once
# on first use and share across requests; LeanEnvironment.verify_proof has
# its own lock so concurrent solves serialize at the Lean step but parallel
# everywhere else.
# ---------------------------------------------------------------------------
_COMPONENT_LOCK = threading.Lock()
_LEAN_ENV: LeanEnvironment | None = None
_RETRIEVER: MathLibRetriever | None = None
def _get_components():
global _LEAN_ENV, _RETRIEVER
with _COMPONENT_LOCK:
if _LEAN_ENV is None:
print("Initializing Lean environment (one-time)โ€ฆ")
_LEAN_ENV = LeanEnvironment(use_mathlib=True)
if _RETRIEVER is None:
print("Loading FAISS retriever (one-time)โ€ฆ")
_RETRIEVER = MathLibRetriever()
return _LEAN_ENV, _RETRIEVER
# ---------------------------------------------------------------------------
# Thread-safe stdout capture (preserves per-call agent logs under concurrency).
# ---------------------------------------------------------------------------
class _ThreadLocalStdout:
def __init__(self, real_stdout):
self._real = real_stdout
self._tls = threading.local()
def _current(self):
stack = getattr(self._tls, "stack", None)
return stack[-1] if stack else self._real
def write(self, s):
return self._current().write(s)
def flush(self):
try:
return self._current().flush()
except Exception:
return None
def isatty(self):
try:
return self._real.isatty()
except Exception:
return False
def fileno(self):
return self._real.fileno()
def push(self, buf):
stack = getattr(self._tls, "stack", None)
if stack is None:
stack = []
self._tls.stack = stack
stack.append(buf)
def pop(self):
stack = getattr(self._tls, "stack", None)
if stack:
stack.pop()
_STDOUT_PROXY = _ThreadLocalStdout(sys.stdout)
sys.stdout = _STDOUT_PROXY
class _capture_stdout:
def __init__(self, buf):
self._buf = buf
def __enter__(self):
if sys.stdout is not _STDOUT_PROXY:
sys.stdout = _STDOUT_PROXY
_STDOUT_PROXY.push(self._buf)
return self._buf
def __exit__(self, exc_type, exc, tb):
_STDOUT_PROXY.pop()
return False
GROQ_MODELS = [
"openai/gpt-oss-120b", # default, strongest open-weight on Groq
"openai/gpt-oss-20b", # fast / cheap GPT-OSS
"qwen/qwen3-32b", # Qwen 3, 32B reasoning model
"meta-llama/llama-4-scout-17b-16e-instruct", # Llama 4 MoE
]
CLAUDE_MODELS = [
"claude-opus-4-7",
"claude-sonnet-4-6",
"claude-haiku-4-5-20251001",
]
ALL_MODELS = GROQ_MODELS + CLAUDE_MODELS
def _is_claude(model_name: str) -> bool:
return model_name.startswith("claude-")
EXAMPLE_CODE = """\
import Mathlib
theorem add_zero_simple (n : โ„•) : n + 0 = n := by
sorry
"""
# Cream / warm-orange palette inspired by claude.ai
CUSTOM_CSS = """
:root {
--bg: #FAF9F5;
--panel: #FFFFFF;
--panel-tint: #F5F4ED;
--border: #E8E5DC;
--border-soft: #EFEDE4;
--text: #1A1A1A;
--text-muted: #6B6B6B;
--accent: #D97757;
--accent-hover:#C5694A;
--accent-soft: #FBE9DF;
--ok: #2E7D55;
--err: #B85450;
}
/* โ”€โ”€โ”€ Container โ”€โ”€โ”€ */
.gradio-container {
background: var(--bg) !important;
color: var(--text) !important;
color-scheme: light;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
max-width: 1280px !important;
margin: 0 auto !important;
padding: 44px 28px 28px !important;
}
footer, .show-api { display: none !important; }
/* โ”€โ”€โ”€ Header โ”€โ”€โ”€ */
#header {
display: flex;
align-items: baseline;
gap: 14px;
margin-bottom: 32px;
padding: 0 2px;
}
#header .title {
font-size: 21px;
font-weight: 600;
letter-spacing: -0.01em;
color: var(--text);
}
#header .sub {
font-size: 13px;
color: var(--text-muted);
letter-spacing: 0.01em;
}
/* โ”€โ”€โ”€ Controls bar โ”€โ”€โ”€ */
#controls {
display: flex;
align-items: flex-end;
gap: 22px;
margin-bottom: 16px;
padding: 14px 16px;
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
}
/* strip Gradio's inner block chrome so controls sit flat in the bar */
#controls .block, #controls .form {
background: transparent !important;
border: none !important;
box-shadow: none !important;
padding: 0 !important;
margin: 0 !important;
}
/* component labels โ€” small caps over each field */
#controls span[data-testid="block-info"],
#controls label > span:first-child {
display: inline-block;
font-size: 11px !important;
font-weight: 600 !important;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--text-muted) !important;
margin-bottom: 5px !important;
}
/* โ”€โ”€โ”€ Editor panels โ”€โ”€โ”€ */
.panel {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
overflow: hidden;
transition: border-color 0.15s ease;
max-height: 580px;
}
.panel:focus-within {
border-color: #D2CEC1;
}
.panel-header {
display: flex !important;
align-items: center !important;
justify-content: space-between !important;
padding: 9px 14px !important;
background: var(--panel-tint) !important;
border-bottom: 1px solid var(--border-soft) !important;
gap: 8px !important;
min-height: 44px;
}
.panel-title {
font-family: 'JetBrains Mono', ui-monospace, "SF Mono", Menlo, monospace;
font-size: 12px;
color: var(--text-muted);
font-weight: 500;
}
.panel-header .panel-actions {
display: flex;
gap: 6px;
align-items: center;
}
/* โ”€โ”€โ”€ Buttons โ”€โ”€โ”€ */
.btn button {
background: var(--panel) !important;
color: var(--text-muted) !important;
border: 1px solid var(--border) !important;
border-radius: 6px !important;
padding: 5px 13px !important;
font-size: 12px !important;
font-weight: 500 !important;
min-width: 0 !important;
box-shadow: none !important;
transition: color 0.15s ease, border-color 0.15s ease !important;
cursor: pointer;
}
.btn button:hover {
color: var(--text) !important;
border-color: #CFCBBE !important;
}
.btn button:focus-visible {
outline: 1px solid var(--accent) !important;
outline-offset: 1px !important;
}
.btn-primary button {
background: var(--accent) !important;
color: white !important;
border: 1px solid var(--accent) !important;
box-shadow: none !important;
}
.btn-primary button:hover {
background: var(--accent-hover) !important;
border-color: var(--accent-hover) !important;
color: white !important;
}
/* โ”€โ”€โ”€ Code editors โ”€โ”€โ”€ */
.cm-editor {
background: var(--panel) !important;
font-family: 'JetBrains Mono', ui-monospace, "SF Mono", Menlo, monospace !important;
font-size: 13px !important;
line-height: 1.7 !important;
max-height: 520px;
overflow: auto;
}
.cm-content { color: var(--text) !important; padding: 12px 0 !important; }
.cm-gutters {
background: var(--panel) !important;
color: #B0AFA3 !important;
border-right: 1px solid var(--border-soft) !important;
}
.cm-activeLine { background: var(--panel-tint) !important; }
.cm-activeLineGutter { background: var(--panel-tint) !important; color: var(--text) !important; }
.cm-cursor { border-left-color: var(--accent) !important; }
.cm-selectionBackground { background: var(--accent-soft) !important; }
.cm-scroller { max-height: 520px; overflow: auto !important; }
/* readable text in editors regardless of theme/state */
.cm-editor, .cm-editor .cm-content, .cm-editor .cm-line,
.cm-editor .cm-line span, .cm-editor .cm-content * {
color: #1A1A1A !important;
}
/* thin warm scrollbars in code panes + logs */
.cm-scroller::-webkit-scrollbar,
.gradio-accordion textarea::-webkit-scrollbar { width: 8px; height: 8px; }
.cm-scroller::-webkit-scrollbar-thumb,
.gradio-accordion textarea::-webkit-scrollbar-thumb {
background: #DDD9CD;
border-radius: 4px;
}
.cm-scroller::-webkit-scrollbar-thumb:hover,
.gradio-accordion textarea::-webkit-scrollbar-thumb:hover { background: #C9C5B8; }
.cm-scroller::-webkit-scrollbar-track,
.gradio-accordion textarea::-webkit-scrollbar-track { background: transparent; }
/* Remove the harsh default block borders Gradio puts around gr.Code */
.gr-block.gr-code, .gradio-code {
border: none !important;
background: transparent !important;
}
/* โ”€โ”€โ”€ Spacing between the two panes โ”€โ”€โ”€ */
#editor-row { gap: 18px !important; }
@media (max-width: 900px) {
#editor-row { flex-direction: column !important; }
#controls { flex-wrap: wrap; }
}
/* โ”€โ”€โ”€ Logs accordion โ”€โ”€โ”€ */
.gradio-accordion {
background: var(--panel) !important;
border: 1px solid var(--border) !important;
border-radius: 10px !important;
margin-top: 16px !important;
overflow: hidden;
}
.gradio-accordion > button {
background: var(--panel-tint) !important;
color: var(--text-muted) !important;
font-size: 12px !important;
font-weight: 500 !important;
padding: 10px 14px !important;
border: none !important;
text-align: left !important;
}
.gradio-accordion textarea {
background: var(--panel) !important;
color: var(--text) !important;
border: none !important;
font-family: 'JetBrains Mono', ui-monospace, "SF Mono", Menlo, monospace !important;
font-size: 12px !important;
padding: 12px 14px !important;
line-height: 1.6 !important;
max-height: 240px;
overflow: auto !important;
}
/* โ”€โ”€โ”€ Status bar โ”€โ”€โ”€ */
#status-bar {
display: flex;
align-items: center;
gap: 12px;
margin-top: 16px;
padding: 11px 16px;
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
font-size: 13px;
color: var(--text);
}
#status-bar .pill {
display: inline-flex;
align-items: center;
padding: 2px 10px;
font-size: 11px;
font-weight: 600;
letter-spacing: 0.04em;
text-transform: uppercase;
border-radius: 5px;
white-space: nowrap;
}
#status-bar .pill.ok { background: #E8F3EC; color: var(--ok); }
#status-bar .pill.err { background: #FBE9E7; color: var(--err); }
#status-bar .pill.idle { background: var(--panel-tint); color: var(--text-muted); }
#status-bar .pill.run { background: var(--accent-soft); color: var(--accent-hover); }
/* โ”€โ”€โ”€ Form chrome (dropdown, slider, key field) โ”€โ”€โ”€ */
.gradio-dropdown input,
.gradio-dropdown .wrap {
background: var(--panel) !important;
color: var(--text) !important;
border: 1px solid var(--border) !important;
border-radius: 6px !important;
font-size: 13px !important;
}
.gradio-dropdown input:focus,
.gradio-dropdown .wrap:focus-within {
border-color: var(--accent) !important;
outline: none !important;
}
input[type="range"] { accent-color: var(--accent) !important; }
.gradio-slider .head, .gradio-slider input {
color: var(--text) !important;
font-size: 13px !important;
background: var(--panel) !important;
font-variant-numeric: tabular-nums;
}
input[type="password"], input[type="text"] {
border-radius: 6px !important;
}
input[type="password"]:focus, input[type="text"]:focus {
border-color: var(--accent) !important;
}
/* inputs / textareas โ€” never white on white */
textarea, .gradio-textbox textarea, .gradio-accordion textarea,
input[type="text"], input[type="password"] {
color: #1A1A1A !important;
background: var(--panel) !important;
}
::placeholder { color: var(--text-muted) !important; opacity: 1 !important; }
/* dropdown popover โ€” pinned light so options never go white-on-white */
ul[role="listbox"],
.gradio-dropdown ul,
.gradio-dropdown [role="listbox"] {
background: #FFFFFF !important;
border: 1px solid var(--border) !important;
border-radius: 7px !important;
color: #1A1A1A !important;
}
ul[role="listbox"] li,
.gradio-dropdown li,
.gradio-dropdown [role="option"] {
color: #1A1A1A !important;
background: #FFFFFF !important;
font-size: 13px !important;
}
ul[role="listbox"] li[aria-selected="true"],
ul[role="listbox"] li:hover,
.gradio-dropdown li:hover,
.gradio-dropdown [role="option"][aria-selected="true"] {
background: var(--accent-soft) !important;
color: #1A1A1A !important;
}
"""
def solve_proof(lean_code: str, model_name: str, max_retries: int, anthropic_api_key: str = ""):
if not lean_code.strip():
return _status_html("idle", "No input โ€” paste a Lean 4 theorem on the left."), "", ""
claude = _is_claude(model_name)
if claude:
if not anthropic_api_key or not anthropic_api_key.strip():
return _status_html("err", "Anthropic API key required for Claude models โ€” paste it above."), "", ""
api_key = anthropic_api_key.strip()
else:
if not os.environ.get("GROQ_API_KEY"):
return _status_html("err", "GROQ_API_KEY missing โ€” add it as a Space secret."), "", ""
api_key = None # ChatGroq picks it up from env
tmp_path = None
try:
tmp = tempfile.NamedTemporaryFile(
suffix=".lean", mode="w", delete=False, dir="/tmp", encoding="utf-8",
)
tmp_path = tmp.name
tmp.write(lean_code)
tmp.close()
lean_env, retriever = _get_components()
log_buf = io.StringIO()
with _capture_stdout(log_buf):
# fast_model=None: two-stage retry is disabled so every attempt
# uses the user-selected model and behavior matches the dropdown.
agent = LangGraphAgent(
model_name=model_name,
max_retries=int(max_retries),
api_key=api_key,
fast_model=None,
lean_env=lean_env,
retriever=retriever,
)
result = agent.solve_file_detailed(tmp_path)
with open(tmp_path, encoding="utf-8") as f:
final_code = f.read()
logs = log_buf.getvalue()
if result["success"]:
status = _status_html(
"ok",
f"Verified on attempt {result['solved_at_attempt']} of {result['total_attempts']}.",
)
else:
status = _status_html("err", f"No proof found after {result['total_attempts']} attempts.")
return status, final_code, logs
except Exception as exc:
return _status_html("err", f"Error: {exc}"), "", ""
finally:
if tmp_path is not None:
try:
os.unlink(tmp_path)
except OSError:
pass
def _status_html(kind: str, message: str) -> str:
label = {"ok": "Solved", "err": "Failed", "idle": "Idle", "run": "Solving"}.get(kind, kind)
return (
f'<div id="status-bar">'
f'<span class="pill {kind}">{label}</span>'
f'<span>{message}</span>'
f'</div>'
)
with gr.Blocks(
title="Lean 4 Proof Assistant",
theme=gr.themes.Base(
primary_hue="orange",
neutral_hue="slate",
font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
),
css=CUSTOM_CSS,
) as demo:
# โ”€โ”€โ”€ Header โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
gr.HTML(
"""
<div id="header">
<div class="title">Lean 4 Proof Assistant</div>
<div class="sub">Mathlib RAG ยท LangGraph ยท formally verified</div>
</div>
"""
)
# โ”€โ”€โ”€ Controls bar โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with gr.Row(elem_id="controls"):
model_dropdown = gr.Dropdown(
choices=ALL_MODELS, value=GROQ_MODELS[0],
label="Model", scale=2,
)
retries_slider = gr.Slider(
minimum=1, maximum=10, value=5, step=1,
label="Max retries", scale=1,
)
anthropic_key_input = gr.Textbox(
label="Claude API key (optional)",
placeholder="sk-ant-โ€ฆ โ€” only for Claude models",
type="password",
scale=1,
)
# โ”€โ”€โ”€ Two editor panes โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with gr.Row(elem_id="editor-row", equal_height=True):
# Input pane
with gr.Column(scale=1, elem_classes="panel"):
with gr.Row(elem_classes="panel-header"):
gr.HTML('<span class="panel-title">theorem.lean</span>')
with gr.Row(elem_classes="panel-actions"):
solve_btn = gr.Button("Solve", elem_classes="btn btn-primary")
reset_btn = gr.Button("Reset", elem_classes="btn")
lean_input = gr.Code(
language=None, value=EXAMPLE_CODE,
lines=22, show_label=False, container=False,
)
# Output pane
with gr.Column(scale=1, elem_classes="panel"):
with gr.Row(elem_classes="panel-header"):
gr.HTML('<span class="panel-title">proof.lean</span>')
with gr.Row(elem_classes="panel-actions"):
regen_btn = gr.Button("Regenerate", elem_classes="btn")
copy_btn = gr.Button("Copy", elem_classes="btn")
code_output = gr.Code(
language=None, interactive=False,
lines=22, show_label=False, container=False,
)
# โ”€โ”€โ”€ Status bar โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
status_output = gr.HTML(_status_html("idle", "Ready. Paste a theorem and click Solve."))
# โ”€โ”€โ”€ Logs (collapsed by default) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with gr.Accordion("Agent logs", open=False):
logs_output = gr.Textbox(
interactive=False, lines=10, show_label=False, container=False,
placeholder="Agent attempts, retrieval, and verifier output will stream hereโ€ฆ",
)
# โ”€โ”€โ”€ Generation-state tracking โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# Flips True while solve_proof is running so the model-change handler
# can warn the user that switching mid-flight won't affect the current
# attempt.
running_state = gr.State(False)
def _start_run():
return True
def _end_run():
return False
def _show_running(model_name: str):
return _status_html("run", f"Solving with {model_name} โ€” this can take a minuteโ€ฆ")
def _warn_on_model_change(running: bool):
if running:
gr.Warning(
"Generation in progress โ€” switching the model now won't "
"affect the current attempt and will reset for the next "
"Solve / Regenerate click."
)
model_dropdown.change(_warn_on_model_change, inputs=running_state)
# โ”€โ”€โ”€ Wiring โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
solve_btn.click(_start_run, outputs=running_state).then(
_show_running, inputs=model_dropdown, outputs=status_output,
).then(
solve_proof,
inputs=[lean_input, model_dropdown, retries_slider, anthropic_key_input],
outputs=[status_output, code_output, logs_output],
).then(_end_run, outputs=running_state)
regen_btn.click(_start_run, outputs=running_state).then(
_show_running, inputs=model_dropdown, outputs=status_output,
).then(
solve_proof,
inputs=[lean_input, model_dropdown, retries_slider, anthropic_key_input],
outputs=[status_output, code_output, logs_output],
).then(_end_run, outputs=running_state)
reset_btn.click(lambda: EXAMPLE_CODE, outputs=lean_input)
copy_btn.click(
fn=None, inputs=code_output,
js="(code) => { navigator.clipboard.writeText(code); return []; }",
)
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)