#!/usr/bin/env python3 """Lean+AI conjecture analysis Space app with explicit work traces.""" from __future__ import annotations import datetime as dt import heapq import os import re import textwrap from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple import gradio as gr from datasets import load_dataset from huggingface_hub import InferenceClient, hf_hub_download DEFAULT_DATASET_REPO = "NorthernTribe-Research/math-conjecture-training-corpus" DEFAULT_MODEL_REPO = "NorthernTribe-Research/math-conjecture-model" DEFAULT_FALLBACK_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct" WORKSPACE = Path(__file__).resolve().parent / "workspace" DATA_CACHE = WORKSPACE / "data" DEFAULT_CONJECTURE = ( "Erdos-style Sidon set conjecture: every finite Sidon set can be extended " "to a finite perfect difference set." ) UI_CSS = r""" :root { --bg-0: #07090b; --bg-1: #0d1115; --bg-2: #13181d; --panel: rgba(17, 21, 26, 0.92); --panel-strong: rgba(22, 27, 34, 0.98); --panel-soft: rgba(12, 16, 20, 0.9); --line: rgba(157, 176, 193, 0.16); --line-strong: rgba(193, 210, 223, 0.28); --text: #eff4f8; --muted: #94a2af; --accent: #c6d1db; --accent-soft: rgba(198, 209, 219, 0.14); --success: #95c7ad; --warn: #c8b27f; --danger: #d39c9c; --shadow: 0 26px 70px rgba(0, 0, 0, 0.36); --radius-lg: 22px; --radius-md: 16px; --radius-sm: 12px; --pill: 999px; --transition: 180ms ease; } body, .gradio-container { color: var(--text) !important; background: radial-gradient(circle at 14% 8%, rgba(198, 209, 219, 0.08), rgba(198, 209, 219, 0) 32%), radial-gradient(circle at 88% 4%, rgba(157, 176, 193, 0.06), rgba(157, 176, 193, 0) 28%), linear-gradient(160deg, var(--bg-0) 0%, var(--bg-1) 52%, var(--bg-2) 100%) !important; } .gradio-container { font-family: "IBM Plex Sans", "Segoe UI", "Aptos", sans-serif !important; --color-accent: var(--accent) !important; --color-accent-soft: var(--accent-soft) !important; --color-accent-emphasis: var(--accent) !important; 
--button-primary-background-fill: linear-gradient(135deg, rgba(40, 46, 53, 0.98), rgba(24, 30, 37, 0.98)) !important; --button-primary-background-fill-hover: linear-gradient(135deg, rgba(53, 60, 68, 0.98), rgba(31, 37, 44, 0.98)) !important; --button-primary-border-color: rgba(201, 214, 225, 0.34) !important; --button-primary-text-color: var(--text) !important; } .gradio-container .main { max-width: 1300px !important; padding-top: 18px !important; padding-bottom: 28px !important; } .gradio-container .block { background: transparent !important; border: 0 !important; box-shadow: none !important; } .hero { position: relative; overflow: hidden; border: 1px solid var(--line-strong); border-radius: var(--radius-lg); padding: clamp(20px, 3vw, 28px); background: radial-gradient(circle at 88% 12%, rgba(198, 209, 219, 0.08), transparent 30%), radial-gradient(circle at 6% 0%, rgba(157, 176, 193, 0.06), transparent 24%), linear-gradient(160deg, rgba(23, 28, 34, 0.98), rgba(12, 16, 20, 0.98)); box-shadow: var(--shadow); } .hero-grid { position: relative; z-index: 1; display: grid; grid-template-columns: minmax(0, 1.45fr) minmax(250px, 0.9fr); gap: 14px; align-items: stretch; } .hero-copy { min-width: 0; } .hero-side { display: grid; gap: 10px; } .hero-panel { border: 1px solid rgba(201, 214, 225, 0.2); border-radius: var(--radius-md); padding: 12px; background: linear-gradient(180deg, rgba(20, 25, 31, 0.96), rgba(11, 14, 18, 0.98)); box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04), 0 10px 24px rgba(0, 0, 0, 0.22); } .hero-panel-label, .hero-kicker, .hero-topline { text-transform: uppercase; letter-spacing: 0.18em; } .hero-topline { position: relative; z-index: 1; margin-bottom: 12px; color: var(--muted); font-size: 0.68rem; } .hero-kicker { margin: 0 0 8px 0; color: var(--accent); font-size: 0.68rem; } .hero-panel-label { color: var(--accent); font-size: 0.64rem; } .hero-panel-value { margin-top: 5px; font-size: 1.12rem; font-weight: 700; letter-spacing: 0.04em; } 
.hero-panel-copy { margin-top: 6px; color: var(--muted); line-height: 1.45; font-size: 0.86rem; } .hero::before { content: ""; position: absolute; inset: 0; background: linear-gradient(90deg, rgba(198, 209, 219, 0.03) 1px, transparent 1px), linear-gradient(rgba(198, 209, 219, 0.03) 1px, transparent 1px); background-size: 48px 48px; mask-image: linear-gradient(180deg, rgba(0, 0, 0, 0.9), transparent 88%); pointer-events: none; } .hero::after { content: ""; position: absolute; inset: 14px; border: 1px solid rgba(201, 214, 225, 0.12); border-radius: calc(var(--radius-lg) - 8px); pointer-events: none; } .hero h1 { margin: 0 0 8px 0; font-size: clamp(1.9rem, 4vw, 3.1rem); letter-spacing: 0.14em; text-transform: uppercase; line-height: 1.05; } .hero p { margin: 0; color: var(--muted); max-width: 72ch; line-height: 1.6; } .chip-row { margin-top: 14px; display: flex; flex-wrap: wrap; gap: 8px; } .chip-row span { border: 1px solid rgba(201, 214, 225, 0.22); border-radius: var(--pill); padding: 6px 12px; font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.18em; color: var(--accent); background: rgba(12, 16, 20, 0.76); box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04); } .section-label { display: flex; align-items: center; gap: 10px; margin: 22px 0 10px; color: var(--accent); text-transform: uppercase; letter-spacing: 0.22em; font-size: 0.72rem; } .section-label::before { content: ""; width: 28px; height: 1px; background: linear-gradient(90deg, var(--accent), transparent); } .control-row, .output-row { gap: 14px !important; align-items: stretch !important; } .control-row > div, .output-row > div { min-width: 0; } .tactical-card, .status-card, .report-card, .evidence-card, .trace-card, .prompt-card, .lean-card { position: relative; border: 1px solid var(--line); border-radius: var(--radius-md); background: linear-gradient(180deg, rgba(25, 31, 38, 0.94), rgba(14, 18, 22, 0.96)); box-shadow: 0 12px 40px rgba(0, 0, 0, 0.18); transition: transform var(--transition), 
border-color var(--transition), box-shadow var(--transition), background var(--transition); } .tactical-card:hover, .status-card:hover, .report-card:hover, .evidence-card:hover, .trace-card:hover, .prompt-card:hover, .lean-card:hover { border-color: rgba(201, 214, 225, 0.28); box-shadow: 0 16px 48px rgba(0, 0, 0, 0.24); transform: translateY(-1px); } .status-card { background: linear-gradient(180deg, rgba(28, 36, 44, 0.95), rgba(13, 17, 21, 0.97)); } .status-card :is(h1, h2, h3, h4, p, li, code), .report-card :is(h1, h2, h3, h4, p, li, code), .trace-card :is(h1, h2, h3, h4, p, li, code), .prompt-card :is(h1, h2, h3, h4, p, li, code), .lean-card :is(h1, h2, h3, h4, p, li, code), .evidence-card :is(h1, h2, h3, h4, p, li, code) { color: var(--text) !important; } .status-card :is(strong, b), .report-card :is(strong, b), .trace-card :is(strong, b), .prompt-card :is(strong, b), .lean-card :is(strong, b), .evidence-card :is(strong, b) { color: var(--accent) !important; } .status-card a, .report-card a, .trace-card a, .prompt-card a, .lean-card a, .evidence-card a { color: var(--accent); text-decoration-color: rgba(198, 209, 219, 0.34); } .status-card .markdown, .report-card .markdown { margin-bottom: 0; } .status-card .markdown h3, .report-card .markdown h3 { margin-top: 0; text-transform: uppercase; letter-spacing: 0.1em; } .status-card code, .report-card code, .trace-card code, .prompt-card code, .lean-card code, .evidence-card code { background: rgba(255, 255, 255, 0.04); border: 1px solid rgba(201, 214, 225, 0.12); border-radius: 8px; padding: 0.08rem 0.34rem; } .gradio-container .wrap:has(textarea), .gradio-container .wrap:has(input[type="text"]), .gradio-container .wrap:has(input[type="range"]), .gradio-container .wrap:has(fieldset) { transition: transform var(--transition), border-color var(--transition), box-shadow var(--transition); } .gradio-container textarea, .gradio-container input[type="text"], .gradio-container input[type="number"], .gradio-container 
input[type="range"], .gradio-container fieldset, .gradio-container .prose, .gradio-container .markdown, .gradio-container .output-class { color: var(--text) !important; } .gradio-container textarea, .gradio-container input[type="text"], .gradio-container input[type="number"] { background: rgba(9, 12, 15, 0.88) !important; border: 1px solid rgba(201, 214, 225, 0.16) !important; border-radius: var(--radius-sm) !important; box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03) !important; } .gradio-container textarea:focus, .gradio-container input[type="text"]:focus, .gradio-container input[type="number"]:focus { border-color: rgba(201, 214, 225, 0.32) !important; box-shadow: 0 0 0 1px rgba(198, 209, 219, 0.16) !important; } .gradio-container fieldset { background: rgba(10, 13, 16, 0.7) !important; border: 1px solid rgba(201, 214, 225, 0.14) !important; border-radius: var(--radius-sm) !important; } .gradio-container .gr-button, .gradio-container button.primary, .gradio-container .gr-button-primary, .gradio-container [data-testid="button"][variant="primary"] { border-radius: var(--pill) !important; padding: 0.8rem 1.1rem !important; transition: transform var(--transition), box-shadow var(--transition), border-color var(--transition), background var(--transition); } .gradio-container button.primary, .gradio-container .gr-button-primary, .gradio-container [data-testid="button"][variant="primary"] { background: linear-gradient(135deg, rgba(49, 56, 64, 0.98), rgba(26, 31, 38, 0.98)) !important; border: 1px solid rgba(201, 214, 225, 0.38) !important; color: var(--text) !important; box-shadow: 0 10px 24px rgba(0, 0, 0, 0.2); } .gradio-container button.primary:hover, .gradio-container .gr-button-primary:hover, .gradio-container [data-testid="button"][variant="primary"]:hover { transform: translateY(-1px); border-color: rgba(201, 214, 225, 0.46) !important; background: linear-gradient(135deg, rgba(61, 69, 79, 0.98), rgba(31, 37, 45, 0.98)) !important; } .gradio-container 
button.primary:active, .gradio-container .gr-button-primary:active, .gradio-container [data-testid="button"][variant="primary"]:active { transform: translateY(0); } .status-card { box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.02), 0 10px 30px rgba(0, 0, 0, 0.16); } .status-card .markdown ul { margin-bottom: 0; } .status-card .markdown li { margin: 0.16rem 0; } .evidence-card table { background: transparent !important; } .evidence-card th, .evidence-card td { border-color: rgba(201, 214, 225, 0.12) !important; } .evidence-card thead th { background: rgba(255, 255, 255, 0.03) !important; color: var(--accent) !important; text-transform: uppercase; letter-spacing: 0.08em; } .evidence-card tbody tr:hover { background: rgba(255, 255, 255, 0.02) !important; } .trace-card textarea, .prompt-card textarea, .lean-card textarea { min-height: 240px !important; } .gradio-container footer:not(.lab-footer) { display: none !important; } .lab-footer { margin-top: 10px; text-align: center; color: var(--muted); font-size: 0.82rem; letter-spacing: 0.14em; text-transform: uppercase; } ::-webkit-scrollbar { width: 10px; height: 10px; } ::-webkit-scrollbar-track { background: rgba(255, 255, 255, 0.03); } ::-webkit-scrollbar-thumb { background: rgba(201, 214, 225, 0.18); border-radius: 999px; } @media (max-width: 900px) { .gradio-container .main { padding-left: 10px !important; padding-right: 10px !important; } .hero { padding: 18px; } .hero-grid { grid-template-columns: 1fr; gap: 10px; } .control-row, .output-row { flex-direction: column !important; } .chip-row span { font-size: 0.66rem; letter-spacing: 0.14em; } .gradio-container .gr-button, .gradio-container button.primary, .gradio-container .gr-button-primary, .gradio-container [data-testid="button"][variant="primary"] { width: 100% !important; } } @media (max-width: 640px) { .hero h1 { letter-spacing: 0.09em; } .hero-topline, .hero-kicker { letter-spacing: 0.14em; } .hero p { font-size: 0.96rem; } .section-label { font-size: 
0.66rem; letter-spacing: 0.18em; } .trace-card textarea, .prompt-card textarea, .lean-card textarea { min-height: 200px !important; } } """ def now_iso() -> str: return dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat() def ensure_workspace() -> None: DATA_CACHE.mkdir(parents=True, exist_ok=True) def auth_token() -> Optional[str]: token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or "").strip() return token or None def tokenize(text: str) -> List[str]: return re.findall(r"[a-zA-Z0-9_]{3,}", (text or "").lower()) def clip(text: str, max_len: int) -> str: s = (text or "").strip() if len(s) <= max_len: return s return s[: max_len - 3] + "..." def score_example(query_terms: Sequence[str], row: Dict[str, Any]) -> float: text = " ".join( str(row.get(k, "")) for k in ("prompt", "target", "task_type", "family", "source_dataset") ).lower() terms = set(tokenize(text)) if not terms: return 0.0 overlap = len(set(query_terms) & terms) if overlap == 0: return 0.0 density = overlap / max(1, len(query_terms)) return overlap + density def iter_rows_from_split(parquet_path: Path, limit: int) -> Iterable[Dict[str, Any]]: dataset = load_dataset("parquet", data_files={"rows": str(parquet_path)}, split="rows") capped = min(limit, len(dataset)) for idx in range(capped): yield dataset[idx] def retrieve_evidence( *, dataset_repo_id: str, query: str, token: Optional[str], rows_to_scan: int, top_k: int, ) -> Tuple[List[Dict[str, Any]], List[str]]: ensure_workspace() logs: List[str] = [] logs.append(f"[{now_iso()}] Evidence retrieval started for repo: {dataset_repo_id}") query_terms = tokenize(query) if not query_terms: query_terms = tokenize(DEFAULT_CONJECTURE) heap: List[Tuple[float, int, Dict[str, Any]]] = [] total_scanned = 0 for split in ("validation", "test"): cached = hf_hub_download( repo_id=dataset_repo_id, repo_type="dataset", filename=f"{split}.parquet", token=token, ) logs.append(f"[{now_iso()}] Downloaded {split}.parquet to cache") 
split_quota = max(1, rows_to_scan // 2) for idx, row in enumerate(iter_rows_from_split(Path(cached), split_quota)): total_scanned += 1 score = score_example(query_terms, row) if score <= 0: continue packed = { "split": split, "row_index": idx, "score": round(score, 4), "family": str(row.get("family") or ""), "task_type": str(row.get("task_type") or ""), "source_dataset": str(row.get("source_dataset") or ""), "prompt": clip(str(row.get("prompt") or ""), 320), "target": clip(str(row.get("target") or ""), 420), } if len(heap) < top_k: heapq.heappush(heap, (score, idx, packed)) elif score > heap[0][0]: heapq.heapreplace(heap, (score, idx, packed)) best = [item[2] for item in sorted(heap, key=lambda x: (-x[0], x[1]))] logs.append(f"[{now_iso()}] Scanned rows: {total_scanned}; retained evidence: {len(best)}") return best, logs def build_prompt( *, conjecture: str, goal_mode: str, response_level: str, evidence_rows: Sequence[Dict[str, Any]], always_show_work: bool, ) -> str: evidence_lines: List[str] = [] for idx, row in enumerate(evidence_rows, start=1): evidence_lines.append( textwrap.dedent( f"""\ Evidence {idx} - split: {row.get("split")} - family: {row.get("family")} - task_type: {row.get("task_type")} - source_dataset: {row.get("source_dataset")} - prompt: {row.get("prompt")} - target_snippet: {row.get("target")} """ ).strip() ) work_policy = ( "Always show your full step-by-step work, assumptions, candidate constructions, " "and verification plan. Do not hide intermediate reasoning." if always_show_work else "Provide concise reasoning." ) mode = (response_level or "").strip() or "Progressive (simple to Lean)" if mode == "Simple intuition": depth_policy = ( "Use beginner-friendly language, define uncommon terms briefly, and keep equations minimal." 
) output_format = textwrap.dedent( """\ 1) Claim status (supported / refuted / unknown) 2) Plain-language intuition 3) Short solution path (3-6 steps) 4) Next concrete check 5) Risks and uncertainty """ ).strip() elif mode == "Advanced technical": depth_policy = ( "Use rigorous notation, state assumptions explicitly, and separate proven facts from conjectural steps." ) output_format = textwrap.dedent( """\ 1) Claim status (supported / refuted / unknown) 2) Technical derivation with key lemmas 3) Candidate proof/counterexample architecture 4) Lean verification plan 5) Risks and uncertainty """ ).strip() elif mode == "Lean formalization": depth_policy = ( "Prioritize formal structure: theorem statement, lemma decomposition, and Lean-oriented tactic plan." ) output_format = textwrap.dedent( """\ 1) Claim status (supported / refuted / unknown) 2) Formal assumptions and theorem shape 3) Lean-style theorem/lemma skeletons 4) Tactic-level verification plan 5) Risks and uncertainty """ ).strip() elif mode == "Standard proof sketch": depth_policy = ( "Balance readability with rigor: concise structure, equations where needed, and explicit caveats." ) output_format = textwrap.dedent( """\ 1) Claim status (supported / refuted / unknown) 2) Structured proof sketch 3) Candidate proof or counterexample strategy 4) Lean verification plan 5) Risks and uncertainty """ ).strip() else: mode = "Progressive (simple to Lean)" depth_policy = ( "Respond in layers: start simple, then increase rigor, then end with Lean-oriented formalization." ) output_format = textwrap.dedent( """\ 1) Claim status (supported / refuted / unknown) 2) Simple intuition layer 3) Intermediate derivation layer 4) Advanced technical layer 5) Lean verification plan 6) Risks and uncertainty """ ).strip() return textwrap.dedent( f"""\ You are NorthernTribe Research's Lean+AI conjecture assistant. 
Goal mode: {goal_mode} Response depth mode: {mode} Depth policy: {depth_policy} {work_policy} Conjecture: {conjecture} Use the evidence below from our conjecture training corpus. If the evidence is insufficient, say so explicitly and propose the next formal checks. {chr(10).join(evidence_lines) if evidence_lines else "No evidence retrieved."} Output format: {output_format} """ ).strip() def infer_with_repo( *, repo_id: str, prompt: str, token: Optional[str], max_new_tokens: int, temperature: float, ) -> str: client = InferenceClient(model=repo_id, token=token) try: return str( client.text_generation( prompt, max_new_tokens=max_new_tokens, temperature=temperature, return_full_text=False, do_sample=temperature > 0, ) ) except Exception: # Try chat-style fallback if provider only exposes chat completion. response = client.chat.completions.create( model=repo_id, messages=[ {"role": "system", "content": "You are a mathematical conjecture assistant."}, {"role": "user", "content": prompt}, ], max_tokens=max_new_tokens, temperature=temperature, ) return str(response.choices[0].message.content or "") def known_reference_hint(conjecture: str) -> str: lower = conjecture.lower() if "sidon" in lower and "perfect difference" in lower: return ( "Reference note: arXiv:2510.19804 (v2, Jan 16, 2026) reports a counterexample " "to the Sidon extension conjecture, including Lean artifacts." ) return "No direct literature hint matched from built-in references." 
def build_lean_stub(conjecture: str, status: str) -> str:
    """Render a Lean 4 sketch file for *conjecture* with TODO scaffolding.

    *status* is the classification produced by analyze_conjecture and is
    embedded as a comment inside the stub.
    """
    sanitized = clip(conjecture.replace("\n", " "), 220)
    theorem_name = "conjecture_attempt"
    if "sidon" in conjecture.lower():
        theorem_name = "sidon_extension_attempt"
    return textwrap.dedent(
        f"""\
        /- Auto-generated Lean sketch from Conjecture Lean+AI Lab -/
        import Mathlib

        namespace ConjectureLab

        /-- Conjecture statement (natural language): {sanitized} -/
        theorem {theorem_name} : Prop := by
          -- Status from model attempt: {status}
          -- TODO:
          -- 1) Encode finite set / Sidon / perfect difference definitions.
          -- 2) Formalize candidate construction or counterexample.
          -- 3) Prove theorem or derive contradiction.
          trivial

        end ConjectureLab
        """
    )


def analyze_conjecture(
    conjecture: str,
    goal_mode: str,
    response_level: str,
    dataset_repo_id: str,
    model_repo_id: str,
    fallback_model_id: str,
    rows_to_scan: int,
    top_k: int,
    temperature: float,
    max_new_tokens: int,
    always_show_work: bool,
) -> Tuple[str, str, List[Dict[str, Any]], str, str, str]:
    """End-to-end pipeline behind the "Run Conjecture Analysis" button.

    Retrieves evidence, prompts the preferred model (falling back to the
    base model, then to a canned message), classifies the answer, and
    builds a Lean stub.

    Returns ``(status_md, answer_md, evidence_rows, work_log_text,
    prompt_text, lean_stub_text)`` — the exact order of the Gradio outputs.
    Never raises: every failure path is logged into the work log and
    degrades gracefully.
    """
    work_log: List[str] = []
    work_log.append(f"[{now_iso()}] Analysis requested")
    work_log.append(f"[{now_iso()}] Goal mode: {goal_mode}")
    work_log.append(f"[{now_iso()}] Response level: {response_level}")
    work_log.append(f"[{now_iso()}] Dataset repo: {dataset_repo_id}")
    work_log.append(f"[{now_iso()}] Preferred model repo: {model_repo_id}")
    token = auth_token()
    if token:
        work_log.append(f"[{now_iso()}] HF token source detected from runtime secrets")
    else:
        work_log.append(f"[{now_iso()}] No HF token found; proceeding with public-access paths only")
    try:
        evidence, retrieval_log = retrieve_evidence(
            dataset_repo_id=dataset_repo_id,
            query=conjecture,
            token=token,
            # Clamp UI values to sane minimums before scanning.
            rows_to_scan=max(100, int(rows_to_scan)),
            top_k=max(1, int(top_k)),
        )
        work_log.extend(retrieval_log)
    except Exception as exc:
        # Retrieval is best-effort: analysis continues with no evidence.
        evidence = []
        work_log.append(f"[{now_iso()}] Evidence retrieval error: {type(exc).__name__}: {exc}")
    prompt = build_prompt(
        conjecture=conjecture or DEFAULT_CONJECTURE,
        goal_mode=goal_mode,
        response_level=response_level,
        evidence_rows=evidence,
        always_show_work=always_show_work,
    )
    model_used = model_repo_id
    answer = ""
    try:
        answer = infer_with_repo(
            repo_id=model_repo_id,
            prompt=prompt,
            token=token,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
        work_log.append(f"[{now_iso()}] Inference succeeded using preferred model repo")
    except Exception as exc:
        work_log.append(
            f"[{now_iso()}] Preferred model inference failed ({type(exc).__name__}). "
            f"Falling back to {fallback_model_id}."
        )
        model_used = fallback_model_id
        try:
            answer = infer_with_repo(
                repo_id=fallback_model_id,
                prompt=prompt,
                token=token,
                max_new_tokens=int(max_new_tokens),
                temperature=float(temperature),
            )
            work_log.append(f"[{now_iso()}] Fallback model inference succeeded")
        except Exception as inner_exc:
            # Last resort: a canned answer so the UI never comes back empty.
            work_log.append(
                f"[{now_iso()}] Fallback inference failed: {type(inner_exc).__name__}: {inner_exc}"
            )
            answer = (
                "Model inference unavailable. Based on known literature context, "
                "this conjecture may require a counterexample-driven disproof workflow. "
                "Use retrieved evidence plus Lean formalization to validate next steps."
            )
    reference_hint = known_reference_hint(conjecture)
    work_log.append(f"[{now_iso()}] {reference_hint}")
    # Coarse keyword classification of the model's verdict.
    status = "unknown"
    lowered = answer.lower()
    if "refuted" in lowered or "counterexample" in lowered:
        status = "refuted_candidate"
    elif "proved" in lowered or "supported" in lowered:
        status = "supported_candidate"
    lean_stub = build_lean_stub(conjecture, status=status)
    summary = (
        f"### Conjecture Analysis Status\n\n"
        f"- `status`: **{status}**\n"
        f"- `model_used`: `{model_used}`\n"
        f"- `evidence_rows`: `{len(evidence)}`\n"
        f"- `timestamp`: `{now_iso()}`\n"
    )
    if always_show_work:
        answer = (
            f"### Model Attempt\n\n{answer.strip()}\n\n"
            f"### Literature Hint\n\n{reference_hint}\n\n"
            f"### Explicit Work Policy\n\n"
            f"This run was configured to show full intermediate work in the UI."
        )
    return (
        summary,
        answer.strip(),
        evidence,
        "\n".join(work_log),
        prompt,
        lean_stub,
    )


# Muted monochrome theme to match the custom CSS.
APP_THEME = gr.themes.Default(primary_hue="gray", secondary_hue="gray", neutral_hue="gray")

# Hero banner markup. NOTE(review): the original HTML tags were lost to
# markup-stripping in this file; this structure is reconstructed from the
# surviving text content and the class names defined in UI_CSS — confirm
# against the deployed Space.
HERO_HTML = """
<div class="hero">
  <div class="hero-topline">NorthernTribe Research / Conjecture Operations Deck</div>
  <div class="hero-grid">
    <div class="hero-copy">
      <p class="hero-kicker">Lean + Evidence + Model Analysis</p>
      <h1>Conjecture Lean+AI Lab</h1>
      <p>Tactical control surface for conjecture triage, evidence recall, and Lean-ready
      verification plans. Every run remains explicit, traceable, and policy-aware.</p>
      <div class="chip-row">
        <span>Mission Control</span>
        <span>Dataset-Grounded</span>
        <span>Lean-Oriented</span>
        <span>Trace Complete</span>
      </div>
    </div>
    <div class="hero-side">
      <div class="hero-panel">
        <div class="hero-panel-label">Mission</div>
        <div class="hero-panel-value">Conjecture Triage</div>
        <div class="hero-panel-copy">Structured retrieval, reasoning synthesis, and
        formal-verification handoff.</div>
      </div>
      <div class="hero-panel">
        <div class="hero-panel-label">Posture</div>
        <div class="hero-panel-value">Minimal Tactical</div>
        <div class="hero-panel-copy">Low-noise visuals, compact controls, and high-signal
        telemetry outputs.</div>
      </div>
    </div>
  </div>
</div>
"""

# theme= and css= are gr.Blocks() constructor parameters, not launch()
# keywords; passing them to launch() raises TypeError at startup.
with gr.Blocks(title="Conjecture Lean+AI Lab", theme=APP_THEME, css=UI_CSS) as demo:
    gr.HTML(HERO_HTML)
    gr.HTML('<div class="section-label">Mission Inputs</div>')
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        dataset_repo_id = gr.Textbox(
            label="Dataset Repo",
            value=DEFAULT_DATASET_REPO,
            elem_classes="tactical-card",
        )
        model_repo_id = gr.Textbox(
            label="Preferred Model Repo",
            value=DEFAULT_MODEL_REPO,
            elem_classes="tactical-card",
        )
        fallback_model_id = gr.Textbox(
            label="Fallback Base Model",
            value=DEFAULT_FALLBACK_MODEL,
            elem_classes="tactical-card",
        )
    conjecture_text = gr.Textbox(
        label="Conjecture",
        lines=5,
        value=DEFAULT_CONJECTURE,
        elem_classes="tactical-card",
    )
    gr.HTML('<div class="section-label">Analysis Settings</div>')
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        goal_mode = gr.Radio(
            label="Goal Mode",
            choices=[
                "Try to disprove (counterexample search)",
                "Try to support (proof sketch search)",
                "Balanced analysis (both directions)",
            ],
            value="Balanced analysis (both directions)",
            elem_classes="tactical-card",
        )
        response_level = gr.Radio(
            label="Response Depth",
            choices=[
                "Progressive (simple to Lean)",
                "Simple intuition",
                "Standard proof sketch",
                "Advanced technical",
                "Lean formalization",
            ],
            value="Progressive (simple to Lean)",
            elem_classes="tactical-card",
        )
        always_show_work = gr.Checkbox(
            label="Always Show Full Work",
            value=True,
            elem_classes="tactical-card",
        )
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        rows_to_scan = gr.Slider(
            label="Rows to Scan",
            minimum=200,
            maximum=6000,
            step=100,
            value=1800,
            elem_classes="tactical-card",
        )
        top_k = gr.Slider(
            label="Top Evidence Rows",
            minimum=3,
            maximum=25,
            step=1,
            value=8,
            elem_classes="tactical-card",
        )
        max_new_tokens = gr.Slider(
            label="Max New Tokens",
            minimum=200,
            maximum=2000,
            step=50,
            value=900,
            elem_classes="tactical-card",
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.2,
            step=0.05,
            value=0.2,
            elem_classes="tactical-card",
        )
    run_btn = gr.Button("Run Conjecture Analysis", variant="primary", elem_classes="tactical-card")
    gr.HTML('<div class="section-label">Telemetry</div>')
    status_md = gr.Markdown(elem_classes="status-card")
    answer_md = gr.Markdown(elem_classes="report-card")
    evidence_df = gr.Dataframe(
        label="Retrieved Evidence (from dataset)",
        wrap=True,
        interactive=False,
        elem_classes="evidence-card",
    )
    work_log_box = gr.Textbox(
        label="Work Log (always visible)",
        lines=18,
        max_lines=30,
        interactive=False,
        elem_classes="trace-card",
    )
    prompt_box = gr.Textbox(
        label="Prompt Sent to Model",
        lines=14,
        max_lines=22,
        interactive=False,
        elem_classes="prompt-card",
    )
    lean_box = gr.Textbox(
        label="Lean Verification Stub",
        lines=12,
        max_lines=22,
        interactive=False,
        elem_classes="lean-card",
    )
    run_btn.click(
        fn=analyze_conjecture,
        inputs=[
            conjecture_text,
            goal_mode,
            response_level,
            dataset_repo_id,
            model_repo_id,
            fallback_model_id,
            rows_to_scan,
            top_k,
            temperature,
            max_new_tokens,
            always_show_work,
        ],
        outputs=[status_md, answer_md, evidence_df, work_log_box, prompt_box, lean_box],
    )
    gr.HTML('<div class="lab-footer">NorthernTribe Research — Conjecture Lean+AI Lab</div>')

demo.queue(default_concurrency_limit=2)

if __name__ == "__main__":
    # launch() takes no theme/css kwargs; they are set on gr.Blocks above.
    demo.launch()