| |
| """Lean+AI conjecture analysis Space app with explicit work traces.""" |
|
|
| from __future__ import annotations |
|
|
| import datetime as dt |
| import heapq |
| import os |
| import re |
| import textwrap |
| from pathlib import Path |
| from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple |
|
|
| import gradio as gr |
| from datasets import load_dataset |
| from huggingface_hub import InferenceClient, hf_hub_download |
|
|
|
|
# Hugging Face Hub repos used by default; all three are overridable from the UI.
DEFAULT_DATASET_REPO = "NorthernTribe-Research/math-conjecture-training-corpus"
DEFAULT_MODEL_REPO = "NorthernTribe-Research/math-conjecture-model"
DEFAULT_FALLBACK_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct"
# Local scratch area next to this file; DATA_CACHE holds downloaded parquet splits.
WORKSPACE = Path(__file__).resolve().parent / "workspace"
DATA_CACHE = WORKSPACE / "data"


# Seed conjecture shown in the UI; also used as a fallback query when the
# user-provided conjecture tokenizes to nothing.
DEFAULT_CONJECTURE = (
    "Erdos-style Sidon set conjecture: every finite Sidon set can be extended "
    "to a finite perfect difference set."
)
|
|
|
|
| UI_CSS = r""" |
| :root { |
| --bg-0: #07090b; |
| --bg-1: #0d1115; |
| --bg-2: #13181d; |
| --panel: rgba(17, 21, 26, 0.92); |
| --panel-strong: rgba(22, 27, 34, 0.98); |
| --panel-soft: rgba(12, 16, 20, 0.9); |
| --line: rgba(157, 176, 193, 0.16); |
| --line-strong: rgba(193, 210, 223, 0.28); |
| --text: #eff4f8; |
| --muted: #94a2af; |
| --accent: #c6d1db; |
| --accent-soft: rgba(198, 209, 219, 0.14); |
| --success: #95c7ad; |
| --warn: #c8b27f; |
| --danger: #d39c9c; |
| --shadow: 0 26px 70px rgba(0, 0, 0, 0.36); |
| --radius-lg: 22px; |
| --radius-md: 16px; |
| --radius-sm: 12px; |
| --pill: 999px; |
| --transition: 180ms ease; |
| } |
| |
| body, .gradio-container { |
| color: var(--text) !important; |
| background: |
| radial-gradient(circle at 14% 8%, rgba(198, 209, 219, 0.08), rgba(198, 209, 219, 0) 32%), |
| radial-gradient(circle at 88% 4%, rgba(157, 176, 193, 0.06), rgba(157, 176, 193, 0) 28%), |
| linear-gradient(160deg, var(--bg-0) 0%, var(--bg-1) 52%, var(--bg-2) 100%) !important; |
| } |
| |
| .gradio-container { |
| font-family: "IBM Plex Sans", "Segoe UI", "Aptos", sans-serif !important; |
| --color-accent: var(--accent) !important; |
| --color-accent-soft: var(--accent-soft) !important; |
| --color-accent-emphasis: var(--accent) !important; |
| --button-primary-background-fill: linear-gradient(135deg, rgba(40, 46, 53, 0.98), rgba(24, 30, 37, 0.98)) !important; |
| --button-primary-background-fill-hover: linear-gradient(135deg, rgba(53, 60, 68, 0.98), rgba(31, 37, 44, 0.98)) !important; |
| --button-primary-border-color: rgba(201, 214, 225, 0.34) !important; |
| --button-primary-text-color: var(--text) !important; |
| } |
| |
| .gradio-container .main { |
| max-width: 1300px !important; |
| padding-top: 18px !important; |
| padding-bottom: 28px !important; |
| } |
| |
| .gradio-container .block { |
| background: transparent !important; |
| border: 0 !important; |
| box-shadow: none !important; |
| } |
| |
| .hero { |
| position: relative; |
| overflow: hidden; |
| border: 1px solid var(--line-strong); |
| border-radius: var(--radius-lg); |
| padding: clamp(20px, 3vw, 28px); |
| background: |
| radial-gradient(circle at 88% 12%, rgba(198, 209, 219, 0.08), transparent 30%), |
| radial-gradient(circle at 6% 0%, rgba(157, 176, 193, 0.06), transparent 24%), |
| linear-gradient(160deg, rgba(23, 28, 34, 0.98), rgba(12, 16, 20, 0.98)); |
| box-shadow: var(--shadow); |
| } |
| |
| .hero-grid { |
| position: relative; |
| z-index: 1; |
| display: grid; |
| grid-template-columns: minmax(0, 1.45fr) minmax(250px, 0.9fr); |
| gap: 14px; |
| align-items: stretch; |
| } |
| |
| .hero-copy { |
| min-width: 0; |
| } |
| |
| .hero-side { |
| display: grid; |
| gap: 10px; |
| } |
| |
| .hero-panel { |
| border: 1px solid rgba(201, 214, 225, 0.2); |
| border-radius: var(--radius-md); |
| padding: 12px; |
| background: |
| linear-gradient(180deg, rgba(20, 25, 31, 0.96), rgba(11, 14, 18, 0.98)); |
| box-shadow: |
| inset 0 1px 0 rgba(255, 255, 255, 0.04), |
| 0 10px 24px rgba(0, 0, 0, 0.22); |
| } |
| |
| .hero-panel-label, |
| .hero-kicker, |
| .hero-topline { |
| text-transform: uppercase; |
| letter-spacing: 0.18em; |
| } |
| |
| .hero-topline { |
| position: relative; |
| z-index: 1; |
| margin-bottom: 12px; |
| color: var(--muted); |
| font-size: 0.68rem; |
| } |
| |
| .hero-kicker { |
| margin: 0 0 8px 0; |
| color: var(--accent); |
| font-size: 0.68rem; |
| } |
| |
| .hero-panel-label { |
| color: var(--accent); |
| font-size: 0.64rem; |
| } |
| |
| .hero-panel-value { |
| margin-top: 5px; |
| font-size: 1.12rem; |
| font-weight: 700; |
| letter-spacing: 0.04em; |
| } |
| |
| .hero-panel-copy { |
| margin-top: 6px; |
| color: var(--muted); |
| line-height: 1.45; |
| font-size: 0.86rem; |
| } |
| |
| .hero::before { |
| content: ""; |
| position: absolute; |
| inset: 0; |
| background: |
| linear-gradient(90deg, rgba(198, 209, 219, 0.03) 1px, transparent 1px), |
| linear-gradient(rgba(198, 209, 219, 0.03) 1px, transparent 1px); |
| background-size: 48px 48px; |
| mask-image: linear-gradient(180deg, rgba(0, 0, 0, 0.9), transparent 88%); |
| pointer-events: none; |
| } |
| |
| .hero::after { |
| content: ""; |
| position: absolute; |
| inset: 14px; |
| border: 1px solid rgba(201, 214, 225, 0.12); |
| border-radius: calc(var(--radius-lg) - 8px); |
| pointer-events: none; |
| } |
| |
| .hero h1 { |
| margin: 0 0 8px 0; |
| font-size: clamp(1.9rem, 4vw, 3.1rem); |
| letter-spacing: 0.14em; |
| text-transform: uppercase; |
| line-height: 1.05; |
| } |
| |
| .hero p { |
| margin: 0; |
| color: var(--muted); |
| max-width: 72ch; |
| line-height: 1.6; |
| } |
| |
| .chip-row { |
| margin-top: 14px; |
| display: flex; |
| flex-wrap: wrap; |
| gap: 8px; |
| } |
| |
| .chip-row span { |
| border: 1px solid rgba(201, 214, 225, 0.22); |
| border-radius: var(--pill); |
| padding: 6px 12px; |
| font-size: 0.7rem; |
| text-transform: uppercase; |
| letter-spacing: 0.18em; |
| color: var(--accent); |
| background: rgba(12, 16, 20, 0.76); |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04); |
| } |
| |
| .section-label { |
| display: flex; |
| align-items: center; |
| gap: 10px; |
| margin: 22px 0 10px; |
| color: var(--accent); |
| text-transform: uppercase; |
| letter-spacing: 0.22em; |
| font-size: 0.72rem; |
| } |
| |
| .section-label::before { |
| content: ""; |
| width: 28px; |
| height: 1px; |
| background: linear-gradient(90deg, var(--accent), transparent); |
| } |
| |
| .control-row, |
| .output-row { |
| gap: 14px !important; |
| align-items: stretch !important; |
| } |
| |
| .control-row > div, |
| .output-row > div { |
| min-width: 0; |
| } |
| |
| .tactical-card, |
| .status-card, |
| .report-card, |
| .evidence-card, |
| .trace-card, |
| .prompt-card, |
| .lean-card { |
| position: relative; |
| border: 1px solid var(--line); |
| border-radius: var(--radius-md); |
| background: |
| linear-gradient(180deg, rgba(25, 31, 38, 0.94), rgba(14, 18, 22, 0.96)); |
| box-shadow: 0 12px 40px rgba(0, 0, 0, 0.18); |
| transition: |
| transform var(--transition), |
| border-color var(--transition), |
| box-shadow var(--transition), |
| background var(--transition); |
| } |
| |
| .tactical-card:hover, |
| .status-card:hover, |
| .report-card:hover, |
| .evidence-card:hover, |
| .trace-card:hover, |
| .prompt-card:hover, |
| .lean-card:hover { |
| border-color: rgba(201, 214, 225, 0.28); |
| box-shadow: 0 16px 48px rgba(0, 0, 0, 0.24); |
| transform: translateY(-1px); |
| } |
| |
| .status-card { |
| background: |
| linear-gradient(180deg, rgba(28, 36, 44, 0.95), rgba(13, 17, 21, 0.97)); |
| } |
| |
| .status-card :is(h1, h2, h3, h4, p, li, code), |
| .report-card :is(h1, h2, h3, h4, p, li, code), |
| .trace-card :is(h1, h2, h3, h4, p, li, code), |
| .prompt-card :is(h1, h2, h3, h4, p, li, code), |
| .lean-card :is(h1, h2, h3, h4, p, li, code), |
| .evidence-card :is(h1, h2, h3, h4, p, li, code) { |
| color: var(--text) !important; |
| } |
| |
| .status-card :is(strong, b), |
| .report-card :is(strong, b), |
| .trace-card :is(strong, b), |
| .prompt-card :is(strong, b), |
| .lean-card :is(strong, b), |
| .evidence-card :is(strong, b) { |
| color: var(--accent) !important; |
| } |
| |
| .status-card a, |
| .report-card a, |
| .trace-card a, |
| .prompt-card a, |
| .lean-card a, |
| .evidence-card a { |
| color: var(--accent); |
| text-decoration-color: rgba(198, 209, 219, 0.34); |
| } |
| |
| .status-card .markdown, |
| .report-card .markdown { |
| margin-bottom: 0; |
| } |
| |
| .status-card .markdown h3, |
| .report-card .markdown h3 { |
| margin-top: 0; |
| text-transform: uppercase; |
| letter-spacing: 0.1em; |
| } |
| |
| .status-card code, |
| .report-card code, |
| .trace-card code, |
| .prompt-card code, |
| .lean-card code, |
| .evidence-card code { |
| background: rgba(255, 255, 255, 0.04); |
| border: 1px solid rgba(201, 214, 225, 0.12); |
| border-radius: 8px; |
| padding: 0.08rem 0.34rem; |
| } |
| |
| .gradio-container .wrap:has(textarea), |
| .gradio-container .wrap:has(input[type="text"]), |
| .gradio-container .wrap:has(input[type="range"]), |
| .gradio-container .wrap:has(fieldset) { |
| transition: |
| transform var(--transition), |
| border-color var(--transition), |
| box-shadow var(--transition); |
| } |
| |
| .gradio-container textarea, |
| .gradio-container input[type="text"], |
| .gradio-container input[type="number"], |
| .gradio-container input[type="range"], |
| .gradio-container fieldset, |
| .gradio-container .prose, |
| .gradio-container .markdown, |
| .gradio-container .output-class { |
| color: var(--text) !important; |
| } |
| |
| .gradio-container textarea, |
| .gradio-container input[type="text"], |
| .gradio-container input[type="number"] { |
| background: rgba(9, 12, 15, 0.88) !important; |
| border: 1px solid rgba(201, 214, 225, 0.16) !important; |
| border-radius: var(--radius-sm) !important; |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03) !important; |
| } |
| |
| .gradio-container textarea:focus, |
| .gradio-container input[type="text"]:focus, |
| .gradio-container input[type="number"]:focus { |
| border-color: rgba(201, 214, 225, 0.32) !important; |
| box-shadow: 0 0 0 1px rgba(198, 209, 219, 0.16) !important; |
| } |
| |
| .gradio-container fieldset { |
| background: rgba(10, 13, 16, 0.7) !important; |
| border: 1px solid rgba(201, 214, 225, 0.14) !important; |
| border-radius: var(--radius-sm) !important; |
| } |
| |
| .gradio-container .gr-button, |
| .gradio-container button.primary, |
| .gradio-container .gr-button-primary, |
| .gradio-container [data-testid="button"][variant="primary"] { |
| border-radius: var(--pill) !important; |
| padding: 0.8rem 1.1rem !important; |
| transition: |
| transform var(--transition), |
| box-shadow var(--transition), |
| border-color var(--transition), |
| background var(--transition); |
| } |
| |
| .gradio-container button.primary, |
| .gradio-container .gr-button-primary, |
| .gradio-container [data-testid="button"][variant="primary"] { |
| background: linear-gradient(135deg, rgba(49, 56, 64, 0.98), rgba(26, 31, 38, 0.98)) !important; |
| border: 1px solid rgba(201, 214, 225, 0.38) !important; |
| color: var(--text) !important; |
| box-shadow: 0 10px 24px rgba(0, 0, 0, 0.2); |
| } |
| |
| .gradio-container button.primary:hover, |
| .gradio-container .gr-button-primary:hover, |
| .gradio-container [data-testid="button"][variant="primary"]:hover { |
| transform: translateY(-1px); |
| border-color: rgba(201, 214, 225, 0.46) !important; |
| background: linear-gradient(135deg, rgba(61, 69, 79, 0.98), rgba(31, 37, 45, 0.98)) !important; |
| } |
| |
| .gradio-container button.primary:active, |
| .gradio-container .gr-button-primary:active, |
| .gradio-container [data-testid="button"][variant="primary"]:active { |
| transform: translateY(0); |
| } |
| |
| .status-card { |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.02), 0 10px 30px rgba(0, 0, 0, 0.16); |
| } |
| |
| .status-card .markdown ul { |
| margin-bottom: 0; |
| } |
| |
| .status-card .markdown li { |
| margin: 0.16rem 0; |
| } |
| |
| .evidence-card table { |
| background: transparent !important; |
| } |
| |
| .evidence-card th, |
| .evidence-card td { |
| border-color: rgba(201, 214, 225, 0.12) !important; |
| } |
| |
| .evidence-card thead th { |
| background: rgba(255, 255, 255, 0.03) !important; |
| color: var(--accent) !important; |
| text-transform: uppercase; |
| letter-spacing: 0.08em; |
| } |
| |
| .evidence-card tbody tr:hover { |
| background: rgba(255, 255, 255, 0.02) !important; |
| } |
| |
| .trace-card textarea, |
| .prompt-card textarea, |
| .lean-card textarea { |
| min-height: 240px !important; |
| } |
| |
| .gradio-container footer:not(.lab-footer) { |
| display: none !important; |
| } |
| |
| .lab-footer { |
| margin-top: 10px; |
| text-align: center; |
| color: var(--muted); |
| font-size: 0.82rem; |
| letter-spacing: 0.14em; |
| text-transform: uppercase; |
| } |
| |
| ::-webkit-scrollbar { |
| width: 10px; |
| height: 10px; |
| } |
| |
| ::-webkit-scrollbar-track { |
| background: rgba(255, 255, 255, 0.03); |
| } |
| |
| ::-webkit-scrollbar-thumb { |
| background: rgba(201, 214, 225, 0.18); |
| border-radius: 999px; |
| } |
| |
| @media (max-width: 900px) { |
| .gradio-container .main { |
| padding-left: 10px !important; |
| padding-right: 10px !important; |
| } |
| |
| .hero { |
| padding: 18px; |
| } |
| |
| .hero-grid { |
| grid-template-columns: 1fr; |
| gap: 10px; |
| } |
| |
| .control-row, |
| .output-row { |
| flex-direction: column !important; |
| } |
| |
| .chip-row span { |
| font-size: 0.66rem; |
| letter-spacing: 0.14em; |
| } |
| |
| .gradio-container .gr-button, |
| .gradio-container button.primary, |
| .gradio-container .gr-button-primary, |
| .gradio-container [data-testid="button"][variant="primary"] { |
| width: 100% !important; |
| } |
| } |
| |
| @media (max-width: 640px) { |
| .hero h1 { |
| letter-spacing: 0.09em; |
| } |
| |
| .hero-topline, |
| .hero-kicker { |
| letter-spacing: 0.14em; |
| } |
| |
| .hero p { |
| font-size: 0.96rem; |
| } |
| |
| .section-label { |
| font-size: 0.66rem; |
| letter-spacing: 0.18em; |
| } |
| |
| .trace-card textarea, |
| .prompt-card textarea, |
| .lean-card textarea { |
| min-height: 200px !important; |
| } |
| } |
| """ |
|
|
|
|
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision."""
    moment = dt.datetime.now(dt.timezone.utc)
    return moment.replace(microsecond=0).isoformat()
|
|
|
|
def ensure_workspace() -> None:
    """Create the workspace data-cache directory if it does not already exist."""
    # parents=True also creates the enclosing "workspace" folder on first run;
    # exist_ok=True makes repeated calls a no-op.
    DATA_CACHE.mkdir(parents=True, exist_ok=True)
|
|
|
|
def auth_token() -> Optional[str]:
    """Return the Hugging Face access token from the environment, or ``None``.

    ``HF_TOKEN`` takes precedence over ``HUGGINGFACE_HUB_TOKEN``; a value
    that strips to the empty string counts as missing.
    """
    raw = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
    cleaned = raw.strip()
    return cleaned if cleaned else None
|
|
|
|
def tokenize(text: str) -> List[str]:
    """Lower-case ``text`` and return every run of 3+ ASCII word characters.

    ``None`` is treated as the empty string and yields no tokens.
    """
    normalized = (text or "").lower()
    return re.findall(r"[a-zA-Z0-9_]{3,}", normalized)
|
|
|
|
def clip(text: str, max_len: int) -> str:
    """Strip ``text`` and truncate it to at most ``max_len`` characters.

    Truncated strings end with ``"..."`` when the budget allows one; for
    ``max_len < 3`` the string is hard-cut instead. ``None`` is treated as
    the empty string.
    """
    s = (text or "").strip()
    if len(s) <= max_len:
        return s
    if max_len < 3:
        # Bug fix: the old negative slice (max_len - 3 < 0) dropped the wrong
        # end of the string and still appended "...", so the result could be
        # longer than max_len.
        return s[:max_len]
    return s[: max_len - 3] + "..."
|
|
|
|
def score_example(query_terms: Sequence[str], row: Dict[str, Any]) -> float:
    """Score how well a corpus row matches the query terms.

    The score is the raw term overlap plus an overlap-density bonus; rows
    with no overlapping tokens (or no tokens at all) score 0.0.
    """
    fields = ("prompt", "target", "task_type", "family", "source_dataset")
    haystack = " ".join(str(row.get(field, "")) for field in fields).lower()
    row_terms = set(tokenize(haystack))
    if not row_terms:
        return 0.0
    shared = len(set(query_terms) & row_terms)
    if shared == 0:
        return 0.0
    # Density rewards rows that cover a larger fraction of the query terms.
    return shared + shared / max(1, len(query_terms))
|
|
|
|
def iter_rows_from_split(parquet_path: Path, limit: int) -> Iterable[Dict[str, Any]]:
    """Yield up to ``limit`` rows, in order, from a local parquet file."""
    rows = load_dataset("parquet", data_files={"rows": str(parquet_path)}, split="rows")
    for position in range(min(limit, len(rows))):
        yield rows[position]
|
|
|
|
def retrieve_evidence(
    *,
    dataset_repo_id: str,
    query: str,
    token: Optional[str],
    rows_to_scan: int,
    top_k: int,
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Scan the validation/test parquet splits of ``dataset_repo_id`` and
    return the ``top_k`` rows most relevant to ``query``.

    Returns ``(best_rows, log_lines)`` where ``best_rows`` is sorted by
    descending score. Falls back to DEFAULT_CONJECTURE terms when the query
    yields no tokens.
    """
    ensure_workspace()
    logs: List[str] = []
    logs.append(f"[{now_iso()}] Evidence retrieval started for repo: {dataset_repo_id}")
    query_terms = tokenize(query)
    if not query_terms:
        query_terms = tokenize(DEFAULT_CONJECTURE)

    # Min-heap of (score, tiebreak, row). Bug fix: the tiebreak is now a
    # strictly increasing scan counter so tuple comparison can never reach
    # the dict payload. The old key (score, row_index) let two rows from
    # different splits share both score and row index, in which case heapq
    # compared the dicts and raised TypeError.
    heap: List[Tuple[float, int, Dict[str, Any]]] = []
    total_scanned = 0
    scan_order = 0

    for split in ("validation", "test"):
        cached = hf_hub_download(
            repo_id=dataset_repo_id,
            repo_type="dataset",
            filename=f"{split}.parquet",
            token=token,
        )
        logs.append(f"[{now_iso()}] Downloaded {split}.parquet to cache")
        # Budget the scan roughly evenly across the two splits.
        split_quota = max(1, rows_to_scan // 2)
        for idx, row in enumerate(iter_rows_from_split(Path(cached), split_quota)):
            total_scanned += 1
            score = score_example(query_terms, row)
            if score <= 0:
                continue
            packed = {
                "split": split,
                "row_index": idx,
                "score": round(score, 4),
                "family": str(row.get("family") or ""),
                "task_type": str(row.get("task_type") or ""),
                "source_dataset": str(row.get("source_dataset") or ""),
                "prompt": clip(str(row.get("prompt") or ""), 320),
                "target": clip(str(row.get("target") or ""), 420),
            }
            scan_order += 1
            entry = (score, scan_order, packed)
            if len(heap) < top_k:
                heapq.heappush(heap, entry)
            elif score > heap[0][0]:
                heapq.heapreplace(heap, entry)

    # Highest score first; ties resolved by scan order (earliest row first).
    best = [item[2] for item in sorted(heap, key=lambda x: (-x[0], x[1]))]
    logs.append(f"[{now_iso()}] Scanned rows: {total_scanned}; retained evidence: {len(best)}")
    return best, logs
|
|
|
|
_PROGRESSIVE_MODE = "Progressive (simple to Lean)"

# response_level -> (depth policy sentence, numbered output format).
# Computed once at import time instead of rebuilding on every call.
_DEPTH_PROFILES: Dict[str, Tuple[str, str]] = {
    "Simple intuition": (
        "Use beginner-friendly language, define uncommon terms briefly, and keep equations minimal.",
        textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Plain-language intuition
            3) Short solution path (3-6 steps)
            4) Next concrete check
            5) Risks and uncertainty
            """
        ).strip(),
    ),
    "Advanced technical": (
        "Use rigorous notation, state assumptions explicitly, and separate proven facts from conjectural steps.",
        textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Technical derivation with key lemmas
            3) Candidate proof/counterexample architecture
            4) Lean verification plan
            5) Risks and uncertainty
            """
        ).strip(),
    ),
    "Lean formalization": (
        "Prioritize formal structure: theorem statement, lemma decomposition, and Lean-oriented tactic plan.",
        textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Formal assumptions and theorem shape
            3) Lean-style theorem/lemma skeletons
            4) Tactic-level verification plan
            5) Risks and uncertainty
            """
        ).strip(),
    ),
    "Standard proof sketch": (
        "Balance readability with rigor: concise structure, equations where needed, and explicit caveats.",
        textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Structured proof sketch
            3) Candidate proof or counterexample strategy
            4) Lean verification plan
            5) Risks and uncertainty
            """
        ).strip(),
    ),
    _PROGRESSIVE_MODE: (
        "Respond in layers: start simple, then increase rigor, then end with Lean-oriented formalization.",
        textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Simple intuition layer
            3) Intermediate derivation layer
            4) Advanced technical layer
            5) Lean verification plan
            6) Risks and uncertainty
            """
        ).strip(),
    ),
}


def _format_evidence_entry(index: int, row: Dict[str, Any]) -> str:
    """Render one retrieved corpus row as a labelled, line-per-field entry."""
    return "\n".join(
        [
            f"Evidence {index}",
            f"- split: {row.get('split')}",
            f"- family: {row.get('family')}",
            f"- task_type: {row.get('task_type')}",
            f"- source_dataset: {row.get('source_dataset')}",
            f"- prompt: {row.get('prompt')}",
            f"- target_snippet: {row.get('target')}",
        ]
    )


def build_prompt(
    *,
    conjecture: str,
    goal_mode: str,
    response_level: str,
    evidence_rows: Sequence[Dict[str, Any]],
    always_show_work: bool,
) -> str:
    """Assemble the full instruction prompt sent to the model.

    An unrecognized or empty ``response_level`` falls back to the
    progressive mode. The prompt is assembled by joining explicit lines:
    the previous implementation interpolated already-unindented multi-line
    content into an indented f-string and then called ``textwrap.dedent``,
    whose common-prefix computation therefore stripped nothing and left
    stray source indentation on the template lines.
    """
    evidence_block = "\n".join(
        _format_evidence_entry(i, row) for i, row in enumerate(evidence_rows, start=1)
    ) or "No evidence retrieved."

    work_policy = (
        "Always show your full step-by-step work, assumptions, candidate constructions, "
        "and verification plan. Do not hide intermediate reasoning."
        if always_show_work
        else "Provide concise reasoning."
    )

    mode = (response_level or "").strip() or _PROGRESSIVE_MODE
    if mode not in _DEPTH_PROFILES:
        mode = _PROGRESSIVE_MODE
    depth_policy, output_format = _DEPTH_PROFILES[mode]

    lines = [
        "You are NorthernTribe Research's Lean+AI conjecture assistant.",
        f"Goal mode: {goal_mode}",
        f"Response depth mode: {mode}",
        f"Depth policy: {depth_policy}",
        work_policy,
        "",
        "Conjecture:",
        conjecture,
        "",
        "Use the evidence below from our conjecture training corpus.",
        "If the evidence is insufficient, say so explicitly and propose the next formal checks.",
        "",
        evidence_block,
        "",
        "Output format:",
        output_format,
    ]
    return "\n".join(lines).strip()
|
|
|
|
def infer_with_repo(
    *,
    repo_id: str,
    prompt: str,
    token: Optional[str],
    max_new_tokens: int,
    temperature: float,
) -> str:
    """Generate a completion for ``prompt`` from ``repo_id`` via the HF
    Inference API.

    Tries the raw text-generation endpoint first; on any failure retries
    through the chat-completions API (some endpoints expose only one of
    the two). Sampling is enabled only when ``temperature`` is positive.
    """
    client = InferenceClient(model=repo_id, token=token)
    try:
        generated = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            return_full_text=False,
            do_sample=temperature > 0,
        )
        return str(generated)
    except Exception:
        # Deliberately broad: any text-generation failure falls through to
        # the chat route, whose own errors propagate to the caller.
        chat = client.chat.completions.create(
            model=repo_id,
            messages=[
                {"role": "system", "content": "You are a mathematical conjecture assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_new_tokens,
            temperature=temperature,
        )
        return str(chat.choices[0].message.content or "")
|
|
|
|
def known_reference_hint(conjecture: str) -> str:
    """Return a canned literature pointer when the conjecture matches a
    built-in topic, else a fixed "no match" sentence."""
    needle = conjecture.lower()
    mentions_sidon = "sidon" in needle
    mentions_pds = "perfect difference" in needle
    if mentions_sidon and mentions_pds:
        return (
            "Reference note: arXiv:2510.19804 (v2, Jan 16, 2026) reports a counterexample "
            "to the Sidon extension conjecture, including Lean artifacts."
        )
    return "No direct literature hint matched from built-in references."
|
|
|
|
def build_lean_stub(conjecture: str, status: str) -> str:
    """Render a Lean 4 sketch file embedding the conjecture text and the
    status label produced by the model attempt.

    The result is a human-editable scaffold, not finished Lean.
    NOTE(review): ``theorem ... : Prop := by trivial`` is unlikely to
    elaborate as-is in Lean 4 — confirm the intended placeholder shape.
    """
    # Flatten newlines and clip so the statement fits inside the doc comment.
    sanitized = clip(conjecture.replace("\n", " "), 220)
    theorem_name = "conjecture_attempt"
    if "sidon" in conjecture.lower():
        # Specialized theorem name for the Sidon-extension workflow.
        theorem_name = "sidon_extension_attempt"
    return textwrap.dedent(
        f"""\
        /- Auto-generated Lean sketch from Conjecture Lean+AI Lab -/
        import Mathlib

        namespace ConjectureLab

        /-- Conjecture statement (natural language):
        {sanitized}
        -/
        theorem {theorem_name} : Prop := by
        -- Status from model attempt: {status}
        -- TODO:
        -- 1) Encode finite set / Sidon / perfect difference definitions.
        -- 2) Formalize candidate construction or counterexample.
        -- 3) Prove theorem or derive contradiction.
        trivial

        end ConjectureLab
        """
    )
|
|
|
|
def analyze_conjecture(
    conjecture: str,
    goal_mode: str,
    response_level: str,
    dataset_repo_id: str,
    model_repo_id: str,
    fallback_model_id: str,
    rows_to_scan: int,
    top_k: int,
    temperature: float,
    max_new_tokens: int,
    always_show_work: bool,
) -> Tuple[str, str, List[Dict[str, Any]], str, str, str]:
    """Run one full analysis pass: retrieve evidence, prompt the model
    (with fallback), classify the outcome, and build the UI artifacts.

    Returns a 6-tuple matching the Gradio outputs, in order:
    (summary markdown, answer markdown, evidence rows, work-log text,
    prompt text, Lean stub text).
    """
    # Every user-visible event is appended here and surfaced in the log box.
    work_log: List[str] = []
    work_log.append(f"[{now_iso()}] Analysis requested")
    work_log.append(f"[{now_iso()}] Goal mode: {goal_mode}")
    work_log.append(f"[{now_iso()}] Response level: {response_level}")
    work_log.append(f"[{now_iso()}] Dataset repo: {dataset_repo_id}")
    work_log.append(f"[{now_iso()}] Preferred model repo: {model_repo_id}")

    token = auth_token()
    if token:
        work_log.append(f"[{now_iso()}] HF token source detected from runtime secrets")
    else:
        work_log.append(f"[{now_iso()}] No HF token found; proceeding with public-access paths only")

    # Retrieval is best-effort: on any failure we continue with no evidence
    # rather than aborting the whole run.
    try:
        evidence, retrieval_log = retrieve_evidence(
            dataset_repo_id=dataset_repo_id,
            query=conjecture,
            token=token,
            # Clamp user-supplied knobs to sane floors.
            rows_to_scan=max(100, int(rows_to_scan)),
            top_k=max(1, int(top_k)),
        )
        work_log.extend(retrieval_log)
    except Exception as exc:
        evidence = []
        work_log.append(f"[{now_iso()}] Evidence retrieval error: {type(exc).__name__}: {exc}")

    # Empty conjecture falls back to the default so the prompt never lacks one.
    prompt = build_prompt(
        conjecture=conjecture or DEFAULT_CONJECTURE,
        goal_mode=goal_mode,
        response_level=response_level,
        evidence_rows=evidence,
        always_show_work=always_show_work,
    )

    # Inference ladder: preferred repo -> fallback repo -> canned message,
    # so the UI always receives some answer text.
    model_used = model_repo_id
    answer = ""
    try:
        answer = infer_with_repo(
            repo_id=model_repo_id,
            prompt=prompt,
            token=token,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
        work_log.append(f"[{now_iso()}] Inference succeeded using preferred model repo")
    except Exception as exc:
        work_log.append(
            f"[{now_iso()}] Preferred model inference failed ({type(exc).__name__}). "
            f"Falling back to {fallback_model_id}."
        )
        model_used = fallback_model_id
        try:
            answer = infer_with_repo(
                repo_id=fallback_model_id,
                prompt=prompt,
                token=token,
                max_new_tokens=int(max_new_tokens),
                temperature=float(temperature),
            )
            work_log.append(f"[{now_iso()}] Fallback model inference succeeded")
        except Exception as inner_exc:
            work_log.append(f"[{now_iso()}] Fallback inference failed: {type(inner_exc).__name__}: {inner_exc}")
            answer = (
                "Model inference unavailable. Based on known literature context, "
                "this conjecture may require a counterexample-driven disproof workflow. "
                "Use retrieved evidence plus Lean formalization to validate next steps."
            )

    reference_hint = known_reference_hint(conjecture)
    work_log.append(f"[{now_iso()}] {reference_hint}")

    # Heuristic status classification via keyword sniffing on the answer;
    # the *_candidate suffix signals this is not a verified verdict.
    status = "unknown"
    lowered = answer.lower()
    if "refuted" in lowered or "counterexample" in lowered:
        status = "refuted_candidate"
    elif "proved" in lowered or "supported" in lowered:
        status = "supported_candidate"

    lean_stub = build_lean_stub(conjecture, status=status)
    summary = (
        f"### Conjecture Analysis Status\n\n"
        f"- `status`: **{status}**\n"
        f"- `model_used`: `{model_used}`\n"
        f"- `evidence_rows`: `{len(evidence)}`\n"
        f"- `timestamp`: `{now_iso()}`\n"
    )

    # When full-work mode is on, wrap the raw answer with the literature hint
    # and an explicit policy note before returning it to the UI.
    if always_show_work:
        answer = (
            f"### Model Attempt\n\n{answer.strip()}\n\n"
            f"### Literature Hint\n\n{reference_hint}\n\n"
            f"### Explicit Work Policy\n\n"
            f"This run was configured to show full intermediate work in the UI."
        )

    return (
        summary,
        answer.strip(),
        evidence,
        "\n".join(work_log),
        prompt,
        lean_stub,
    )
|
|
|
|
# Bug fix: css/theme must be supplied at Blocks construction time — they were
# previously passed to demo.launch(), which accepts neither, so the custom
# stylesheet and monochrome theme were never applied (and launch() raised).
with gr.Blocks(
    title="Conjecture Lean+AI Lab",
    css=UI_CSS,
    theme=gr.themes.Default(primary_hue="gray", secondary_hue="gray", neutral_hue="gray"),
) as demo:
    # Static hero banner; purely presentational.
    gr.HTML(
        """
        <section class="hero">
          <div class="hero-topline">NorthernTribe Research / Conjecture Operations Deck</div>
          <div class="hero-grid">
            <div class="hero-copy">
              <div class="hero-kicker">Lean + Evidence + Model Analysis</div>
              <h1>Conjecture Lean+AI Lab</h1>
              <p>
                Tactical control surface for conjecture triage, evidence recall, and Lean-ready verification plans.
                Every run remains explicit, traceable, and policy-aware.
              </p>
              <div class="chip-row">
                <span>Mission Control</span>
                <span>Dataset-Grounded</span>
                <span>Lean-Oriented</span>
                <span>Trace Complete</span>
              </div>
            </div>
            <div class="hero-side">
              <div class="hero-panel">
                <div class="hero-panel-label">Mission</div>
                <div class="hero-panel-value">Conjecture Triage</div>
                <div class="hero-panel-copy">Structured retrieval, reasoning synthesis, and formal-verification handoff.</div>
              </div>
              <div class="hero-panel">
                <div class="hero-panel-label">Posture</div>
                <div class="hero-panel-value">Minimal Tactical</div>
                <div class="hero-panel-copy">Low-noise visuals, compact controls, and high-signal telemetry outputs.</div>
              </div>
            </div>
          </div>
        </section>
        """
    )

    gr.HTML('<div class="section-label">Mission Inputs</div>')

    # Repo coordinates; defaults come from the module constants.
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        dataset_repo_id = gr.Textbox(label="Dataset Repo", value=DEFAULT_DATASET_REPO, elem_classes="tactical-card")
        model_repo_id = gr.Textbox(
            label="Preferred Model Repo",
            value=DEFAULT_MODEL_REPO,
            elem_classes="tactical-card",
        )
        fallback_model_id = gr.Textbox(
            label="Fallback Base Model",
            value=DEFAULT_FALLBACK_MODEL,
            elem_classes="tactical-card",
        )

    conjecture_text = gr.Textbox(
        label="Conjecture",
        lines=5,
        value=DEFAULT_CONJECTURE,
        elem_classes="tactical-card",
    )

    gr.HTML('<div class="section-label">Analysis Settings</div>')

    # Analysis behavior toggles; choice strings must match build_prompt modes.
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        goal_mode = gr.Radio(
            label="Goal Mode",
            choices=[
                "Try to disprove (counterexample search)",
                "Try to support (proof sketch search)",
                "Balanced analysis (both directions)",
            ],
            value="Balanced analysis (both directions)",
            elem_classes="tactical-card",
        )
        response_level = gr.Radio(
            label="Response Depth",
            choices=[
                "Progressive (simple to Lean)",
                "Simple intuition",
                "Standard proof sketch",
                "Advanced technical",
                "Lean formalization",
            ],
            value="Progressive (simple to Lean)",
            elem_classes="tactical-card",
        )
        always_show_work = gr.Checkbox(
            label="Always Show Full Work",
            value=True,
            elem_classes="tactical-card",
        )

    # Numeric knobs forwarded to retrieval and inference.
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        rows_to_scan = gr.Slider(
            label="Rows to Scan",
            minimum=200,
            maximum=6000,
            step=100,
            value=1800,
            elem_classes="tactical-card",
        )
        top_k = gr.Slider(
            label="Top Evidence Rows",
            minimum=3,
            maximum=25,
            step=1,
            value=8,
            elem_classes="tactical-card",
        )
        max_new_tokens = gr.Slider(
            label="Max New Tokens",
            minimum=200,
            maximum=2000,
            step=50,
            value=900,
            elem_classes="tactical-card",
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.2,
            step=0.05,
            value=0.2,
            elem_classes="tactical-card",
        )

    run_btn = gr.Button("Run Conjecture Analysis", variant="primary", elem_classes="tactical-card")

    gr.HTML('<div class="section-label">Telemetry</div>')

    # Output surfaces, in the same order analyze_conjecture returns them.
    status_md = gr.Markdown(elem_classes="status-card")
    answer_md = gr.Markdown(elem_classes="report-card")
    evidence_df = gr.Dataframe(
        label="Retrieved Evidence (from dataset)",
        wrap=True,
        interactive=False,
        elem_classes="evidence-card",
    )
    work_log_box = gr.Textbox(
        label="Work Log (always visible)",
        lines=18,
        max_lines=30,
        interactive=False,
        elem_classes="trace-card",
    )
    prompt_box = gr.Textbox(
        label="Prompt Sent to Model",
        lines=14,
        max_lines=22,
        interactive=False,
        elem_classes="prompt-card",
    )
    lean_box = gr.Textbox(
        label="Lean Verification Stub",
        lines=12,
        max_lines=22,
        interactive=False,
        elem_classes="lean-card",
    )

    # Input order must match analyze_conjecture's positional parameters.
    run_btn.click(
        fn=analyze_conjecture,
        inputs=[
            conjecture_text,
            goal_mode,
            response_level,
            dataset_repo_id,
            model_repo_id,
            fallback_model_id,
            rows_to_scan,
            top_k,
            temperature,
            max_new_tokens,
            always_show_work,
        ],
        outputs=[status_md, answer_md, evidence_df, work_log_box, prompt_box, lean_box],
    )

    gr.HTML('<footer class="lab-footer">© 2026 NorthernTribe Research</footer>')
|
|
|
|
# Queue requests so long-running analyses do not block one another.
demo.queue(default_concurrency_limit=2)


# Kept for backward compatibility with any code importing APP_THEME. Note that
# Gradio applies a theme at Blocks construction time, not at launch().
APP_THEME = gr.themes.Default(primary_hue="gray", secondary_hue="gray", neutral_hue="gray")


if __name__ == "__main__":
    # Bug fix: Blocks.launch() accepts no `theme`, `css`, or `footer_links`
    # keyword arguments — passing them raised TypeError at startup. Styling
    # belongs on the gr.Blocks(...) constructor instead.
    demo.launch()
|
|