# NorthernTribe-Research (Hugging Face Space page artifact; commit 8669227, verified)
# Upgrade UI to advanced minimalist tactical design; remove default footer links.
#!/usr/bin/env python3
"""Lean+AI conjecture analysis Space app with explicit work traces."""
from __future__ import annotations
import datetime as dt
import heapq
import os
import re
import textwrap
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient, hf_hub_download
# --- Deployment defaults --------------------------------------------------
# Dataset repo holding the conjecture training corpus (parquet splits).
DEFAULT_DATASET_REPO = "NorthernTribe-Research/math-conjecture-training-corpus"
# Preferred fine-tuned model repo; tried first by the inference step.
DEFAULT_MODEL_REPO = "NorthernTribe-Research/math-conjecture-model"
# Public base model used when the preferred repo is unavailable.
DEFAULT_FALLBACK_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct"
# Local scratch directories; created lazily by ensure_workspace().
WORKSPACE = Path(__file__).resolve().parent / "workspace"
DATA_CACHE = WORKSPACE / "data"
# Example conjecture pre-filled in the UI textbox (also the retrieval
# fallback query when the user's conjecture yields no search terms).
DEFAULT_CONJECTURE = (
    "Erdos-style Sidon set conjecture: every finite Sidon set can be extended "
    "to a finite perfect difference set."
)
# Custom stylesheet for the "minimal tactical" look, passed to the Gradio UI.
# NOTE: it also hides Gradio's stock footer via
# `.gradio-container footer:not(.lab-footer)` near the end of the sheet.
UI_CSS = r"""
:root {
--bg-0: #07090b;
--bg-1: #0d1115;
--bg-2: #13181d;
--panel: rgba(17, 21, 26, 0.92);
--panel-strong: rgba(22, 27, 34, 0.98);
--panel-soft: rgba(12, 16, 20, 0.9);
--line: rgba(157, 176, 193, 0.16);
--line-strong: rgba(193, 210, 223, 0.28);
--text: #eff4f8;
--muted: #94a2af;
--accent: #c6d1db;
--accent-soft: rgba(198, 209, 219, 0.14);
--success: #95c7ad;
--warn: #c8b27f;
--danger: #d39c9c;
--shadow: 0 26px 70px rgba(0, 0, 0, 0.36);
--radius-lg: 22px;
--radius-md: 16px;
--radius-sm: 12px;
--pill: 999px;
--transition: 180ms ease;
}
body, .gradio-container {
color: var(--text) !important;
background:
radial-gradient(circle at 14% 8%, rgba(198, 209, 219, 0.08), rgba(198, 209, 219, 0) 32%),
radial-gradient(circle at 88% 4%, rgba(157, 176, 193, 0.06), rgba(157, 176, 193, 0) 28%),
linear-gradient(160deg, var(--bg-0) 0%, var(--bg-1) 52%, var(--bg-2) 100%) !important;
}
.gradio-container {
font-family: "IBM Plex Sans", "Segoe UI", "Aptos", sans-serif !important;
--color-accent: var(--accent) !important;
--color-accent-soft: var(--accent-soft) !important;
--color-accent-emphasis: var(--accent) !important;
--button-primary-background-fill: linear-gradient(135deg, rgba(40, 46, 53, 0.98), rgba(24, 30, 37, 0.98)) !important;
--button-primary-background-fill-hover: linear-gradient(135deg, rgba(53, 60, 68, 0.98), rgba(31, 37, 44, 0.98)) !important;
--button-primary-border-color: rgba(201, 214, 225, 0.34) !important;
--button-primary-text-color: var(--text) !important;
}
.gradio-container .main {
max-width: 1300px !important;
padding-top: 18px !important;
padding-bottom: 28px !important;
}
.gradio-container .block {
background: transparent !important;
border: 0 !important;
box-shadow: none !important;
}
.hero {
position: relative;
overflow: hidden;
border: 1px solid var(--line-strong);
border-radius: var(--radius-lg);
padding: clamp(20px, 3vw, 28px);
background:
radial-gradient(circle at 88% 12%, rgba(198, 209, 219, 0.08), transparent 30%),
radial-gradient(circle at 6% 0%, rgba(157, 176, 193, 0.06), transparent 24%),
linear-gradient(160deg, rgba(23, 28, 34, 0.98), rgba(12, 16, 20, 0.98));
box-shadow: var(--shadow);
}
.hero-grid {
position: relative;
z-index: 1;
display: grid;
grid-template-columns: minmax(0, 1.45fr) minmax(250px, 0.9fr);
gap: 14px;
align-items: stretch;
}
.hero-copy {
min-width: 0;
}
.hero-side {
display: grid;
gap: 10px;
}
.hero-panel {
border: 1px solid rgba(201, 214, 225, 0.2);
border-radius: var(--radius-md);
padding: 12px;
background:
linear-gradient(180deg, rgba(20, 25, 31, 0.96), rgba(11, 14, 18, 0.98));
box-shadow:
inset 0 1px 0 rgba(255, 255, 255, 0.04),
0 10px 24px rgba(0, 0, 0, 0.22);
}
.hero-panel-label,
.hero-kicker,
.hero-topline {
text-transform: uppercase;
letter-spacing: 0.18em;
}
.hero-topline {
position: relative;
z-index: 1;
margin-bottom: 12px;
color: var(--muted);
font-size: 0.68rem;
}
.hero-kicker {
margin: 0 0 8px 0;
color: var(--accent);
font-size: 0.68rem;
}
.hero-panel-label {
color: var(--accent);
font-size: 0.64rem;
}
.hero-panel-value {
margin-top: 5px;
font-size: 1.12rem;
font-weight: 700;
letter-spacing: 0.04em;
}
.hero-panel-copy {
margin-top: 6px;
color: var(--muted);
line-height: 1.45;
font-size: 0.86rem;
}
.hero::before {
content: "";
position: absolute;
inset: 0;
background:
linear-gradient(90deg, rgba(198, 209, 219, 0.03) 1px, transparent 1px),
linear-gradient(rgba(198, 209, 219, 0.03) 1px, transparent 1px);
background-size: 48px 48px;
mask-image: linear-gradient(180deg, rgba(0, 0, 0, 0.9), transparent 88%);
pointer-events: none;
}
.hero::after {
content: "";
position: absolute;
inset: 14px;
border: 1px solid rgba(201, 214, 225, 0.12);
border-radius: calc(var(--radius-lg) - 8px);
pointer-events: none;
}
.hero h1 {
margin: 0 0 8px 0;
font-size: clamp(1.9rem, 4vw, 3.1rem);
letter-spacing: 0.14em;
text-transform: uppercase;
line-height: 1.05;
}
.hero p {
margin: 0;
color: var(--muted);
max-width: 72ch;
line-height: 1.6;
}
.chip-row {
margin-top: 14px;
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.chip-row span {
border: 1px solid rgba(201, 214, 225, 0.22);
border-radius: var(--pill);
padding: 6px 12px;
font-size: 0.7rem;
text-transform: uppercase;
letter-spacing: 0.18em;
color: var(--accent);
background: rgba(12, 16, 20, 0.76);
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04);
}
.section-label {
display: flex;
align-items: center;
gap: 10px;
margin: 22px 0 10px;
color: var(--accent);
text-transform: uppercase;
letter-spacing: 0.22em;
font-size: 0.72rem;
}
.section-label::before {
content: "";
width: 28px;
height: 1px;
background: linear-gradient(90deg, var(--accent), transparent);
}
.control-row,
.output-row {
gap: 14px !important;
align-items: stretch !important;
}
.control-row > div,
.output-row > div {
min-width: 0;
}
.tactical-card,
.status-card,
.report-card,
.evidence-card,
.trace-card,
.prompt-card,
.lean-card {
position: relative;
border: 1px solid var(--line);
border-radius: var(--radius-md);
background:
linear-gradient(180deg, rgba(25, 31, 38, 0.94), rgba(14, 18, 22, 0.96));
box-shadow: 0 12px 40px rgba(0, 0, 0, 0.18);
transition:
transform var(--transition),
border-color var(--transition),
box-shadow var(--transition),
background var(--transition);
}
.tactical-card:hover,
.status-card:hover,
.report-card:hover,
.evidence-card:hover,
.trace-card:hover,
.prompt-card:hover,
.lean-card:hover {
border-color: rgba(201, 214, 225, 0.28);
box-shadow: 0 16px 48px rgba(0, 0, 0, 0.24);
transform: translateY(-1px);
}
.status-card {
background:
linear-gradient(180deg, rgba(28, 36, 44, 0.95), rgba(13, 17, 21, 0.97));
}
.status-card :is(h1, h2, h3, h4, p, li, code),
.report-card :is(h1, h2, h3, h4, p, li, code),
.trace-card :is(h1, h2, h3, h4, p, li, code),
.prompt-card :is(h1, h2, h3, h4, p, li, code),
.lean-card :is(h1, h2, h3, h4, p, li, code),
.evidence-card :is(h1, h2, h3, h4, p, li, code) {
color: var(--text) !important;
}
.status-card :is(strong, b),
.report-card :is(strong, b),
.trace-card :is(strong, b),
.prompt-card :is(strong, b),
.lean-card :is(strong, b),
.evidence-card :is(strong, b) {
color: var(--accent) !important;
}
.status-card a,
.report-card a,
.trace-card a,
.prompt-card a,
.lean-card a,
.evidence-card a {
color: var(--accent);
text-decoration-color: rgba(198, 209, 219, 0.34);
}
.status-card .markdown,
.report-card .markdown {
margin-bottom: 0;
}
.status-card .markdown h3,
.report-card .markdown h3 {
margin-top: 0;
text-transform: uppercase;
letter-spacing: 0.1em;
}
.status-card code,
.report-card code,
.trace-card code,
.prompt-card code,
.lean-card code,
.evidence-card code {
background: rgba(255, 255, 255, 0.04);
border: 1px solid rgba(201, 214, 225, 0.12);
border-radius: 8px;
padding: 0.08rem 0.34rem;
}
.gradio-container .wrap:has(textarea),
.gradio-container .wrap:has(input[type="text"]),
.gradio-container .wrap:has(input[type="range"]),
.gradio-container .wrap:has(fieldset) {
transition:
transform var(--transition),
border-color var(--transition),
box-shadow var(--transition);
}
.gradio-container textarea,
.gradio-container input[type="text"],
.gradio-container input[type="number"],
.gradio-container input[type="range"],
.gradio-container fieldset,
.gradio-container .prose,
.gradio-container .markdown,
.gradio-container .output-class {
color: var(--text) !important;
}
.gradio-container textarea,
.gradio-container input[type="text"],
.gradio-container input[type="number"] {
background: rgba(9, 12, 15, 0.88) !important;
border: 1px solid rgba(201, 214, 225, 0.16) !important;
border-radius: var(--radius-sm) !important;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03) !important;
}
.gradio-container textarea:focus,
.gradio-container input[type="text"]:focus,
.gradio-container input[type="number"]:focus {
border-color: rgba(201, 214, 225, 0.32) !important;
box-shadow: 0 0 0 1px rgba(198, 209, 219, 0.16) !important;
}
.gradio-container fieldset {
background: rgba(10, 13, 16, 0.7) !important;
border: 1px solid rgba(201, 214, 225, 0.14) !important;
border-radius: var(--radius-sm) !important;
}
.gradio-container .gr-button,
.gradio-container button.primary,
.gradio-container .gr-button-primary,
.gradio-container [data-testid="button"][variant="primary"] {
border-radius: var(--pill) !important;
padding: 0.8rem 1.1rem !important;
transition:
transform var(--transition),
box-shadow var(--transition),
border-color var(--transition),
background var(--transition);
}
.gradio-container button.primary,
.gradio-container .gr-button-primary,
.gradio-container [data-testid="button"][variant="primary"] {
background: linear-gradient(135deg, rgba(49, 56, 64, 0.98), rgba(26, 31, 38, 0.98)) !important;
border: 1px solid rgba(201, 214, 225, 0.38) !important;
color: var(--text) !important;
box-shadow: 0 10px 24px rgba(0, 0, 0, 0.2);
}
.gradio-container button.primary:hover,
.gradio-container .gr-button-primary:hover,
.gradio-container [data-testid="button"][variant="primary"]:hover {
transform: translateY(-1px);
border-color: rgba(201, 214, 225, 0.46) !important;
background: linear-gradient(135deg, rgba(61, 69, 79, 0.98), rgba(31, 37, 45, 0.98)) !important;
}
.gradio-container button.primary:active,
.gradio-container .gr-button-primary:active,
.gradio-container [data-testid="button"][variant="primary"]:active {
transform: translateY(0);
}
.status-card {
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.02), 0 10px 30px rgba(0, 0, 0, 0.16);
}
.status-card .markdown ul {
margin-bottom: 0;
}
.status-card .markdown li {
margin: 0.16rem 0;
}
.evidence-card table {
background: transparent !important;
}
.evidence-card th,
.evidence-card td {
border-color: rgba(201, 214, 225, 0.12) !important;
}
.evidence-card thead th {
background: rgba(255, 255, 255, 0.03) !important;
color: var(--accent) !important;
text-transform: uppercase;
letter-spacing: 0.08em;
}
.evidence-card tbody tr:hover {
background: rgba(255, 255, 255, 0.02) !important;
}
.trace-card textarea,
.prompt-card textarea,
.lean-card textarea {
min-height: 240px !important;
}
.gradio-container footer:not(.lab-footer) {
display: none !important;
}
.lab-footer {
margin-top: 10px;
text-align: center;
color: var(--muted);
font-size: 0.82rem;
letter-spacing: 0.14em;
text-transform: uppercase;
}
::-webkit-scrollbar {
width: 10px;
height: 10px;
}
::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.03);
}
::-webkit-scrollbar-thumb {
background: rgba(201, 214, 225, 0.18);
border-radius: 999px;
}
@media (max-width: 900px) {
.gradio-container .main {
padding-left: 10px !important;
padding-right: 10px !important;
}
.hero {
padding: 18px;
}
.hero-grid {
grid-template-columns: 1fr;
gap: 10px;
}
.control-row,
.output-row {
flex-direction: column !important;
}
.chip-row span {
font-size: 0.66rem;
letter-spacing: 0.14em;
}
.gradio-container .gr-button,
.gradio-container button.primary,
.gradio-container .gr-button-primary,
.gradio-container [data-testid="button"][variant="primary"] {
width: 100% !important;
}
}
@media (max-width: 640px) {
.hero h1 {
letter-spacing: 0.09em;
}
.hero-topline,
.hero-kicker {
letter-spacing: 0.14em;
}
.hero p {
font-size: 0.96rem;
}
.section-label {
font-size: 0.66rem;
letter-spacing: 0.18em;
}
.trace-card textarea,
.prompt-card textarea,
.lean-card textarea {
min-height: 200px !important;
}
}
"""
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with seconds precision."""
    stamp = dt.datetime.now(dt.timezone.utc)
    return stamp.replace(microsecond=0).isoformat()
def ensure_workspace() -> None:
    """Create the on-disk data cache directory (and parents) if it does not exist."""
    DATA_CACHE.mkdir(parents=True, exist_ok=True)
def auth_token() -> Optional[str]:
    """Return a Hugging Face access token from the environment, or None.

    HF_TOKEN takes precedence over HUGGINGFACE_HUB_TOKEN; a value that is
    blank after stripping counts as absent.
    """
    for variable in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
        raw = os.environ.get(variable)
        if raw:
            # First non-empty candidate wins, matching the original
            # `a or b` short-circuit; whitespace-only still yields None.
            return raw.strip() or None
    return None
def tokenize(text: str) -> List[str]:
    """Lower-case *text* and return every alphanumeric/underscore run of 3+ chars."""
    lowered = (text or "").lower()
    return re.findall(r"[a-zA-Z0-9_]{3,}", lowered)
def clip(text: str, max_len: int) -> str:
    """Strip *text* and truncate it with a "..." suffix beyond *max_len*.

    None is treated as the empty string. Strings at or under the limit are
    returned unchanged (after stripping).

    Fix: for max_len < 3 the old `s[: max_len - 3]` slice went negative and
    produced output *longer* than the requested limit (e.g. max_len=2 on an
    8-char string yielded 10 chars); the slice start is now clamped at 0.
    """
    s = (text or "").strip()
    if len(s) <= max_len:
        return s
    # Clamp so tiny limits degrade to just the ellipsis instead of a
    # negative slice that keeps almost the whole string.
    return s[: max(0, max_len - 3)] + "..."
def score_example(query_terms: Sequence[str], row: Dict[str, Any]) -> float:
    """Score a corpus row by keyword overlap with the query terms.

    Returns 0.0 when the row has no tokens or nothing overlaps; otherwise
    the overlap count plus the fraction of query terms covered (a small
    density bonus that favors rows matching more of the query).
    """
    fields = ("prompt", "target", "task_type", "family", "source_dataset")
    haystack = " ".join(str(row.get(field, "")) for field in fields).lower()
    row_terms = set(tokenize(haystack))
    if not row_terms:
        return 0.0
    overlap = len(row_terms & set(query_terms))
    if not overlap:
        return 0.0
    return overlap + overlap / max(1, len(query_terms))
def iter_rows_from_split(parquet_path: Path, limit: int) -> Iterable[Dict[str, Any]]:
    """Yield up to *limit* rows, in order, from a single parquet split file."""
    rows = load_dataset("parquet", data_files={"rows": str(parquet_path)}, split="rows")
    stop = min(limit, len(rows))
    for position in range(stop):
        yield rows[position]
def retrieve_evidence(
    *,
    dataset_repo_id: str,
    query: str,
    token: Optional[str],
    rows_to_scan: int,
    top_k: int,
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Scan the corpus parquet splits and keep the *top_k* best-matching rows.

    Downloads the validation and test splits from *dataset_repo_id*, scores
    each row against the tokenized *query* (falling back to the default
    conjecture when the query yields no terms), and retains the highest
    scoring rows in a bounded min-heap.

    Returns (best_rows, log_lines); best_rows are sorted by descending score.
    Propagates whatever hf_hub_download / load_dataset raise on network or
    repo errors -- the caller handles those.

    Fix: heap entries previously used the per-split row index as the
    tie-breaker. That index restarts for each split, so two entries with
    equal (score, idx) fell through to comparing the payload dicts, which
    raises TypeError inside heapq. A globally unique monotonic counter is
    used instead.
    """
    ensure_workspace()
    logs: List[str] = []
    logs.append(f"[{now_iso()}] Evidence retrieval started for repo: {dataset_repo_id}")
    query_terms = tokenize(query)
    if not query_terms:
        query_terms = tokenize(DEFAULT_CONJECTURE)
    heap: List[Tuple[float, int, Dict[str, Any]]] = []
    total_scanned = 0
    # Unique, monotonically increasing tie-breaker so heap tuples never
    # compare their dict payloads.
    sequence = 0
    for split in ("validation", "test"):
        cached = hf_hub_download(
            repo_id=dataset_repo_id,
            repo_type="dataset",
            filename=f"{split}.parquet",
            token=token,
        )
        logs.append(f"[{now_iso()}] Downloaded {split}.parquet to cache")
        # Split the scan budget evenly across the two splits.
        split_quota = max(1, rows_to_scan // 2)
        for idx, row in enumerate(iter_rows_from_split(Path(cached), split_quota)):
            total_scanned += 1
            score = score_example(query_terms, row)
            if score <= 0:
                continue
            packed = {
                "split": split,
                "row_index": idx,
                "score": round(score, 4),
                "family": str(row.get("family") or ""),
                "task_type": str(row.get("task_type") or ""),
                "source_dataset": str(row.get("source_dataset") or ""),
                "prompt": clip(str(row.get("prompt") or ""), 320),
                "target": clip(str(row.get("target") or ""), 420),
            }
            sequence += 1
            # Bounded min-heap: keep only the top_k highest scores.
            if len(heap) < top_k:
                heapq.heappush(heap, (score, sequence, packed))
            elif score > heap[0][0]:
                heapq.heapreplace(heap, (score, sequence, packed))
    # Highest score first; ties resolved by encounter order.
    best = [item[2] for item in sorted(heap, key=lambda x: (-x[0], x[1]))]
    logs.append(f"[{now_iso()}] Scanned rows: {total_scanned}; retained evidence: {len(best)}")
    return best, logs
def build_prompt(
    *,
    conjecture: str,
    goal_mode: str,
    response_level: str,
    evidence_rows: Sequence[Dict[str, Any]],
    always_show_work: bool,
) -> str:
    """Assemble the instruction prompt sent to the inference model.

    Folds the conjecture, a depth policy derived from *response_level*, a
    work-visibility policy, and the formatted *evidence_rows* into a single
    prompt string. Unrecognized or blank response levels fall back to the
    progressive (simple-to-Lean) mode.
    """
    # Render each retrieved corpus row as a compact, numbered evidence entry.
    # NOTE(review): the f-string bodies with substitutions are flush-left so
    # textwrap.dedent stays a no-op even when substituted prompt/target text
    # contains newlines (which would otherwise defeat the common prefix).
    evidence_lines: List[str] = []
    for idx, row in enumerate(evidence_rows, start=1):
        evidence_lines.append(
            textwrap.dedent(
                f"""\
Evidence {idx}
- split: {row.get("split")}
- family: {row.get("family")}
- task_type: {row.get("task_type")}
- source_dataset: {row.get("source_dataset")}
- prompt: {row.get("prompt")}
- target_snippet: {row.get("target")}
"""
            ).strip()
        )
    # Work-visibility policy; the UI defaults to showing all work.
    work_policy = (
        "Always show your full step-by-step work, assumptions, candidate constructions, "
        "and verification plan. Do not hide intermediate reasoning."
        if always_show_work
        else "Provide concise reasoning."
    )
    # Blank/whitespace response levels collapse to the progressive default.
    mode = (response_level or "").strip() or "Progressive (simple to Lean)"
    if mode == "Simple intuition":
        depth_policy = (
            "Use beginner-friendly language, define uncommon terms briefly, and keep equations minimal."
        )
        output_format = textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Plain-language intuition
            3) Short solution path (3-6 steps)
            4) Next concrete check
            5) Risks and uncertainty
            """
        ).strip()
    elif mode == "Advanced technical":
        depth_policy = (
            "Use rigorous notation, state assumptions explicitly, and separate proven facts from conjectural steps."
        )
        output_format = textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Technical derivation with key lemmas
            3) Candidate proof/counterexample architecture
            4) Lean verification plan
            5) Risks and uncertainty
            """
        ).strip()
    elif mode == "Lean formalization":
        depth_policy = (
            "Prioritize formal structure: theorem statement, lemma decomposition, and Lean-oriented tactic plan."
        )
        output_format = textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Formal assumptions and theorem shape
            3) Lean-style theorem/lemma skeletons
            4) Tactic-level verification plan
            5) Risks and uncertainty
            """
        ).strip()
    elif mode == "Standard proof sketch":
        depth_policy = (
            "Balance readability with rigor: concise structure, equations where needed, and explicit caveats."
        )
        output_format = textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Structured proof sketch
            3) Candidate proof or counterexample strategy
            4) Lean verification plan
            5) Risks and uncertainty
            """
        ).strip()
    else:
        # Unknown mode string: normalize to the progressive default so the
        # prompt always reports a valid depth mode.
        mode = "Progressive (simple to Lean)"
        depth_policy = (
            "Respond in layers: start simple, then increase rigor, then end with Lean-oriented formalization."
        )
        output_format = textwrap.dedent(
            """\
            1) Claim status (supported / refuted / unknown)
            2) Simple intuition layer
            3) Intermediate derivation layer
            4) Advanced technical layer
            5) Lean verification plan
            6) Risks and uncertainty
            """
        ).strip()
    # Final assembly; flush-left because the evidence substitution is
    # multi-line (see note above).
    return textwrap.dedent(
        f"""\
You are NorthernTribe Research's Lean+AI conjecture assistant.
Goal mode: {goal_mode}
Response depth mode: {mode}
Depth policy: {depth_policy}
{work_policy}
Conjecture:
{conjecture}
Use the evidence below from our conjecture training corpus.
If the evidence is insufficient, say so explicitly and propose the next formal checks.
{chr(10).join(evidence_lines) if evidence_lines else "No evidence retrieved."}
Output format:
{output_format}
"""
    ).strip()
def infer_with_repo(
    *,
    repo_id: str,
    prompt: str,
    token: Optional[str],
    max_new_tokens: int,
    temperature: float,
) -> str:
    """Run inference against *repo_id* via the Hugging Face Inference API.

    Tries raw text-generation first; if that raises (many providers expose
    only chat completion), retries the same prompt through the chat
    completions endpoint. Returns the generated text; exceptions from the
    chat fallback propagate to the caller.
    """
    client = InferenceClient(model=repo_id, token=token)
    try:
        return str(
            client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                return_full_text=False,
                # Sampling only for strictly positive temperatures.
                do_sample=temperature > 0,
            )
        )
    except Exception:
        # Try chat-style fallback if provider only exposes chat completion.
        response = client.chat.completions.create(
            model=repo_id,
            messages=[
                {"role": "system", "content": "You are a mathematical conjecture assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_new_tokens,
            temperature=temperature,
        )
        # Content may be None on an empty completion; normalize to "".
        return str(response.choices[0].message.content or "")
def known_reference_hint(conjecture: str) -> str:
    """Match the conjecture text against a tiny built-in literature index."""
    needle = conjecture.lower()
    is_sidon_extension = "sidon" in needle and "perfect difference" in needle
    if is_sidon_extension:
        return (
            "Reference note: arXiv:2510.19804 (v2, Jan 16, 2026) reports a counterexample "
            "to the Sidon extension conjecture, including Lean artifacts."
        )
    return "No direct literature hint matched from built-in references."
def build_lean_stub(conjecture: str, status: str) -> str:
    """Render a Lean 4 proof-skeleton file for *conjecture*.

    The natural-language statement is flattened to a single clipped line and
    embedded in a doc comment; the theorem name is specialized when the text
    mentions Sidon sets. *status* is recorded as a comment inside the proof.
    """
    flattened = clip(conjecture.replace("\n", " "), 220)
    is_sidon = "sidon" in conjecture.lower()
    theorem_name = "sidon_extension_attempt" if is_sidon else "conjecture_attempt"
    # Body is flush-left so textwrap.dedent leaves the substituted text intact.
    return textwrap.dedent(
        f"""\
/- Auto-generated Lean sketch from Conjecture Lean+AI Lab -/
import Mathlib
namespace ConjectureLab
/-- Conjecture statement (natural language):
{flattened}
-/
theorem {theorem_name} : Prop := by
-- Status from model attempt: {status}
-- TODO:
-- 1) Encode finite set / Sidon / perfect difference definitions.
-- 2) Formalize candidate construction or counterexample.
-- 3) Prove theorem or derive contradiction.
trivial
end ConjectureLab
"""
    )
def analyze_conjecture(
    conjecture: str,
    goal_mode: str,
    response_level: str,
    dataset_repo_id: str,
    model_repo_id: str,
    fallback_model_id: str,
    rows_to_scan: int,
    top_k: int,
    temperature: float,
    max_new_tokens: int,
    always_show_work: bool,
) -> Tuple[str, str, List[Dict[str, Any]], str, str, str]:
    """End-to-end pipeline invoked by the UI's Run button.

    Retrieves corpus evidence, builds the model prompt, runs inference
    (preferred repo first, then the fallback model, then a canned message),
    and derives a coarse status label plus a Lean stub.

    Returns a 6-tuple matching the UI outputs: (status markdown, answer
    markdown, evidence rows, work-log text, prompt text, Lean stub text).
    Designed never to raise: every failure path is caught and recorded in
    the work log instead.
    """
    work_log: List[str] = []
    work_log.append(f"[{now_iso()}] Analysis requested")
    work_log.append(f"[{now_iso()}] Goal mode: {goal_mode}")
    work_log.append(f"[{now_iso()}] Response level: {response_level}")
    work_log.append(f"[{now_iso()}] Dataset repo: {dataset_repo_id}")
    work_log.append(f"[{now_iso()}] Preferred model repo: {model_repo_id}")
    token = auth_token()
    if token:
        work_log.append(f"[{now_iso()}] HF token source detected from runtime secrets")
    else:
        work_log.append(f"[{now_iso()}] No HF token found; proceeding with public-access paths only")
    # Evidence retrieval is best-effort: network/repo failures degrade to an
    # empty evidence list rather than aborting the run.
    try:
        evidence, retrieval_log = retrieve_evidence(
            dataset_repo_id=dataset_repo_id,
            query=conjecture,
            token=token,
            # Clamp slider values to sane minimums.
            rows_to_scan=max(100, int(rows_to_scan)),
            top_k=max(1, int(top_k)),
        )
        work_log.extend(retrieval_log)
    except Exception as exc:
        evidence = []
        work_log.append(f"[{now_iso()}] Evidence retrieval error: {type(exc).__name__}: {exc}")
    prompt = build_prompt(
        # An empty textbox falls back to the showcase conjecture.
        conjecture=conjecture or DEFAULT_CONJECTURE,
        goal_mode=goal_mode,
        response_level=response_level,
        evidence_rows=evidence,
        always_show_work=always_show_work,
    )
    model_used = model_repo_id
    answer = ""
    # Inference ladder: preferred repo -> fallback model -> static message.
    try:
        answer = infer_with_repo(
            repo_id=model_repo_id,
            prompt=prompt,
            token=token,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
        work_log.append(f"[{now_iso()}] Inference succeeded using preferred model repo")
    except Exception as exc:
        work_log.append(
            f"[{now_iso()}] Preferred model inference failed ({type(exc).__name__}). "
            f"Falling back to {fallback_model_id}."
        )
        model_used = fallback_model_id
        try:
            answer = infer_with_repo(
                repo_id=fallback_model_id,
                prompt=prompt,
                token=token,
                max_new_tokens=int(max_new_tokens),
                temperature=float(temperature),
            )
            work_log.append(f"[{now_iso()}] Fallback model inference succeeded")
        except Exception as inner_exc:
            work_log.append(f"[{now_iso()}] Fallback inference failed: {type(inner_exc).__name__}: {inner_exc}")
            answer = (
                "Model inference unavailable. Based on known literature context, "
                "this conjecture may require a counterexample-driven disproof workflow. "
                "Use retrieved evidence plus Lean formalization to validate next steps."
            )
    reference_hint = known_reference_hint(conjecture)
    work_log.append(f"[{now_iso()}] {reference_hint}")
    # Coarse keyword heuristic over the model's own text; a "*_candidate"
    # label signals it is unverified until formally checked.
    status = "unknown"
    lowered = answer.lower()
    if "refuted" in lowered or "counterexample" in lowered:
        status = "refuted_candidate"
    elif "proved" in lowered or "supported" in lowered:
        status = "supported_candidate"
    lean_stub = build_lean_stub(conjecture, status=status)
    summary = (
        f"### Conjecture Analysis Status\n\n"
        f"- `status`: **{status}**\n"
        f"- `model_used`: `{model_used}`\n"
        f"- `evidence_rows`: `{len(evidence)}`\n"
        f"- `timestamp`: `{now_iso()}`\n"
    )
    if always_show_work:
        # Wrap the raw answer with the literature hint and policy notice.
        answer = (
            f"### Model Attempt\n\n{answer.strip()}\n\n"
            f"### Literature Hint\n\n{reference_hint}\n\n"
            f"### Explicit Work Policy\n\n"
            f"This run was configured to show full intermediate work in the UI."
        )
    return (
        summary,
        answer.strip(),
        evidence,
        "\n".join(work_log),
        prompt,
        lean_stub,
    )
# Grayscale theme matching the tactical CSS. Theme and CSS are Blocks
# constructor arguments -- Blocks.launch() does not accept `theme`, `css`,
# or `footer_links` keywords, so passing them there raised TypeError.
APP_THEME = gr.themes.Default(primary_hue="gray", secondary_hue="gray", neutral_hue="gray")

with gr.Blocks(title="Conjecture Lean+AI Lab", theme=APP_THEME, css=UI_CSS) as demo:
    # --- Hero / masthead --------------------------------------------------
    gr.HTML(
        """
        <section class="hero">
          <div class="hero-topline">NorthernTribe Research / Conjecture Operations Deck</div>
          <div class="hero-grid">
            <div class="hero-copy">
              <div class="hero-kicker">Lean + Evidence + Model Analysis</div>
              <h1>Conjecture Lean+AI Lab</h1>
              <p>
                Tactical control surface for conjecture triage, evidence recall, and Lean-ready verification plans.
                Every run remains explicit, traceable, and policy-aware.
              </p>
              <div class="chip-row">
                <span>Mission Control</span>
                <span>Dataset-Grounded</span>
                <span>Lean-Oriented</span>
                <span>Trace Complete</span>
              </div>
            </div>
            <div class="hero-side">
              <div class="hero-panel">
                <div class="hero-panel-label">Mission</div>
                <div class="hero-panel-value">Conjecture Triage</div>
                <div class="hero-panel-copy">Structured retrieval, reasoning synthesis, and formal-verification handoff.</div>
              </div>
              <div class="hero-panel">
                <div class="hero-panel-label">Posture</div>
                <div class="hero-panel-value">Minimal Tactical</div>
                <div class="hero-panel-copy">Low-noise visuals, compact controls, and high-signal telemetry outputs.</div>
              </div>
            </div>
          </div>
        </section>
        """
    )

    # --- Mission inputs: repos plus the conjecture itself -----------------
    gr.HTML('<div class="section-label">Mission Inputs</div>')
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        dataset_repo_id = gr.Textbox(label="Dataset Repo", value=DEFAULT_DATASET_REPO, elem_classes="tactical-card")
        model_repo_id = gr.Textbox(
            label="Preferred Model Repo",
            value=DEFAULT_MODEL_REPO,
            elem_classes="tactical-card",
        )
        fallback_model_id = gr.Textbox(
            label="Fallback Base Model",
            value=DEFAULT_FALLBACK_MODEL,
            elem_classes="tactical-card",
        )
    # Full-width multiline field for the conjecture under analysis.
    conjecture_text = gr.Textbox(
        label="Conjecture",
        lines=5,
        value=DEFAULT_CONJECTURE,
        elem_classes="tactical-card",
    )

    # --- Analysis settings ------------------------------------------------
    gr.HTML('<div class="section-label">Analysis Settings</div>')
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        goal_mode = gr.Radio(
            label="Goal Mode",
            choices=[
                "Try to disprove (counterexample search)",
                "Try to support (proof sketch search)",
                "Balanced analysis (both directions)",
            ],
            value="Balanced analysis (both directions)",
            elem_classes="tactical-card",
        )
        response_level = gr.Radio(
            label="Response Depth",
            choices=[
                "Progressive (simple to Lean)",
                "Simple intuition",
                "Standard proof sketch",
                "Advanced technical",
                "Lean formalization",
            ],
            value="Progressive (simple to Lean)",
            elem_classes="tactical-card",
        )
        always_show_work = gr.Checkbox(
            label="Always Show Full Work",
            value=True,
            elem_classes="tactical-card",
        )
    with gr.Row(elem_classes=["control-row", "tactical-row"]):
        rows_to_scan = gr.Slider(
            label="Rows to Scan",
            minimum=200,
            maximum=6000,
            step=100,
            value=1800,
            elem_classes="tactical-card",
        )
        top_k = gr.Slider(
            label="Top Evidence Rows",
            minimum=3,
            maximum=25,
            step=1,
            value=8,
            elem_classes="tactical-card",
        )
        max_new_tokens = gr.Slider(
            label="Max New Tokens",
            minimum=200,
            maximum=2000,
            step=50,
            value=900,
            elem_classes="tactical-card",
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.2,
            step=0.05,
            value=0.2,
            elem_classes="tactical-card",
        )
    run_btn = gr.Button("Run Conjecture Analysis", variant="primary", elem_classes="tactical-card")

    # --- Telemetry outputs (mirrors analyze_conjecture's return tuple) ----
    gr.HTML('<div class="section-label">Telemetry</div>')
    status_md = gr.Markdown(elem_classes="status-card")
    answer_md = gr.Markdown(elem_classes="report-card")
    evidence_df = gr.Dataframe(
        label="Retrieved Evidence (from dataset)",
        wrap=True,
        interactive=False,
        elem_classes="evidence-card",
    )
    work_log_box = gr.Textbox(
        label="Work Log (always visible)",
        lines=18,
        max_lines=30,
        interactive=False,
        elem_classes="trace-card",
    )
    prompt_box = gr.Textbox(
        label="Prompt Sent to Model",
        lines=14,
        max_lines=22,
        interactive=False,
        elem_classes="prompt-card",
    )
    lean_box = gr.Textbox(
        label="Lean Verification Stub",
        lines=12,
        max_lines=22,
        interactive=False,
        elem_classes="lean-card",
    )

    # Input order must match analyze_conjecture's parameter order.
    run_btn.click(
        fn=analyze_conjecture,
        inputs=[
            conjecture_text,
            goal_mode,
            response_level,
            dataset_repo_id,
            model_repo_id,
            fallback_model_id,
            rows_to_scan,
            top_k,
            temperature,
            max_new_tokens,
            always_show_work,
        ],
        outputs=[status_md, answer_md, evidence_df, work_log_box, prompt_box, lean_box],
    )
    gr.HTML('<footer class="lab-footer">© 2026 NorthernTribe Research</footer>')

demo.queue(default_concurrency_limit=2)

if __name__ == "__main__":
    # Gradio's stock footer is already hidden by UI_CSS
    # (`.gradio-container footer:not(.lab-footer)`), so launch() needs no
    # extra arguments; theme/css are supplied to gr.Blocks above.
    demo.launch()