Deploy Conjecture Lean+AI Lab Space
- README.md +42 -6
- app.py +531 -0
- requirements.txt +3 -0
README.md
CHANGED
@@ -1,12 +1,48 @@
 ---
-title:
-emoji: ⚡
-colorFrom: purple
-colorTo: blue
+title: Conjecture Lean+AI Lab
 sdk: gradio
-sdk_version: 6.
+sdk_version: "6.6.0"
+python_version: "3.10"
 app_file: app.py
 pinned: false
+emoji: "🔬"
 ---
 
-
+# Conjecture Lean+AI Lab
+
+This Space is tailored for conjecture-focused math research workflows that
+combine:
+
+- AI-assisted reasoning and strategy generation
+- dataset-grounded retrieval from `NorthernTribe-Research/math-conjecture-training-corpus`
+- model execution using `NorthernTribe-Research/math-conjecture-model` (with fallback)
+- explicit work trace emission on every run
+- Lean-oriented verification stubs for formal follow-up
+
+## What this Space does
+
+1. Accepts a conjecture statement and analysis mode.
+2. Retrieves relevant examples from the dataset (validation/test splits).
+3. Builds a transparent prompt with evidence context.
+4. Runs model inference (preferred repo, then fallback).
+5. Displays:
+   - full work log,
+   - retrieved evidence table,
+   - exact prompt sent to the model,
+   - model answer with explicit work sections,
+   - generated Lean theorem skeleton for formal verification.
+
+## Runtime secrets
+
+Set at least one token secret in Space settings:
+
+- `HF_TOKEN` or `HUGGINGFACE_HUB_TOKEN`
+
+Optional:
+
+- `HF_USERNAME`
+
+## Notes
+
+- The app is intentionally designed for auditability and reproducibility.
+- Every run should expose intermediate work artifacts in the UI.
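For a quick local sanity check of the workflow described above, here is a minimal smoke-test sketch (an illustration, not part of this commit: it assumes the three committed files are checked out together, and the token value is a placeholder):

# Hypothetical local smoke test; not part of this commit.
import os

os.environ.setdefault("HF_TOKEN", "hf_xxx")  # placeholder; auth_token() in app.py reads this

import app  # app.py builds the Blocks UI as `demo` at import time

app.demo.launch()  # serves the same UI the Space runtime starts via app_file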
app.py
ADDED
@@ -0,0 +1,531 @@
#!/usr/bin/env python3
"""Lean+AI conjecture analysis Space app with explicit work traces."""

from __future__ import annotations

import datetime as dt
import heapq
import os
import re
import textwrap
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient, hf_hub_download


DEFAULT_DATASET_REPO = "NorthernTribe-Research/math-conjecture-training-corpus"
DEFAULT_MODEL_REPO = "NorthernTribe-Research/math-conjecture-model"
DEFAULT_FALLBACK_MODEL = "deepseek-ai/deepseek-math-7b-instruct"
WORKSPACE = Path(__file__).resolve().parent / "workspace"
DATA_CACHE = WORKSPACE / "data"

DEFAULT_CONJECTURE = (
    "Erdos-style Sidon set conjecture: every finite Sidon set can be extended "
    "to a finite perfect difference set."
)


UI_CSS = r"""
:root {
  --bg-0: #04101a;
  --bg-1: #081826;
  --panel: #0d2433;
  --panel-2: #113044;
  --border: rgba(170, 225, 250, 0.22);
  --text: #eaf6ff;
  --muted: #9ec0d4;
  --cyan: #54d7ff;
  --teal: #5df2c1;
  --amber: #ffc56f;
}

body, .gradio-container {
  color: var(--text) !important;
  background:
    radial-gradient(circle at 8% 0%, rgba(84, 215, 255, 0.18), rgba(84, 215, 255, 0) 34%),
    radial-gradient(circle at 88% 0%, rgba(93, 242, 193, 0.14), rgba(93, 242, 193, 0) 30%),
    linear-gradient(140deg, var(--bg-0) 0%, var(--bg-1) 100%) !important;
}

.gradio-container .main {
  max-width: 1320px !important;
}

.block {
  background: linear-gradient(180deg, rgba(15, 36, 51, 0.94), rgba(9, 27, 39, 0.95)) !important;
  border: 1px solid var(--border) !important;
  border-radius: 16px !important;
}

.hero {
  border: 1px solid rgba(84, 215, 255, 0.34);
  border-radius: 18px;
  padding: 20px;
  background:
    radial-gradient(circle at 90% 10%, rgba(84, 215, 255, 0.15), transparent 36%),
    radial-gradient(circle at 0% 0%, rgba(93, 242, 193, 0.14), transparent 30%),
    linear-gradient(165deg, rgba(10, 30, 43, 0.98), rgba(7, 20, 31, 0.98));
}

.hero h1 {
  margin: 0 0 8px 0;
  letter-spacing: 0.02em;
}

.hero p {
  margin: 0;
  color: var(--muted);
}

.chip-row {
  margin-top: 14px;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
}

.chip-row span {
  border: 1px solid rgba(84, 215, 255, 0.35);
  border-radius: 999px;
  padding: 4px 10px;
  font-size: 0.7rem;
  text-transform: uppercase;
  letter-spacing: 0.12em;
  color: var(--muted);
  background: rgba(8, 24, 35, 0.95);
}

.status-ok {
  color: var(--teal);
}

.status-warn {
  color: var(--amber);
}
"""

def now_iso() -> str:
    return dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat()


def ensure_workspace() -> None:
    DATA_CACHE.mkdir(parents=True, exist_ok=True)


def auth_token() -> Optional[str]:
    token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or "").strip()
    return token or None


def tokenize(text: str) -> List[str]:
    return re.findall(r"[a-zA-Z0-9_]{3,}", (text or "").lower())


def clip(text: str, max_len: int) -> str:
    s = (text or "").strip()
    if len(s) <= max_len:
        return s
    return s[: max_len - 3] + "..."


def score_example(query_terms: Sequence[str], row: Dict[str, Any]) -> float:
    text = " ".join(
        str(row.get(k, "")) for k in ("prompt", "target", "task_type", "family", "source_dataset")
    ).lower()
    terms = set(tokenize(text))
    if not terms:
        return 0.0
    overlap = len(set(query_terms) & terms)
    if overlap == 0:
        return 0.0
    density = overlap / max(1, len(query_terms))
    return overlap + density
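
# Illustrative scoring arithmetic (not from this commit): with query terms
# ["sidon", "perfect", "difference"], a row containing all three terms scores
# 3 + 3/3 = 4.0, while a row matching only "sidon" scores 1 + 1/3 ≈ 1.33, so
# the evidence heap prefers denser keyword overlap.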
+
|
| 148 |
+
|
| 149 |
+
def iter_rows_from_split(parquet_path: Path, limit: int) -> Iterable[Dict[str, Any]]:
|
| 150 |
+
dataset = load_dataset("parquet", data_files={"rows": str(parquet_path)}, split="rows")
|
| 151 |
+
capped = min(limit, len(dataset))
|
| 152 |
+
for idx in range(capped):
|
| 153 |
+
yield dataset[idx]
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def retrieve_evidence(
|
| 157 |
+
*,
|
| 158 |
+
dataset_repo_id: str,
|
| 159 |
+
query: str,
|
| 160 |
+
token: Optional[str],
|
| 161 |
+
rows_to_scan: int,
|
| 162 |
+
top_k: int,
|
| 163 |
+
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
| 164 |
+
ensure_workspace()
|
| 165 |
+
logs: List[str] = []
|
| 166 |
+
logs.append(f"[{now_iso()}] Evidence retrieval started for repo: {dataset_repo_id}")
|
| 167 |
+
query_terms = tokenize(query)
|
| 168 |
+
if not query_terms:
|
| 169 |
+
query_terms = tokenize(DEFAULT_CONJECTURE)
|
| 170 |
+
|
| 171 |
+
heap: List[Tuple[float, int, Dict[str, Any]]] = []
|
| 172 |
+
total_scanned = 0
|
| 173 |
+
|
| 174 |
+
for split in ("validation", "test"):
|
| 175 |
+
cached = hf_hub_download(
|
| 176 |
+
repo_id=dataset_repo_id,
|
| 177 |
+
repo_type="dataset",
|
| 178 |
+
filename=f"{split}.parquet",
|
| 179 |
+
token=token,
|
| 180 |
+
)
|
| 181 |
+
logs.append(f"[{now_iso()}] Downloaded {split}.parquet to cache")
|
| 182 |
+
split_quota = max(1, rows_to_scan // 2)
|
| 183 |
+
for idx, row in enumerate(iter_rows_from_split(Path(cached), split_quota)):
|
| 184 |
+
total_scanned += 1
|
| 185 |
+
score = score_example(query_terms, row)
|
| 186 |
+
if score <= 0:
|
| 187 |
+
continue
|
| 188 |
+
packed = {
|
| 189 |
+
"split": split,
|
| 190 |
+
"row_index": idx,
|
| 191 |
+
"score": round(score, 4),
|
| 192 |
+
"family": str(row.get("family") or ""),
|
| 193 |
+
"task_type": str(row.get("task_type") or ""),
|
| 194 |
+
"source_dataset": str(row.get("source_dataset") or ""),
|
| 195 |
+
"prompt": clip(str(row.get("prompt") or ""), 320),
|
| 196 |
+
"target": clip(str(row.get("target") or ""), 420),
|
| 197 |
+
}
|
| 198 |
+
if len(heap) < top_k:
|
| 199 |
+
heapq.heappush(heap, (score, idx, packed))
|
| 200 |
+
elif score > heap[0][0]:
|
| 201 |
+
heapq.heapreplace(heap, (score, idx, packed))
|
| 202 |
+
|
| 203 |
+
best = [item[2] for item in sorted(heap, key=lambda x: (-x[0], x[1]))]
|
| 204 |
+
logs.append(f"[{now_iso()}] Scanned rows: {total_scanned}; retained evidence: {len(best)}")
|
| 205 |
+
return best, logs
|
| 206 |
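
# Illustrative shape of one retained evidence row (all values invented):
# {"split": "validation", "row_index": 17, "score": 3.3333, "family": "sidon",
#  "task_type": "prove_or_refute", "source_dataset": "corpus-v1",
#  "prompt": "...", "target": "..."}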
+
|
| 207 |
+
|
| 208 |
+
def build_prompt(
|
| 209 |
+
*,
|
| 210 |
+
conjecture: str,
|
| 211 |
+
goal_mode: str,
|
| 212 |
+
evidence_rows: Sequence[Dict[str, Any]],
|
| 213 |
+
always_show_work: bool,
|
| 214 |
+
) -> str:
|
| 215 |
+
evidence_lines: List[str] = []
|
| 216 |
+
for idx, row in enumerate(evidence_rows, start=1):
|
| 217 |
+
evidence_lines.append(
|
| 218 |
+
textwrap.dedent(
|
| 219 |
+
f"""\
|
| 220 |
+
Evidence {idx}
|
| 221 |
+
- split: {row.get("split")}
|
| 222 |
+
- family: {row.get("family")}
|
| 223 |
+
- task_type: {row.get("task_type")}
|
| 224 |
+
- source_dataset: {row.get("source_dataset")}
|
| 225 |
+
- prompt: {row.get("prompt")}
|
| 226 |
+
- target_snippet: {row.get("target")}
|
| 227 |
+
"""
|
| 228 |
+
).strip()
|
| 229 |
+
)
|
| 230 |
+
|
| 231 |
+
work_policy = (
|
| 232 |
+
"Always show your full step-by-step work, assumptions, candidate constructions, "
|
| 233 |
+
"and verification plan. Do not hide intermediate reasoning."
|
| 234 |
+
if always_show_work
|
| 235 |
+
else "Provide concise reasoning."
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
return textwrap.dedent(
|
| 239 |
+
f"""\
|
| 240 |
+
You are NorthernTribe Research's Lean+AI conjecture assistant.
|
| 241 |
+
Goal mode: {goal_mode}
|
| 242 |
+
{work_policy}
|
| 243 |
+
|
| 244 |
+
Conjecture:
|
| 245 |
+
{conjecture}
|
| 246 |
+
|
| 247 |
+
Use the evidence below from our conjecture training corpus.
|
| 248 |
+
If the evidence is insufficient, say so explicitly and propose the next formal checks.
|
| 249 |
+
|
| 250 |
+
{chr(10).join(evidence_lines) if evidence_lines else "No evidence retrieved."}
|
| 251 |
+
|
| 252 |
+
Output format:
|
| 253 |
+
1) Claim status (supported / refuted / unknown)
|
| 254 |
+
2) Full work log
|
| 255 |
+
3) Candidate proof or counterexample strategy
|
| 256 |
+
4) Lean verification plan
|
| 257 |
+
5) Risks and uncertainty
|
| 258 |
+
"""
|
| 259 |
+
).strip()
|

def infer_with_repo(
    *,
    repo_id: str,
    prompt: str,
    token: Optional[str],
    max_new_tokens: int,
    temperature: float,
) -> str:
    client = InferenceClient(model=repo_id, token=token)
    try:
        return str(
            client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                return_full_text=False,
                do_sample=temperature > 0,
            )
        )
    except Exception:
        # Try chat-style fallback if provider only exposes chat completion.
        response = client.chat.completions.create(
            model=repo_id,
            messages=[
                {"role": "system", "content": "You are a mathematical conjecture assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_new_tokens,
            temperature=temperature,
        )
        return str(response.choices[0].message.content or "")
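
# Note: `client.chat.completions.create` is huggingface_hub's OpenAI-compatible
# facade over `InferenceClient.chat_completion`, so the fallback reuses the same
# client and credentials; no separate OpenAI SDK is involved.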
+
|
| 294 |
+
|
| 295 |
+
def known_reference_hint(conjecture: str) -> str:
|
| 296 |
+
lower = conjecture.lower()
|
| 297 |
+
if "sidon" in lower and "perfect difference" in lower:
|
| 298 |
+
return (
|
| 299 |
+
"Reference note: arXiv:2510.19804 (v2, Jan 16, 2026) reports a counterexample "
|
| 300 |
+
"to the Sidon extension conjecture, including Lean artifacts."
|
| 301 |
+
)
|
| 302 |
+
return "No direct literature hint matched from built-in references."
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def build_lean_stub(conjecture: str, status: str) -> str:
|
| 306 |
+
sanitized = clip(conjecture.replace("\n", " "), 220)
|
| 307 |
+
theorem_name = "conjecture_attempt"
|
| 308 |
+
if "sidon" in conjecture.lower():
|
| 309 |
+
theorem_name = "sidon_extension_attempt"
|
| 310 |
+
return textwrap.dedent(
|
| 311 |
+
f"""\
|
| 312 |
+
/- Auto-generated Lean sketch from Conjecture Lean+AI Lab -/
|
| 313 |
+
import Mathlib
|
| 314 |
+
|
| 315 |
+
namespace ConjectureLab
|
| 316 |
+
|
| 317 |
+
/-- Conjecture statement (natural language):
|
| 318 |
+
{sanitized}
|
| 319 |
+
-/
|
| 320 |
+
theorem {theorem_name} : Prop := by
|
| 321 |
+
-- Status from model attempt: {status}
|
| 322 |
+
-- TODO:
|
| 323 |
+
-- 1) Encode finite set / Sidon / perfect difference definitions.
|
| 324 |
+
-- 2) Formalize candidate construction or counterexample.
|
| 325 |
+
-- 3) Prove theorem or derive contradiction.
|
| 326 |
+
trivial
|
| 327 |
+
|
| 328 |
+
end ConjectureLab
|
| 329 |
+
"""
|
| 330 |
+
)
|

def analyze_conjecture(
    conjecture: str,
    goal_mode: str,
    dataset_repo_id: str,
    model_repo_id: str,
    fallback_model_id: str,
    rows_to_scan: int,
    top_k: int,
    temperature: float,
    max_new_tokens: int,
    always_show_work: bool,
) -> Tuple[str, str, List[Dict[str, Any]], str, str, str]:
    work_log: List[str] = []
    work_log.append(f"[{now_iso()}] Analysis requested")
    work_log.append(f"[{now_iso()}] Goal mode: {goal_mode}")
    work_log.append(f"[{now_iso()}] Dataset repo: {dataset_repo_id}")
    work_log.append(f"[{now_iso()}] Preferred model repo: {model_repo_id}")

    token = auth_token()
    if token:
        work_log.append(f"[{now_iso()}] HF token source detected from runtime secrets")
    else:
        work_log.append(f"[{now_iso()}] No HF token found; proceeding with public-access paths only")

    try:
        evidence, retrieval_log = retrieve_evidence(
            dataset_repo_id=dataset_repo_id,
            query=conjecture,
            token=token,
            rows_to_scan=max(100, int(rows_to_scan)),
            top_k=max(1, int(top_k)),
        )
        work_log.extend(retrieval_log)
    except Exception as exc:
        evidence = []
        work_log.append(f"[{now_iso()}] Evidence retrieval error: {type(exc).__name__}: {exc}")

    prompt = build_prompt(
        conjecture=conjecture or DEFAULT_CONJECTURE,
        goal_mode=goal_mode,
        evidence_rows=evidence,
        always_show_work=always_show_work,
    )

    model_used = model_repo_id
    answer = ""
    try:
        answer = infer_with_repo(
            repo_id=model_repo_id,
            prompt=prompt,
            token=token,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
        work_log.append(f"[{now_iso()}] Inference succeeded using preferred model repo")
    except Exception as exc:
        work_log.append(
            f"[{now_iso()}] Preferred model inference failed ({type(exc).__name__}). "
            f"Falling back to {fallback_model_id}."
        )
        model_used = fallback_model_id
        try:
            answer = infer_with_repo(
                repo_id=fallback_model_id,
                prompt=prompt,
                token=token,
                max_new_tokens=int(max_new_tokens),
                temperature=float(temperature),
            )
            work_log.append(f"[{now_iso()}] Fallback model inference succeeded")
        except Exception as inner_exc:
            work_log.append(f"[{now_iso()}] Fallback inference failed: {type(inner_exc).__name__}: {inner_exc}")
            answer = (
                "Model inference unavailable. Based on known literature context, "
                "this conjecture may require a counterexample-driven disproof workflow. "
                "Use retrieved evidence plus Lean formalization to validate next steps."
            )

    reference_hint = known_reference_hint(conjecture)
    work_log.append(f"[{now_iso()}] {reference_hint}")

    status = "unknown"
    lowered = answer.lower()
    if "refuted" in lowered or "counterexample" in lowered:
        status = "refuted_candidate"
    elif "proved" in lowered or "supported" in lowered:
        status = "supported_candidate"

    lean_stub = build_lean_stub(conjecture, status=status)
    summary = (
        f"### Conjecture Analysis Status\n\n"
        f"- `status`: **{status}**\n"
        f"- `model_used`: `{model_used}`\n"
        f"- `evidence_rows`: `{len(evidence)}`\n"
        f"- `timestamp`: `{now_iso()}`\n"
    )

    if always_show_work:
        answer = (
            f"### Model Attempt\n\n{answer.strip()}\n\n"
            f"### Literature Hint\n\n{reference_hint}\n\n"
            f"### Explicit Work Policy\n\n"
            f"This run was configured to show full intermediate work in the UI."
        )

    return (
        summary,
        answer.strip(),
        evidence,
        "\n".join(work_log),
        prompt,
        lean_stub,
    )

with gr.Blocks(title="Conjecture Lean+AI Lab", css=UI_CSS) as demo:
    gr.HTML(
        """
        <section class="hero">
          <h1>Conjecture Lean+AI Lab</h1>
          <p>
            A dedicated Space for conjecture analysis using NorthernTribe datasets and models.
            Every run emits an explicit work log, retrieved evidence, prompt trace, and Lean stub.
          </p>
          <div class="chip-row">
            <span>Lean-Oriented</span>
            <span>Dataset-Grounded</span>
            <span>Model-Assisted</span>
            <span>Full Work Trace</span>
          </div>
        </section>
        """
    )

    with gr.Row():
        dataset_repo_id = gr.Textbox(label="Dataset Repo", value=DEFAULT_DATASET_REPO)
        model_repo_id = gr.Textbox(label="Preferred Model Repo", value=DEFAULT_MODEL_REPO)
        fallback_model_id = gr.Textbox(label="Fallback Base Model", value=DEFAULT_FALLBACK_MODEL)

    conjecture_text = gr.Textbox(
        label="Conjecture",
        lines=5,
        value=DEFAULT_CONJECTURE,
    )

    with gr.Row():
        goal_mode = gr.Radio(
            label="Goal Mode",
            choices=[
                "Try to disprove (counterexample search)",
                "Try to support (proof sketch search)",
                "Balanced analysis (both directions)",
            ],
            value="Balanced analysis (both directions)",
        )
        always_show_work = gr.Checkbox(label="Always Show Full Work", value=True)

    with gr.Row():
        rows_to_scan = gr.Slider(label="Rows to Scan", minimum=200, maximum=6000, step=100, value=1800)
        top_k = gr.Slider(label="Top Evidence Rows", minimum=3, maximum=25, step=1, value=8)
        max_new_tokens = gr.Slider(label="Max New Tokens", minimum=200, maximum=2000, step=50, value=900)
        temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.2, step=0.05, value=0.2)

    run_btn = gr.Button("Run Conjecture Analysis", variant="primary")

    status_md = gr.Markdown()
    answer_md = gr.Markdown()
    evidence_df = gr.Dataframe(
        label="Retrieved Evidence (from dataset)",
        wrap=True,
        interactive=False,
    )
    work_log_box = gr.Textbox(label="Work Log (always visible)", lines=18, max_lines=30, interactive=False)
    prompt_box = gr.Textbox(label="Prompt Sent to Model", lines=14, max_lines=22, interactive=False)
    lean_box = gr.Code(label="Lean Verification Stub", language="lean")

    run_btn.click(
        fn=analyze_conjecture,
        inputs=[
            conjecture_text,
            goal_mode,
            dataset_repo_id,
            model_repo_id,
            fallback_model_id,
            rows_to_scan,
            top_k,
            temperature,
            max_new_tokens,
            always_show_work,
        ],
        outputs=[status_md, answer_md, evidence_df, work_log_box, prompt_box, lean_box],
    )


demo.queue(default_concurrency_limit=2)


if __name__ == "__main__":
    demo.launch()
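As a companion to the generated stub's TODO step 1, a minimal Lean sketch of how the Sidon property might be encoded; the name `IsSidon` and this exact formulation are illustrative assumptions, not part of this commit and not guaranteed to match Mathlib's own conventions.

/- Illustrative sketch only; `IsSidon` is a hypothetical name. -/
import Mathlib

namespace ConjectureLabNotes

/-- A finite set of naturals is Sidon when all pairwise sums are
distinct up to swapping the summands. -/
def IsSidon (A : Finset ℕ) : Prop :=
  ∀ a ∈ A, ∀ b ∈ A, ∀ c ∈ A, ∀ d ∈ A,
    a + b = c + d → (a = c ∧ b = d) ∨ (a = d ∧ b = c)

end ConjectureLabNotes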
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio>=6.6.0,<7
huggingface_hub>=1.5.0
datasets>=2.21.0,<3