Spaces:

NorthernTribe-Research
/

math_trainer

Running

File size: 50,733 Bytes

#!/usr/bin/env python3
"""Gradio app to run SOTA conjecture-model training on Hugging Face Space GPU."""

from __future__ import annotations

import datetime as dt
import html
import inspect
import json
import os
import re
import select
import shutil
import signal
import subprocess
import sys
import threading
import time
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Tuple

import gradio as gr
import torch
import yaml
from huggingface_hub import hf_hub_download


# Filesystem layout, resolved relative to the directory that contains this file.
ROOT = Path(__file__).resolve().parent
WORKSPACE_DIR = ROOT / "workspace"
DATA_DIR = WORKSPACE_DIR / "data" / "releases" / "v1"
RUNTIME_DIR = WORKSPACE_DIR / "runtime"
CONFIG_TEMPLATE = ROOT / "configs" / "deepseek_math_sota.yaml"
TRAIN_SCRIPT = ROOT / "scripts" / "train_sota.py"
EVAL_SCRIPT = ROOT / "scripts" / "eval_sota.py"
TRAIN_OUTPUT_DIR = WORKSPACE_DIR / "runs" / "math-conjecture-sota"
# JSON credential files probed, in order, when no token is present in the environment.
CREDENTIAL_FILE_CANDIDATES = [
    ROOT / "huggingface-api-key.json",
    ROOT.parent / "huggingface-api-key.json",
]

# "<owner>/<repo>": each part starts with an alphanumeric character and is at most 96 characters.
REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,95}/[A-Za-z0-9][A-Za-z0-9._-]{0,95}$")
# Captures the stage number from log markers such as "[stage 2]".
STAGE_LOG_RE = re.compile(r"\[stage\s+(\d+)\]")
# Captures the number that follows "loss" / "train_loss" and a ":" or "=" separator.
LOSS_LOG_RE = re.compile(r"(?:^|[\s{,'\"])(?:loss|train_loss)\s*[:=]\s*([-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)")

# Process-wide run state. RUN_STATE_LOCK guards every variable below it.
RUN_STATE_LOCK = threading.Lock()
RUN_IN_PROGRESS = False
CANCEL_REQUESTED = False
ACTIVE_PROCESS: Optional[subprocess.Popen] = None
ACTIVE_RUN_LABEL = ""

TACTICAL_CSS = """
@import url("https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&family=Rajdhani:wght@500;600;700&display=swap");

:root {
  --ops-bg: #070707;
  --ops-bg-2: #0f0f0f;
  --ops-panel: #111111;
  --ops-panel-2: #161616;
  --ops-border: #2a2a2a;
  --ops-border-strong: #3d3d3d;
  --ops-text: #ececec;
  --ops-muted: #a8a8a8;
  --ops-bright: #ffffff;
}

.gradio-container {
  color: var(--ops-text) !important;
  background:
    linear-gradient(rgba(255, 255, 255, 0.02) 1px, transparent 1px),
    linear-gradient(90deg, rgba(255, 255, 255, 0.02) 1px, transparent 1px),
    radial-gradient(circle at 50% -10%, #1d1d1d 0%, #0f0f0f 38%, #070707 100%) !important;
  background-size: 26px 26px, 26px 26px, 100% 100% !important;
  font-family: "IBM Plex Mono", "JetBrains Mono", "Fira Code", monospace !important;
}

.gradio-container .prose h1,
.gradio-container .prose h2,
.gradio-container .prose h3,
.gradio-container .prose p,
.gradio-container .prose li,
.gradio-container .prose strong {
  color: var(--ops-text) !important;
}

.gradio-container .prose h1,
.gradio-container .prose h2 {
  font-family: "Rajdhani", "IBM Plex Mono", monospace !important;
  letter-spacing: 0.08em !important;
  text-transform: uppercase !important;
}

.gradio-container .prose code {
  color: var(--ops-bright) !important;
  background: #1b1b1b !important;
  border: 1px solid var(--ops-border) !important;
}

.gradio-container .block,
.gradio-container .form {
  background: linear-gradient(180deg, var(--ops-panel) 0%, var(--ops-panel-2) 100%) !important;
  border: 1px solid var(--ops-border) !important;
  box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.03), 0 12px 28px rgba(0, 0, 0, 0.35) !important;
}

.gradio-container label span,
.gradio-container .block-info,
.gradio-container [data-testid="block-info"] {
  color: var(--ops-muted) !important;
  letter-spacing: 0.12em !important;
  text-transform: uppercase !important;
  font-size: 0.74rem !important;
}

.gradio-container input,
.gradio-container textarea {
  background: #0c0c0c !important;
  color: var(--ops-text) !important;
  border: 1px solid var(--ops-border-strong) !important;
  box-shadow: none !important;
  font-family: "IBM Plex Mono", "JetBrains Mono", monospace !important;
}

.gradio-container input::placeholder,
.gradio-container textarea::placeholder {
  color: #7f7f7f !important;
}

.gradio-container input:focus,
.gradio-container textarea:focus {
  border-color: #656565 !important;
  outline: none !important;
}

.gradio-container button {
  border: 1px solid #565656 !important;
  background: linear-gradient(180deg, #212121 0%, #151515 100%) !important;
  color: var(--ops-bright) !important;
  letter-spacing: 0.08em !important;
  text-transform: uppercase !important;
  font-family: "Rajdhani", "IBM Plex Mono", monospace !important;
}

.gradio-container button.primary,
.gradio-container button.stop,
.gradio-container button.secondary {
  background: linear-gradient(180deg, #2a2a2a 0%, #171717 100%) !important;
  border-color: #686868 !important;
  color: #f7f7f7 !important;
}

.gradio-container button:hover {
  filter: brightness(1.08);
}

.ops-header {
  border: 1px solid var(--ops-border);
  background: linear-gradient(180deg, #101010 0%, #0c0c0c 100%);
  padding: 12px 14px;
  margin: 2px 0 8px 0;
}

.ops-header-title {
  font-family: "Rajdhani", "IBM Plex Mono", monospace;
  letter-spacing: 0.16em;
  text-transform: uppercase;
  color: #f4f4f4;
  font-weight: 700;
  font-size: 1rem;
}

.ops-header-tags {
  margin-top: 8px;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
}

.ops-tag {
  border: 1px solid #474747;
  background: #181818;
  color: #d5d5d5;
  padding: 3px 7px;
  font-size: 0.72rem;
  letter-spacing: 0.12em;
  text-transform: uppercase;
}

.ops-visual {
  border: 1px solid var(--ops-border);
  background: linear-gradient(180deg, #101010 0%, #0b0b0b 100%);
  padding: 12px;
}

.ops-visual-head {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-bottom: 10px;
  gap: 10px;
}

.ops-visual-title {
  font-family: "Rajdhani", "IBM Plex Mono", monospace;
  font-weight: 700;
  letter-spacing: 0.14em;
  text-transform: uppercase;
  color: #f1f1f1;
}

.ops-visual-sub {
  color: #9f9f9f;
  font-size: 0.78rem;
  letter-spacing: 0.08em;
  text-transform: uppercase;
}

.ops-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
  gap: 10px;
}

.ops-card {
  border: 1px solid #323232;
  background: linear-gradient(180deg, #161616 0%, #101010 100%);
  padding: 9px;
  min-height: 72px;
}

.ops-k {
  color: #9a9a9a;
  font-size: 0.68rem;
  letter-spacing: 0.11em;
  text-transform: uppercase;
}

.ops-v {
  color: #f0f0f0;
  font-family: "Rajdhani", "IBM Plex Mono", monospace;
  font-size: 1.05rem;
  margin-top: 5px;
  letter-spacing: 0.05em;
}

.ops-v-small {
  color: #d1d1d1;
  font-size: 0.83rem;
  margin-top: 4px;
}

.ops-meter {
  margin-top: 8px;
  width: 100%;
  height: 8px;
  border: 1px solid #383838;
  background: #111111;
  position: relative;
  overflow: hidden;
}

.ops-meter-fill {
  position: absolute;
  left: 0;
  top: 0;
  bottom: 0;
  background: linear-gradient(90deg, #bdbdbd 0%, #f0f0f0 100%);
}

.ops-spark {
  margin-top: 8px;
  border: 1px solid #343434;
  background: #0e0e0e;
  padding: 3px;
}

.ops-spark svg {
  width: 100%;
  height: 74px;
  display: block;
}

.ops-foot {
  margin-top: 10px;
  color: #8f8f8f;
  font-size: 0.74rem;
  letter-spacing: 0.08em;
  text-transform: uppercase;
}

.gradio-container footer,
.gradio-container .built-with,
.gradio-container [data-testid="footer"] {
  display: none !important;
}

.nt-footer {
  margin-top: 12px;
  border: 1px solid #2f2f2f;
  background: linear-gradient(180deg, #111111 0%, #0b0b0b 100%);
  color: #bcbcbc;
  text-align: center;
  padding: 10px 12px;
  font-size: 0.74rem;
  letter-spacing: 0.08em;
  text-transform: uppercase;
}
"""

# Static HTML banner; its class names match the ".ops-header*" / ".ops-tag" rules in TACTICAL_CSS.
TACTICAL_HEADER_HTML = """
<div class="ops-header">
  <div class="ops-header-title">Maths Conjecture Solutions // Training Operations Console</div>
  <div class="ops-header-tags">
    <span class="ops-tag">Tactical Monochrome</span>
    <span class="ops-tag">Controlled Ops</span>
    <span class="ops-tag">Staged Curriculum</span>
    <span class="ops-tag">Live Telemetry</span>
  </div>
</div>
"""

# Static HTML footer; styled by the ".nt-footer" rule in TACTICAL_CSS.
TACTICAL_FOOTER_HTML = """
<div class="nt-footer">© 2026 NorthernTribe Research, Inc. All rights reserved.</div>
"""

# Markdown text describing the project and the steps of the pipeline.
PROJECT_DESCRIPTION = """
# Math Conjecture Trainer
This console runs the full training operations lane for the `maths-conjuncture-solutions` project:

An autonomous training operations console for DeepSeek-Math that runs multi-stage curriculum fine-tuning on Space GPU, executes post-training quality evaluation, and publishes only qualified adapters, checkpoints, and run reports to your Hugging Face model repository.

1. Pull released parquet splits from `NorthernTribe-Research/math-conjecture-training-corpus`.
2. Build runtime training configuration from `configs/deepseek_math_sota.yaml`.
3. Execute multi-stage DeepSeek-Math curriculum fine-tuning via `scripts/train_sota.py`.
4. Run post-training evaluation with pass@k-style sampling and family-level metrics.
5. Enforce autonomous quality gates before adapter promotion/push.
6. Stream live terminal telemetry, tactical visualization, and structured run summaries.

Autonomous Mode is enabled by default and applies full-stage execution parameters automatically.
"""


def _safe_float(value: Any, default: float) -> float:
    """Convert ``value`` to ``float``, falling back to ``default`` on failure.

    Args:
        value: Anything accepted by ``float()`` (numbers, numeric strings, ...).
        default: Value returned when ``value`` cannot be converted.

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``default``.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large to be represented as a double
        # (e.g. ``10**400``) must not escape a "safe" converter.
        return default


def _safe_int(value: Any, default: int) -> int:
    """Convert ``value`` to ``int``, falling back to ``default`` on failure.

    Args:
        value: Anything accepted by ``int()`` (numbers, integer strings, ...).
        default: Value returned when ``value`` cannot be converted.

    Returns:
        ``int(value)`` when the conversion succeeds, otherwise ``default``.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ``int(float("inf"))`` -- e.g. a YAML ``.inf`` value --
        # must not escape a "safe" converter.
        return default


def load_template_defaults() -> Dict[str, Any]:
    """Load the YAML config template as a dictionary.

    Returns:
        The parsed template when it exists, parses, and is a mapping;
        an empty dict in every other case (missing file, read/parse error,
        non-mapping document).
    """
    if CONFIG_TEMPLATE.exists():
        try:
            loaded = yaml.safe_load(CONFIG_TEMPLATE.read_text(encoding="utf-8"))
        except Exception:
            # Best-effort: an unreadable or malformed template means "no defaults".
            loaded = None
        if isinstance(loaded, dict):
            return loaded
    return {}


# Defaults derived from the YAML template at import time. Every lookup is
# defensive so a missing or malformed template never prevents the app from
# starting.
TEMPLATE_CFG = load_template_defaults()
try:
    TEMPLATE_STAGE_COUNT = max(1, len(TEMPLATE_CFG.get("stages", []) or [None]))
except TypeError:
    # "stages" is a truthy scalar (no len()); treat it as a single stage.
    TEMPLATE_STAGE_COUNT = 1
TEMPLATE_QUALITY_GATE = TEMPLATE_CFG.get("quality_gate", {})
if not isinstance(TEMPLATE_QUALITY_GATE, dict):
    TEMPLATE_QUALITY_GATE = {}
TEMPLATE_POST_EVAL = TEMPLATE_CFG.get("post_eval", {})
if not isinstance(TEMPLATE_POST_EVAL, dict):
    TEMPLATE_POST_EVAL = {}
TEMPLATE_HUB = TEMPLATE_CFG.get("hub", {})
if not isinstance(TEMPLATE_HUB, dict):
    TEMPLATE_HUB = {}

# Spellings accepted as "true" when a flag arrives as a non-boolean value.
_TRUTHY_STRINGS = {"1", "true", "yes", "y", "on"}

_raw_gate_enabled = TEMPLATE_QUALITY_GATE.get("enabled", True)
if isinstance(_raw_gate_enabled, bool):
    DEFAULT_GATE_ENABLED = _raw_gate_enabled
else:
    DEFAULT_GATE_ENABLED = str(_raw_gate_enabled).strip().lower() in _TRUTHY_STRINGS
DEFAULT_GATE_MIN_ROWS = max(1, _safe_int(TEMPLATE_QUALITY_GATE.get("min_evaluated_rows"), 120))
DEFAULT_GATE_MIN_PASS_AT_1 = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_1"), 0.01))
DEFAULT_GATE_MIN_PASS_AT_K = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_k"), 0.06))
DEFAULT_AUTO_EVAL_K = max(1, _safe_int(TEMPLATE_POST_EVAL.get("k"), 4))
DEFAULT_AUTO_EVAL_SAMPLES = max(1, _safe_int(TEMPLATE_POST_EVAL.get("max_samples"), 300))

# Parsed like the gate flag above: plain bool("false") would be True and
# silently enable pushing for a quoted "false"/"no"/"off" value.
_raw_push_to_hub = TEMPLATE_HUB.get("push_to_hub", True)
if isinstance(_raw_push_to_hub, bool):
    DEFAULT_AUTO_PUSH_TO_HUB = _raw_push_to_hub
else:
    DEFAULT_AUTO_PUSH_TO_HUB = str(_raw_push_to_hub).strip().lower() in _TRUTHY_STRINGS


def now_ts() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS UTC``."""
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # yields the same wall-clock fields for strftime.
    return dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")


def append_log(lines: List[str], message: str) -> str:
    """Append a timestamped entry to ``lines`` and return the joined log text.

    Args:
        lines: Log buffer; mutated in place by appending the new entry.
        message: Text of the entry (stored as ``"[<timestamp>] <message>"``).

    Returns:
        All entries joined with newlines, limited to the trailing
        200,000 characters.
    """
    stamped = f"[{now_ts()}] {message}"
    lines.append(stamped)
    joined = "\n".join(lines)
    max_chars = 200_000
    return joined[-max_chars:] if len(joined) > max_chars else joined


def summary_text(summary: Dict[str, Any]) -> str:
    """Serialize ``summary`` as indented ASCII JSON; an empty summary gives ``""``."""
    return json.dumps(summary, ensure_ascii=True, indent=2) if summary else ""


def compose_ops_console(log_text: str, summary_json: str) -> str:
    """Combine the log text and the summary JSON into one console payload.

    Both inputs are stripped. When a summary is present it is appended under
    a "Mission Summary (JSON)" heading, separated from the log (if any) by a
    blank line and a 52-dash rule.
    """
    log_part = (log_text or "").strip()
    summary_part = (summary_json or "").strip()
    if not summary_part:
        return log_part

    heading = "Mission Summary (JSON)\n"
    if not log_part:
        return heading + summary_part

    divider = "-" * 52
    return log_part + "\n\n" + divider + "\n" + heading + summary_part


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return ``value`` unchanged when it is a dict, otherwise a new empty dict."""
    if isinstance(value, dict):
        return value
    return {}


def _parse_summary_json(text: str) -> Dict[str, Any]:
    """Parse ``text`` as a JSON object.

    Returns:
        The decoded dict, or ``{}`` when ``text`` is empty, is not valid
        JSON, or decodes to something other than an object.
    """
    if text:
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            return decoded
    return {}


def _fmt_pct(value: Any) -> str:
    """Format a 0-1 ratio as a percentage with one decimal; ``"--"`` if not numeric."""
    try:
        ratio = float(value)
    except (TypeError, ValueError):
        return "--"
    return f"{ratio * 100:.1f}%"


def _fmt_float(value: Any, digits: int = 3) -> str:
    """Format ``value`` as a fixed-point number with ``digits`` decimals.

    Returns ``"--"`` when ``value`` cannot be converted or formatted.
    """
    try:
        return format(float(value), f".{digits}f")
    except (TypeError, ValueError):
        return "--"


def _extract_loss_values(log_text: str, limit: int = 48) -> List[float]:
    """Collect training-loss readings from log text.

    Each line is lower-cased; lines mentioning ``eval_loss`` are ignored and
    the first ``LOSS_LOG_RE`` match on a remaining line supplies the value.
    Only values ``>= 0`` are kept.

    Args:
        log_text: Multi-line log text.
        limit: Maximum number of most recent readings to return.

    Returns:
        The retained readings in log order, trimmed to the last ``limit``.
    """
    collected: List[float] = []
    for raw_line in log_text.splitlines():
        lowered = raw_line.lower()
        if "eval_loss" in lowered:
            continue
        found = LOSS_LOG_RE.search(lowered)
        if found is None:
            continue
        try:
            reading = float(found.group(1))
        except (TypeError, ValueError):
            continue
        # The comparison is False for NaN as well as for negatives.
        if reading >= 0.0:
            collected.append(reading)
    return collected[-limit:] if len(collected) > limit else collected


def _extract_summary_loss_values(summary: Dict[str, Any], limit: int = 24) -> List[float]:
    """Collect per-stage ``train_loss`` values from a run summary.

    Reads ``summary["training_summary"]["stages_ran"][i]["train_metrics"]
    ["train_loss"]``, silently skipping entries with an unexpected shape or
    a value that is not a number ``>= 0``.

    Args:
        summary: Run summary dictionary.
        limit: Maximum number of most recent values to return.

    Returns:
        The retained losses in stage order, trimmed to the last ``limit``.
    """
    collected: List[float] = []
    stage_reports = _as_dict(summary.get("training_summary")).get("stages_ran")
    if not isinstance(stage_reports, list):
        return collected

    for report in stage_reports:
        metrics = report.get("train_metrics") if isinstance(report, dict) else None
        if not isinstance(metrics, dict):
            continue
        try:
            stage_loss = float(metrics.get("train_loss"))
        except (TypeError, ValueError):
            continue
        if stage_loss >= 0.0:
            collected.append(stage_loss)

    return collected[-limit:] if len(collected) > limit else collected


def _build_loss_sparkline(losses: List[float]) -> str:
    """Render ``losses`` as an inline SVG sparkline with a "latest loss" caption.

    The values are spread evenly along the x axis of a 520x74 view box
    (5 units of padding) and scaled vertically so the maximum sits at the
    top and the minimum at the bottom.

    Returns:
        An HTML fragment; a placeholder message when ``losses`` is empty.
    """
    if not losses:
        return "<div class='ops-v-small'>No live loss points yet.</div>"

    width, height, pad = 520, 74, 5
    lowest, highest = min(losses), max(losses)
    # Guard against a zero range when every value is identical.
    span = max(highest - lowest, 1e-9)
    inner_w = width - 2 * pad
    inner_h = height - 2 * pad
    last_index = max(1, len(losses) - 1)

    coords = [
        f"{pad + (i * inner_w / last_index):.2f},{pad + ((highest - v) * inner_h / span):.2f}"
        for i, v in enumerate(losses)
    ]
    polyline = " ".join(coords)

    caption = f"<div class='ops-v-small'>Latest train loss: <strong>{_fmt_float(losses[-1], 4)}</strong></div>"
    chart = (
        "<div class='ops-spark'>"
        f"<svg viewBox='0 0 {width} {height}' preserveAspectRatio='none'>"
        f"<polyline points='{polyline}' fill='none' stroke='#f0f0f0' stroke-width='2' />"
        "</svg>"
        "</div>"
    )
    return caption + chart


def _infer_stage_snapshot(summary: Dict[str, Any], log_text: str) -> Dict[str, Any]:
    """Estimate stage progress from the run summary and recent log lines.

    Completed stages come from ``summary["training_summary"]["stages_ran"]``;
    the active stage is the most recent ``[stage N]`` marker found in the
    last 350 log lines. An active stage counts as 35% done when computing
    the progress fraction.

    Returns:
        Dict with ``start_stage``, ``stage_count``, ``completed``,
        ``active_stage`` (``None`` when no marker was found) and
        ``progress`` (clamped to ``[0, 1]``).
    """
    first_stage = max(1, _safe_int(summary.get("start_stage"), 1))
    total_stages = max(1, _safe_int(summary.get("max_stages"), TEMPLATE_STAGE_COUNT))

    ran = _as_dict(summary.get("training_summary")).get("stages_ran")
    done = min(total_stages, len(ran)) if isinstance(ran, list) else 0

    current = None
    recent_lines = log_text.splitlines()[-350:]
    for entry in reversed(recent_lines):
        hit = STAGE_LOG_RE.search(entry)
        if hit:
            current = _safe_int(hit.group(1), 0)
            break

    if done >= total_stages:
        fraction = 1.0
    else:
        fraction = done / total_stages
        if current and current >= first_stage:
            # Credit the in-flight stage with partial (35%) completion.
            partial = (current - first_stage) + 0.35
            fraction = max(fraction, min(1.0, partial / total_stages))

    return {
        "start_stage": first_stage,
        "stage_count": total_stages,
        "completed": done,
        "active_stage": current,
        "progress": max(0.0, min(1.0, fraction)),
    }


def render_ops_visual(summary: Dict[str, Any], status_text: str, log_text: str) -> str:
    """Render the telemetry panel as an HTML fragment.

    Args:
        summary: Run summary; the ``runtime``, ``quality_gate``, ``evaluation``
            and ``push`` sections are read when present (non-dict values are
            treated as empty).
        status_text: Short status line shown under the run label
            (``"Idle"`` when empty).
        log_text: Log text used for stage progress and the loss sparkline.

    Returns:
        HTML markup for the six telemetry cards (run, runtime, stage
        progress, quality gate, evaluation, loss stream).
    """
    safe_summary = _as_dict(summary)
    runtime = _as_dict(safe_summary.get("runtime"))
    quality_gate = _as_dict(safe_summary.get("quality_gate"))
    evaluation = _as_dict(safe_summary.get("evaluation"))
    push_report = _as_dict(safe_summary.get("push"))

    # Free-form text is HTML-escaped before being interpolated into the markup.
    run_label = html.escape(str(safe_summary.get("run_label") or "not-started"))
    status_value = html.escape(status_text or "Idle")
    runtime_mode = "GPU READY" if runtime.get("cuda_available") else "CPU FALLBACK"
    runtime_mode = html.escape(runtime_mode)
    device_count = _safe_int(runtime.get("cuda_device_count"), 0)

    # Gate state: Disabled / Passed / Failed, or Pending while "passed" is not yet a bool.
    gate_enabled = bool(quality_gate.get("enabled"))
    gate_passed = quality_gate.get("passed")
    if not gate_enabled:
        gate_text = "Disabled"
    elif gate_passed is True:
        gate_text = "Passed"
    elif gate_passed is False:
        gate_text = "Failed"
    else:
        gate_text = "Pending"

    stage_meta = _infer_stage_snapshot(safe_summary, log_text)
    progress_pct = int(stage_meta["progress"] * 100)
    active_stage = stage_meta.get("active_stage")
    stage_hint = f"active stage {active_stage}" if active_stage else "awaiting stage telemetry"
    stage_hint = html.escape(stage_hint)

    # Prefer losses parsed from the live log; with fewer than two points,
    # use the per-stage losses recorded in the summary when there are any.
    losses = _extract_loss_values(log_text)
    if len(losses) < 2:
        summary_losses = _extract_summary_loss_values(safe_summary)
        if summary_losses:
            losses = summary_losses
    sparkline_html = _build_loss_sparkline(losses)

    pass_k = _fmt_pct(evaluation.get("pass_at_k"))
    pass_1 = _fmt_pct(evaluation.get("pass_at_1"))
    exact_k = _fmt_pct(evaluation.get("exact_at_k"))

    # Push state stays "Pending" until the summary contains a push report.
    push_state = "Pending"
    if push_report:
        requested = bool(push_report.get("requested"))
        performed = bool(push_report.get("performed"))
        if not requested:
            push_state = "Not requested"
        elif performed:
            push_state = "Published"
        else:
            push_state = "Blocked"

    return f"""
<div class="ops-visual">
  <div class="ops-visual-head">
    <div class="ops-visual-title">Live Tactical Telemetry</div>
    <div class="ops-visual-sub">Monochrome Ops Feed</div>
  </div>
  <div class="ops-grid">
    <div class="ops-card">
      <div class="ops-k">Run</div>
      <div class="ops-v">{run_label}</div>
      <div class="ops-v-small">{status_value}</div>
    </div>
    <div class="ops-card">
      <div class="ops-k">Runtime</div>
      <div class="ops-v">{runtime_mode}</div>
      <div class="ops-v-small">cuda devices: {device_count}</div>
    </div>
    <div class="ops-card">
      <div class="ops-k">Stage Progress</div>
      <div class="ops-v">{stage_meta['completed']} / {stage_meta['stage_count']}</div>
      <div class="ops-v-small">{stage_hint}</div>
      <div class="ops-meter"><div class="ops-meter-fill" style="width:{progress_pct}%"></div></div>
    </div>
    <div class="ops-card">
      <div class="ops-k">Quality Gate</div>
      <div class="ops-v">{html.escape(gate_text)}</div>
      <div class="ops-v-small">push: {html.escape(push_state)}</div>
    </div>
    <div class="ops-card">
      <div class="ops-k">Eval pass@k</div>
      <div class="ops-v">{pass_k}</div>
      <div class="ops-v-small">pass@1 {pass_1} | exact@k {exact_k}</div>
    </div>
    <div class="ops-card">
      <div class="ops-k">Loss Stream</div>
      {sparkline_html}
    </div>
  </div>
  <div class="ops-foot">dull tactical theme · black / grey / white · anduril/palantir-inspired operations console</div>
</div>
""".strip()


def _token_from_credentials_file(path: Path) -> Optional[str]:
    """Extract a token from a JSON credentials file.

    The file must contain a JSON object; the keys ``token``, ``key``,
    ``api_key`` and ``hf_token`` are tried in that order and the first
    non-blank string value is returned, stripped.

    Args:
        path: Location of the credentials file.

    Returns:
        The token, or ``None`` when the file cannot be read or parsed, is
        not a JSON object, or holds no usable value.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return None
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, ...) has no
        # keys to look up; previously this raised AttributeError on ``.get``.
        return None
    for key in ("token", "key", "api_key", "hf_token"):
        value = data.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return None


def resolve_hf_token() -> Tuple[Optional[str], str, Optional[str]]:
    """Locate a Hugging Face token.

    Lookup order: the ``HF_TOKEN`` environment variable, the
    ``HUGGINGFACE_HUB_TOKEN`` environment variable, then each existing file
    in ``CREDENTIAL_FILE_CANDIDATES``.

    Returns:
        ``(token, source, path)`` where ``source`` is ``"env:<VAR>"``,
        ``"credentials_file"`` or ``"none"``, and ``path`` is set only for
        a credentials file.
    """
    for var_name in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
        from_env = (os.environ.get(var_name) or "").strip()
        if from_env:
            return from_env, f"env:{var_name}", None

    for candidate in CREDENTIAL_FILE_CANDIDATES:
        if not candidate.exists():
            continue
        from_file = _token_from_credentials_file(candidate)
        if from_file:
            return from_file, "credentials_file", str(candidate)

    return None, "none", None


def validate_repo_id(repo_id: str, field_name: str) -> str:
    """Validate a ``<owner>/<repo>`` identifier and return it stripped.

    Args:
        repo_id: Raw identifier as entered by the user.
        field_name: Human-readable field name used in error messages.

    Raises:
        ValueError: If the identifier is blank or does not match
            ``REPO_ID_RE``.
    """
    candidate = (repo_id or "").strip()
    if candidate == "":
        raise ValueError(f"{field_name} is required.")
    if REPO_ID_RE.match(candidate) is None:
        raise ValueError(
            f"{field_name} must look like '<owner>/<repo>' and may only contain letters, digits, '-', '_' and '.'."
        )
    return candidate


def ensure_workspace() -> None:
    """Create the data and runtime directories (and their parents) if missing."""
    for directory in (DATA_DIR, RUNTIME_DIR):
        directory.mkdir(parents=True, exist_ok=True)


def run_runtime_snapshot() -> Dict[str, Any]:
    """Describe the execution environment.

    Returns:
        Dict with the Python, Gradio and torch versions plus CUDA
        availability and device count (``0`` when CUDA is unavailable).
    """
    cuda_ok = bool(torch.cuda.is_available())
    snapshot: Dict[str, Any] = {}
    snapshot["python"] = sys.version.split()[0]
    snapshot["gradio"] = getattr(gr, "__version__", "unknown")
    snapshot["torch"] = getattr(torch, "__version__", "unknown")
    snapshot["cuda_available"] = cuda_ok
    snapshot["cuda_device_count"] = int(torch.cuda.device_count()) if cuda_ok else 0
    return snapshot


def begin_run(run_label: str) -> bool:
    """Claim the single run slot for ``run_label``.

    Returns:
        ``True`` when the slot was free and is now held (the cancel flag is
        reset and the label recorded); ``False`` when a run is already in
        progress, in which case nothing is changed.
    """
    global RUN_IN_PROGRESS, CANCEL_REQUESTED, ACTIVE_RUN_LABEL
    with RUN_STATE_LOCK:
        slot_free = not RUN_IN_PROGRESS
        if slot_free:
            RUN_IN_PROGRESS = True
            CANCEL_REQUESTED = False
            ACTIVE_RUN_LABEL = run_label
    return slot_free


def finish_run() -> None:
    """Release the run slot and reset all shared run state to its idle values."""
    global RUN_IN_PROGRESS, CANCEL_REQUESTED, ACTIVE_PROCESS, ACTIVE_RUN_LABEL
    with RUN_STATE_LOCK:
        RUN_IN_PROGRESS = CANCEL_REQUESTED = False
        ACTIVE_PROCESS, ACTIVE_RUN_LABEL = None, ""


def set_active_process(proc: subprocess.Popen) -> None:
    """Record ``proc`` as the subprocess of the current run (under the state lock)."""
    global ACTIVE_PROCESS
    RUN_STATE_LOCK.acquire()
    try:
        ACTIVE_PROCESS = proc
    finally:
        RUN_STATE_LOCK.release()


def clear_active_process(proc: subprocess.Popen) -> None:
    """Forget the active subprocess, but only if it is still ``proc``."""
    global ACTIVE_PROCESS
    with RUN_STATE_LOCK:
        still_registered = ACTIVE_PROCESS is proc
        if still_registered:
            ACTIVE_PROCESS = None


def is_cancel_requested() -> bool:
    """Return the current value of the cancel flag (read under the state lock)."""
    with RUN_STATE_LOCK:
        flag = CANCEL_REQUESTED
    return flag


def terminate_process_group(proc: subprocess.Popen, grace_period_s: float = 10.0) -> None:
    """Stop ``proc`` (and, on POSIX, its whole process group).

    Sends SIGTERM first and waits up to ``grace_period_s`` seconds for the
    process to exit; if it is still alive, escalates to SIGKILL and reaps it
    so that ``proc.returncode`` is populated and no zombie is left behind.

    On POSIX the child's pid is used as the process-group id, which holds
    for children started with ``start_new_session=True`` (as
    ``stream_subprocess`` does).

    Args:
        proc: The subprocess to stop. A process that has already exited is
            left untouched.
        grace_period_s: Seconds to wait after SIGTERM before escalating.
    """
    if proc.poll() is not None:
        return

    is_posix = os.name == "posix"
    try:
        if is_posix:
            os.killpg(proc.pid, signal.SIGTERM)
        else:
            proc.terminate()
        proc.wait(timeout=grace_period_s)
        return
    except subprocess.TimeoutExpired:
        pass  # Still alive after the grace period: escalate below.
    except OSError:
        pass  # Group already gone or not signalable: fall through to kill/reap.

    try:
        if is_posix:
            os.killpg(proc.pid, signal.SIGKILL)
        else:
            proc.kill()
    except OSError:
        pass  # Best-effort: the process may have exited in the meantime.

    try:
        # Reap the child so poll()/returncode reflect the exit immediately.
        proc.wait(timeout=5.0)
    except subprocess.TimeoutExpired:
        pass  # Unkillable (e.g. stuck in the kernel); nothing more to do here.


def request_cancel() -> str:
    """Flag the active run for cancellation and stop its subprocess, if any.

    Returns:
        A status message: ``"No active run."`` when idle, otherwise a
        confirmation naming the run (noting whether a live subprocess was
        terminated).
    """
    global CANCEL_REQUESTED
    with RUN_STATE_LOCK:
        if not RUN_IN_PROGRESS:
            return "No active run."
        CANCEL_REQUESTED = True
        # Snapshot under the lock; the termination itself happens outside it.
        proc, run_label = ACTIVE_PROCESS, ACTIVE_RUN_LABEL

    notice = f"Cancellation requested for {run_label}."
    if proc is None or proc.poll() is not None:
        return notice
    terminate_process_group(proc)
    return f"{notice} Terminating subprocess."


def download_dataset(
    dataset_repo_id: str,
    token: Optional[str],
    log_lines: List[str],
    force_redownload: bool,
) -> Tuple[str, str, str]:
    """Fetch the train/validation/test parquet splits into ``DATA_DIR``.

    Existing local copies are reused unless ``force_redownload`` is set.
    Each download is attempted up to three times with exponential back-off.
    The file is copied under a temporary name and renamed into place, so an
    interrupted copy can never be mistaken for a valid cached split later.

    Args:
        dataset_repo_id: Hub dataset repository to download from.
        token: Access token, or ``None`` for anonymous access.
        log_lines: Log buffer that receives progress messages.
        force_redownload: Ignore local copies and the hub cache.

    Returns:
        Paths of the train, validation and test parquet files, in that order.

    Raises:
        RuntimeError: If the run is cancelled, or a split still fails after
            the final attempt.
    """
    ensure_workspace()
    out_files: Dict[str, str] = {}
    download_attempts = 3
    for split in ("train", "validation", "test"):
        if is_cancel_requested():
            raise RuntimeError("Run cancelled by user.")

        out_path = DATA_DIR / f"{split}.parquet"
        if out_path.exists() and not force_redownload:
            append_log(log_lines, f"Using cached {split}.parquet at {out_path}")
            out_files[split] = str(out_path)
            continue

        partial_path = out_path.with_name(out_path.name + ".partial")
        for attempt in range(1, download_attempts + 1):
            try:
                cached_path = hf_hub_download(
                    repo_id=dataset_repo_id,
                    repo_type="dataset",
                    filename=f"{split}.parquet",
                    token=token,
                    force_download=force_redownload,
                )
                # Copy next to the target, then rename: the rename is atomic
                # on the same filesystem, so out_path is either absent/old or
                # complete -- never truncated.
                shutil.copy2(cached_path, partial_path)
                partial_path.replace(out_path)
                append_log(log_lines, f"Downloaded {split}.parquet to {out_path}")
                break
            except Exception as exc:
                try:
                    partial_path.unlink()
                except OSError:
                    pass  # Nothing to clean up (or cleanup itself failed); not fatal.
                if attempt >= download_attempts:
                    raise RuntimeError(
                        f"Failed downloading {split}.parquet after {download_attempts} attempts: {exc}"
                    ) from exc
                wait_s = 2 ** attempt
                append_log(
                    log_lines,
                    f"Download retry {attempt}/{download_attempts - 1} for {split}.parquet after error: {exc}",
                )
                time.sleep(wait_s)
                # Honour a Stop click that arrived during the back-off.
                if is_cancel_requested():
                    raise RuntimeError("Run cancelled by user.") from None
        out_files[split] = str(out_path)
    return out_files["train"], out_files["validation"], out_files["test"]


def write_runtime_config(
    base_model_id: str,
    model_repo_id: str,
    train_file: str,
    validation_file: str,
    test_file: str,
    run_eval: bool,
    eval_k: int,
    eval_samples: int,
    push_to_hub: bool,
    enforce_quality_gate: bool,
    gate_min_pass_at_1: float,
    gate_min_pass_at_k: float,
    gate_min_rows: int,
) -> Path:
    """Materialize the runtime YAML config from the template.

    Loads ``CONFIG_TEMPLATE``, overrides the model, hub, data, output,
    post-eval and quality-gate settings with the given values and writes the
    result to ``RUNTIME_DIR / "deepseek_math_sota.runtime.yaml"``.

    The template must provide ``model``, ``hub``, ``data`` and ``global``
    mappings; ``post_eval`` and ``quality_gate`` are created when missing or
    not a mapping.

    Returns:
        Path of the written runtime config.
    """
    cfg = yaml.safe_load(CONFIG_TEMPLATE.read_text(encoding="utf-8"))
    cfg["model"]["base_model"] = base_model_id
    cfg["hub"]["repo_id"] = model_repo_id
    cfg["hub"]["push_to_hub"] = bool(push_to_hub)
    cfg["data"]["default_train_file"] = train_file
    cfg["data"]["default_validation_file"] = validation_file
    cfg["global"]["output_root"] = str(TRAIN_OUTPUT_DIR)

    # setdefault() alone keeps an existing null section (YAML "post_eval:"),
    # which then fails on item assignment; replace any non-mapping value.
    post_eval = cfg.get("post_eval")
    if not isinstance(post_eval, dict):
        post_eval = {}
        cfg["post_eval"] = post_eval
    post_eval["enabled"] = bool(run_eval)
    post_eval["eval_file"] = test_file
    post_eval["k"] = int(eval_k)
    post_eval["max_samples"] = int(eval_samples)
    post_eval["output_json"] = str(TRAIN_OUTPUT_DIR / "post_eval_report.json")

    quality_gate = cfg.get("quality_gate")
    if not isinstance(quality_gate, dict):
        quality_gate = {}
        cfg["quality_gate"] = quality_gate
    quality_gate["enabled"] = bool(enforce_quality_gate)
    quality_gate["min_evaluated_rows"] = int(gate_min_rows)
    quality_gate["min_pass_at_1"] = float(gate_min_pass_at_1)
    quality_gate["min_pass_at_k"] = float(gate_min_pass_at_k)
    # The gate can only demand a post-eval report when evaluation actually runs.
    quality_gate["require_post_eval"] = bool(enforce_quality_gate and run_eval)

    # Do not rely on an earlier pipeline step having created the directory.
    RUNTIME_DIR.mkdir(parents=True, exist_ok=True)
    runtime_path = RUNTIME_DIR / "deepseek_math_sota.runtime.yaml"
    runtime_path.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
    return runtime_path


def stream_subprocess(
    cmd: List[str],
    env: dict,
    cwd: Path,
    log_lines: List[str],
    status_prefix: str,
) -> Generator[Tuple[str, str], None, int]:
    """Run ``cmd`` and stream its combined stdout/stderr into the log.

    The child is started in its own session and registered as the active
    process so it can be cancelled. While it runs, each non-empty output
    line is appended to ``log_lines``; after 20 seconds without output a
    heartbeat entry is logged instead.

    If the generator is closed early or an error escapes the loop, a child
    that is still running is terminated and its output pipe closed, so no
    orphaned process is left behind.

    Args:
        cmd: Command and arguments.
        env: Environment for the child process.
        cwd: Working directory for the child process.
        log_lines: Log buffer; mutated in place.
        status_prefix: Label used in status strings and log messages.

    Yields:
        ``(log_text, status)`` tuples after every log update.

    Returns:
        The child's exit code (generator return value).
    """
    append_log(log_lines, f"Running command: {' '.join(cmd)}")
    yield "\n".join(log_lines), f"{status_prefix}: running"

    proc = subprocess.Popen(
        cmd,
        cwd=str(cwd),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
        # Own session => own process group, so the whole tree can be signalled.
        start_new_session=True,
    )
    set_active_process(proc)
    cancelled = False
    ret = 1
    last_heartbeat = time.monotonic()
    heartbeat_interval_s = 20.0
    assert proc.stdout is not None
    try:
        while True:
            if is_cancel_requested() and proc.poll() is None:
                cancelled = True
                append_log(log_lines, "Cancellation requested. Terminating subprocess.")
                yield "\n".join(log_lines), f"{status_prefix}: cancelling"
                terminate_process_group(proc)

            try:
                # Wake up at least twice a second to notice cancellation/exit.
                ready, _, _ = select.select([proc.stdout], [], [], 0.5)
            except (OSError, ValueError):
                # select() unusable on this handle: fall back to a blocking read.
                ready = [proc.stdout]

            if ready:
                line = proc.stdout.readline()
                if line:
                    line = line.rstrip()
                    if line:
                        append_log(log_lines, line)
                        last_heartbeat = time.monotonic()
                        yield "\n".join(log_lines), f"{status_prefix}: running"
            elif proc.poll() is None:
                now = time.monotonic()
                if now - last_heartbeat >= heartbeat_interval_s:
                    append_log(log_lines, f"{status_prefix} heartbeat: process alive, waiting for next log chunk.")
                    last_heartbeat = now
                    yield "\n".join(log_lines), f"{status_prefix}: running"

            if proc.poll() is not None:
                break

        # Drain whatever the child wrote between the last read and its exit.
        for tail_line in proc.stdout.readlines():
            tail_line = tail_line.rstrip()
            if tail_line:
                append_log(log_lines, tail_line)
                yield "\n".join(log_lines), f"{status_prefix}: running"

        ret = proc.wait()
    finally:
        clear_active_process(proc)
        # Reached with a live child only when the generator was closed early
        # or an exception escaped the loop: do not leave it running.
        if proc.poll() is None:
            terminate_process_group(proc)
        try:
            proc.stdout.close()
        except OSError:
            pass  # Closing the pipe is best-effort cleanup.

    if cancelled and ret != 0:
        append_log(log_lines, f"{status_prefix} cancelled.")
        yield "\n".join(log_lines), f"{status_prefix}: cancelled"
    else:
        append_log(log_lines, f"{status_prefix} finished with exit code {ret}")
        yield "\n".join(log_lines), f"{status_prefix}: {'ok' if ret == 0 else 'failed'}"
    return ret


def make_copyable_textbox(
    label: str,
    lines: int,
    max_lines: Optional[int] = None,
    value: str = "",
    interactive: bool = False,
) -> gr.Textbox:
    """Create a ``gr.Textbox`` with a copy button when the installed Gradio supports one.

    The constructor signature is inspected at call time: a ``buttons``
    parameter is preferred, ``show_copy_button`` is used otherwise, and
    neither is passed when neither exists.

    Args:
        label: Textbox label.
        lines: Number of visible lines.
        max_lines: Optional upper bound on lines; omitted when ``None``.
        value: Initial text.
        interactive: Whether the user may edit the text.
    """
    options: Dict[str, Any] = dict(
        label=label,
        lines=lines,
        value=value,
        interactive=interactive,
    )
    if max_lines is not None:
        options["max_lines"] = max_lines

    supported = inspect.signature(gr.Textbox.__init__).parameters
    if "buttons" in supported:
        options["buttons"] = ["copy"]
    elif "show_copy_button" in supported:
        options["show_copy_button"] = True

    return gr.Textbox(**options)


def clear_outputs() -> Tuple[str, str, str]:
    """Return the idle UI state: empty console text, "Idle" status and an idle telemetry panel."""
    idle_status = "Idle"
    idle_visual = render_ops_visual({}, idle_status, "")
    return "", idle_status, idle_visual


def cancel_pipeline() -> str:
    """Request cancellation of the active run and return the resulting status message."""
    outcome = request_cancel()
    return outcome


def _relay_subprocess_updates(
    stage_gen: Generator[Tuple[str, str], None, int],
    summary: Dict[str, Any],
) -> Generator[Tuple[str, str, str], None, int]:
    """Re-yield ``(logs, status)`` updates from a subprocess stream with the summary attached.

    Each status is recorded under ``summary["status"]`` before the update is
    forwarded as ``(logs, status, summary_text(summary))``. The generator's own
    return value (carried by ``StopIteration``) is returned so callers can use
    ``ret = yield from _relay_subprocess_updates(...)``.
    """
    while True:
        try:
            logs_text, status_text = next(stage_gen)
        except StopIteration as stop:
            return stop.value
        summary["status"] = status_text
        yield logs_text, status_text, summary_text(summary)


def run_pipeline_core(
    dataset_repo_id: str,
    model_repo_id: str,
    base_model_id: str,
    autonomous_mode: bool,
    start_stage: int,
    max_stages: int,
    run_eval: bool,
    eval_k: int,
    eval_samples: int,
    enforce_quality_gate: bool,
    gate_min_pass_at_1: float,
    gate_min_pass_at_k: float,
    gate_min_rows: int,
    push_to_hub: bool,
    force_redownload: bool,
    preflight_only: bool,
) -> Generator[Tuple[str, str, str], None, None]:
    """Run one training pipeline and stream ``(log_text, status, summary_json)`` updates.

    Steps, in order: acquire the single-run slot, validate inputs, download the
    dataset, write the runtime config, run the training script as a subprocess,
    read ``training_summary.json`` if present, and (when evaluation is requested
    and training did not already report post-eval metrics) run the evaluation
    script as a second subprocess.

    When ``autonomous_mode`` is true, the stage range, evaluation, quality-gate,
    push, redownload and preflight settings passed in are overridden by the
    module-level defaults.

    Failures are not raised to the caller: any exception is recorded in the
    summary under ``"error"`` and reported through a final "Failed" or
    "Cancelled" update. The run slot is always released via ``finish_run()``.

    Yields:
        Tuples of (joined log lines, short status string, summary text).
    """
    log_lines: List[str] = []
    summary: Dict[str, Any] = {}
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted label is identical.
    run_label = dt.datetime.now(dt.timezone.utc).strftime("run-%Y%m%d-%H%M%S")

    if not begin_run(run_label):
        append_log(log_lines, "A run is already in progress. Wait for it to finish or click Stop.")
        busy_summary = {
            "result": "busy",
            "message": "A run is already in progress.",
            "timestamp_utc": now_ts(),
        }
        yield "\n".join(log_lines), "Busy", summary_text(busy_summary)
        return

    try:
        token, _, _ = resolve_hf_token()

        dataset_repo_id = validate_repo_id(dataset_repo_id, "Dataset repo")
        model_repo_id = validate_repo_id(model_repo_id, "Model repo")
        base_model_id = (base_model_id or "").strip()
        if not base_model_id:
            raise ValueError("Base model is required.")

        # UI sliders may deliver floats; normalise to the expected numeric types.
        stage_start = int(start_stage)
        stage_count = int(max_stages)
        eval_k = int(eval_k)
        eval_samples = int(eval_samples)
        gate_min_rows = int(gate_min_rows)
        gate_min_pass_at_1 = float(gate_min_pass_at_1)
        gate_min_pass_at_k = float(gate_min_pass_at_k)

        if autonomous_mode:
            # Autonomous mode ignores the manual controls and applies the default profile.
            stage_start = 1
            stage_count = TEMPLATE_STAGE_COUNT
            run_eval = True
            eval_k = DEFAULT_AUTO_EVAL_K
            eval_samples = DEFAULT_AUTO_EVAL_SAMPLES
            enforce_quality_gate = bool(DEFAULT_GATE_ENABLED)
            gate_min_rows = DEFAULT_GATE_MIN_ROWS
            gate_min_pass_at_1 = DEFAULT_GATE_MIN_PASS_AT_1
            gate_min_pass_at_k = DEFAULT_GATE_MIN_PASS_AT_K
            push_to_hub = bool(DEFAULT_AUTO_PUSH_TO_HUB)
            force_redownload = False
            preflight_only = False

        if stage_start < 1:
            raise ValueError("Start stage must be >= 1.")
        if stage_start > TEMPLATE_STAGE_COUNT:
            raise ValueError(f"Start stage must be <= {TEMPLATE_STAGE_COUNT}.")
        if stage_count < 1:
            raise ValueError("How many stages must be >= 1.")
        if eval_k < 1:
            raise ValueError("Eval K must be >= 1.")
        if eval_samples < 1:
            raise ValueError("Eval max samples must be >= 1.")
        if gate_min_rows < 1:
            raise ValueError("Gate minimum rows must be >= 1.")
        if not 0.0 <= gate_min_pass_at_1 <= 1.0:
            raise ValueError("Gate min pass@1 must be between 0 and 1.")
        if not 0.0 <= gate_min_pass_at_k <= 1.0:
            raise ValueError("Gate min pass@k must be between 0 and 1.")

        for required_path in (CONFIG_TEMPLATE, TRAIN_SCRIPT):
            if not required_path.exists():
                raise FileNotFoundError(f"Required file is missing: {required_path}")
        if run_eval and not EVAL_SCRIPT.exists():
            raise FileNotFoundError(f"Evaluation script is missing: {EVAL_SCRIPT}")

        runtime = run_runtime_snapshot()
        summary.update(
            {
                "run_label": run_label,
                "started_at_utc": now_ts(),
                "dataset_repo_id": dataset_repo_id,
                "model_repo_id": model_repo_id,
                "base_model_id": base_model_id,
                "autonomous_mode": bool(autonomous_mode),
                "start_stage": stage_start,
                "max_stages": stage_count,
                "run_eval": bool(run_eval),
                "eval_k": eval_k,
                "eval_samples": eval_samples,
                "enforce_quality_gate": bool(enforce_quality_gate),
                "gate_min_rows": gate_min_rows,
                "gate_min_pass_at_1": gate_min_pass_at_1,
                "gate_min_pass_at_k": gate_min_pass_at_k,
                "push_to_hub": bool(push_to_hub),
                "force_redownload": bool(force_redownload),
                "preflight_only": bool(preflight_only),
                "runtime": runtime,
            }
        )

        append_log(log_lines, f"Run {run_label} started.")
        if autonomous_mode:
            append_log(
                log_lines,
                "Autonomous mode active: full-stage training/eval/gating/publish profile applied.",
            )
        append_log(
            log_lines,
            f"Runtime: python={runtime['python']} gradio={runtime['gradio']} torch={runtime['torch']} "
            f"cuda_available={runtime['cuda_available']} devices={runtime['cuda_device_count']}",
        )
        if token:
            append_log(log_lines, "Environment posture validated.")
        else:
            append_log(log_lines, "Restricted mode active. Hub publish disabled for this run.")
        yield "\n".join(log_lines), "Validating environment", summary_text(summary)

        if not preflight_only and not torch.cuda.is_available():
            summary["compute_mode"] = "cpu_fallback"
            append_log(
                log_lines,
                "GPU is unavailable. Continuing with CPU fallback mode; training will be slower.",
            )
            yield "\n".join(log_lines), "CPU fallback active", summary_text(summary)
        elif torch.cuda.is_available():
            summary["compute_mode"] = "gpu"

        # Pushing requires a token; downgrade silently-impossible pushes to a logged no-op.
        effective_push_to_hub = bool(push_to_hub)
        if effective_push_to_hub and not token:
            effective_push_to_hub = False
            summary["push_to_hub"] = False
            summary["push_disabled_reason"] = "missing_token"
            append_log(log_lines, "Push requested but no token is available. Disabling hub push for this run.")

        append_log(log_lines, "Preparing local workspace.")
        yield "\n".join(log_lines), "Preparing workspace", summary_text(summary)

        train_file, validation_file, test_file = download_dataset(
            dataset_repo_id=dataset_repo_id,
            token=token,
            log_lines=log_lines,
            force_redownload=bool(force_redownload),
        )
        summary["dataset_files"] = {
            "train": train_file,
            "validation": validation_file,
            "test": test_file,
        }
        yield "\n".join(log_lines), "Dataset ready", summary_text(summary)

        # The message text is matched by the except-handler below to classify
        # this as a cancellation rather than a failure.
        if is_cancel_requested():
            raise RuntimeError("Run cancelled by user.")

        runtime_cfg = write_runtime_config(
            base_model_id=base_model_id,
            model_repo_id=model_repo_id,
            train_file=train_file,
            validation_file=validation_file,
            test_file=test_file,
            run_eval=bool(run_eval),
            eval_k=eval_k,
            eval_samples=eval_samples,
            push_to_hub=effective_push_to_hub,
            enforce_quality_gate=bool(enforce_quality_gate),
            gate_min_pass_at_1=gate_min_pass_at_1,
            gate_min_pass_at_k=gate_min_pass_at_k,
            gate_min_rows=gate_min_rows,
        )
        summary["runtime_config"] = str(runtime_cfg)
        append_log(log_lines, f"Wrote runtime config: {runtime_cfg}")
        yield "\n".join(log_lines), "Config ready", summary_text(summary)

        # Child processes get the resolved token (or none at all) and unbuffered output.
        env = os.environ.copy()
        if token:
            env["HF_TOKEN"] = token
            env["HUGGINGFACE_HUB_TOKEN"] = token
        else:
            env.pop("HF_TOKEN", None)
            env.pop("HUGGINGFACE_HUB_TOKEN", None)
        env["PYTHONUNBUFFERED"] = "1"

        train_cmd = [
            sys.executable,
            str(TRAIN_SCRIPT),
            "--config",
            str(runtime_cfg),
            "--start-stage",
            str(stage_start),
            "--max-stages",
            str(stage_count),
        ]
        if preflight_only:
            train_cmd.append("--dry-run")
            append_log(log_lines, "Validation mode enabled: running dry validation without full training.")

        train_gen = stream_subprocess(
            cmd=train_cmd,
            env=env,
            cwd=ROOT,
            log_lines=log_lines,
            status_prefix="Training",
        )
        train_ret = yield from _relay_subprocess_updates(train_gen, summary)

        if is_cancel_requested():
            summary["result"] = "cancelled"
            summary["finished_at_utc"] = now_ts()
            append_log(log_lines, "Run cancelled by user.")
            yield "\n".join(log_lines), "Cancelled", summary_text(summary)
            return

        if train_ret != 0:
            summary["result"] = "failed"
            summary["failure_stage"] = "training"
            summary["finished_at_utc"] = now_ts()
            yield "\n".join(log_lines), "Failed", summary_text(summary)
            return

        if preflight_only:
            summary["result"] = "preflight_passed"
            summary["finished_at_utc"] = now_ts()
            append_log(log_lines, "Validation mode completed successfully.")
            yield "\n".join(log_lines), "Preflight complete", summary_text(summary)
            return

        training_summary_path = TRAIN_OUTPUT_DIR / "training_summary.json"
        training_summary: Optional[Dict[str, Any]] = None
        if training_summary_path.exists():
            try:
                summary["training_summary_path"] = str(training_summary_path)
                loaded_summary = json.loads(training_summary_path.read_text(encoding="utf-8"))
                if isinstance(loaded_summary, dict):
                    training_summary = loaded_summary
                    summary["training_summary"] = loaded_summary
                else:
                    summary["training_summary"] = {"warning": "Training summary JSON is not an object."}
            except json.JSONDecodeError:
                summary["training_summary_path"] = str(training_summary_path)
                summary["training_summary"] = {"warning": "Unable to parse training summary JSON."}

        if isinstance(training_summary, dict):
            quality_gate = training_summary.get("quality_gate")
            if isinstance(quality_gate, dict):
                summary["quality_gate"] = quality_gate
                append_log(
                    log_lines,
                    f"Quality gate: passed={quality_gate.get('passed')} enabled={quality_gate.get('enabled')}",
                )
            push_report = training_summary.get("push")
            if isinstance(push_report, dict):
                summary["push"] = push_report
                append_log(
                    log_lines,
                    f"Push decision: requested={push_report.get('requested')} performed={push_report.get('performed')}",
                )
            post_eval_report = training_summary.get("post_eval")
            if run_eval and isinstance(post_eval_report, dict):
                summary["evaluation"] = {
                    "source": "train_post_eval",
                    "evaluated_rows": post_eval_report.get("evaluated_rows"),
                    "pass_at_1": post_eval_report.get("pass_at_1"),
                    "pass_at_k": post_eval_report.get("pass_at_k"),
                    "exact_at_k": post_eval_report.get("exact_at_k"),
                    "composite_score": post_eval_report.get("composite_score"),
                    "k": post_eval_report.get("k"),
                    "report_path": post_eval_report.get("report_path"),
                }
                append_log(log_lines, "Using post-eval metrics emitted by training run.")

        # Fallback: run the standalone eval script only when training did not
        # already supply post-eval metrics.
        if run_eval and "evaluation" not in summary:
            eval_report = WORKSPACE_DIR / "runs" / "latest_eval_report.json"
            # NOTE(review): the eval file is the fixed DATA_DIR test split rather than
            # `test_file` returned by download_dataset — confirm these are the same file.
            eval_cmd = [
                sys.executable,
                str(EVAL_SCRIPT),
                "--config",
                str(runtime_cfg),
                "--base-model",
                base_model_id,
                "--adapter-path",
                str(TRAIN_OUTPUT_DIR / "final_adapter"),
                "--eval-file",
                str(DATA_DIR / "test.parquet"),
                "--k",
                str(eval_k),
                "--max-samples",
                str(eval_samples),
                "--output-json",
                str(eval_report),
            ]
            eval_gen = stream_subprocess(
                cmd=eval_cmd,
                env=env,
                cwd=ROOT,
                log_lines=log_lines,
                status_prefix="Evaluation",
            )
            eval_ret = yield from _relay_subprocess_updates(eval_gen, summary)

            if is_cancel_requested():
                summary["result"] = "cancelled"
                summary["finished_at_utc"] = now_ts()
                append_log(log_lines, "Run cancelled by user.")
                yield "\n".join(log_lines), "Cancelled", summary_text(summary)
                return

            if eval_ret != 0:
                summary["result"] = "failed"
                summary["failure_stage"] = "evaluation"
                summary["finished_at_utc"] = now_ts()
                yield "\n".join(log_lines), "Failed", summary_text(summary)
                return

            if eval_report.exists():
                # Training and evaluation both exited 0 at this point, so a malformed
                # report must not flip the run to "failed". Mirror the tolerant handling
                # used for training_summary.json: keep the same keys and attach a warning.
                report_warning: Optional[str] = None
                try:
                    report = json.loads(eval_report.read_text(encoding="utf-8"))
                except json.JSONDecodeError:
                    report = {}
                    report_warning = "Unable to parse evaluation report JSON."
                else:
                    if not isinstance(report, dict):
                        report = {}
                        report_warning = "Evaluation report JSON is not an object."

                evaluation: Dict[str, Any] = {
                    "source": "fallback_eval",
                    "evaluated_rows": report.get("evaluated_rows"),
                    "pass_at_1": report.get("pass_at_1"),
                    "pass_at_k": report.get("pass_at_k"),
                    "exact_at_k": report.get("exact_at_k"),
                    "composite_score": report.get("composite_score"),
                    "k": report.get("k"),
                    "report_path": str(eval_report),
                }
                if report_warning is not None:
                    evaluation["warning"] = report_warning
                    append_log(log_lines, f"Evaluation report warning: {report_warning}")
                summary["evaluation"] = evaluation
                append_log(log_lines, f"Eval summary: {json.dumps(summary['evaluation'])}")

        summary["result"] = "completed"
        summary["finished_at_utc"] = now_ts()
        append_log(log_lines, "Pipeline completed.")
        yield "\n".join(log_lines), "Completed", summary_text(summary)
    except Exception as exc:
        # Top-level boundary: surface every failure to the UI instead of raising.
        cancelled = is_cancel_requested() or str(exc) == "Run cancelled by user."
        summary["result"] = "cancelled" if cancelled else "failed"
        summary["error"] = {"type": type(exc).__name__, "message": str(exc)}
        summary["finished_at_utc"] = now_ts()
        append_log(
            log_lines,
            f"Pipeline {'cancelled' if cancelled else 'failed'}: {type(exc).__name__}: {exc}",
        )
        yield "\n".join(log_lines), "Cancelled" if cancelled else "Failed", summary_text(summary)
    finally:
        finish_run()


def run_pipeline(
    dataset_repo_id: str,
    model_repo_id: str,
    base_model_id: str,
    autonomous_mode: bool,
    start_stage: int,
    max_stages: int,
    run_eval: bool,
    eval_k: int,
    eval_samples: int,
    enforce_quality_gate: bool,
    gate_min_pass_at_1: float,
    gate_min_pass_at_k: float,
    gate_min_rows: int,
    push_to_hub: bool,
    force_redownload: bool,
    preflight_only: bool,
) -> Generator[Tuple[str, str, str], None, None]:
    """Adapt ``run_pipeline_core`` updates for the UI outputs.

    Every ``(logs, status, summary_json)`` update from the core pipeline is
    converted to ``(console_text, status, ops_visual)``: the console text comes
    from ``compose_ops_console`` and the visual from ``render_ops_visual`` using
    the parsed summary.
    """
    core_kwargs = {
        "dataset_repo_id": dataset_repo_id,
        "model_repo_id": model_repo_id,
        "base_model_id": base_model_id,
        "autonomous_mode": autonomous_mode,
        "start_stage": start_stage,
        "max_stages": max_stages,
        "run_eval": run_eval,
        "eval_k": eval_k,
        "eval_samples": eval_samples,
        "enforce_quality_gate": enforce_quality_gate,
        "gate_min_pass_at_1": gate_min_pass_at_1,
        "gate_min_pass_at_k": gate_min_pass_at_k,
        "gate_min_rows": gate_min_rows,
        "push_to_hub": push_to_hub,
        "force_redownload": force_redownload,
        "preflight_only": preflight_only,
    }
    for raw_logs, run_status, mission_json in run_pipeline_core(**core_kwargs):
        parsed_summary = _parse_summary_json(mission_json)
        console = compose_ops_console(raw_logs, mission_json)
        visual = render_ops_visual(parsed_summary, run_status, raw_logs)
        yield console, run_status, visual


# Top-level UI definition. Components are created in display order inside the
# Blocks context, so statement order here determines the rendered layout.
with gr.Blocks(title="Math Conjecture Trainer Space") as demo:
    gr.HTML(TACTICAL_HEADER_HTML)
    gr.Markdown(PROJECT_DESCRIPTION)
    # --- Repository / model identifiers ---
    with gr.Row():
        dataset_repo_id = gr.Textbox(
            label="Dataset Source",
            value="NorthernTribe-Research/math-conjecture-training-corpus",
        )
    with gr.Row():
        model_repo_id = gr.Textbox(
            label="Model Destination",
            value="NorthernTribe-Research/math-conjecture-model",
        )
        base_model_id = gr.Textbox(
            label="Base Model ID",
            value="deepseek-ai/deepseek-math-v2",
        )
    # --- Run profile: autonomous mode is on by default; run_pipeline_core then
    # replaces the manual stage/eval/gate/push settings below with its defaults ---
    with gr.Row():
        autonomous_mode = gr.Checkbox(label="Autonomous Mode", value=True)
    # --- Manual stage selection and evaluation toggle ---
    with gr.Row():
        start_stage = gr.Slider(label="Stage Start", minimum=1, maximum=TEMPLATE_STAGE_COUNT, step=1, value=1)
        max_stages = gr.Slider(
            label="Stage Count",
            minimum=1,
            maximum=TEMPLATE_STAGE_COUNT,
            step=1,
            value=TEMPLATE_STAGE_COUNT,
        )
        run_eval = gr.Checkbox(label="Run Evaluation After Training", value=True)
    # --- Evaluation sizing ---
    with gr.Row():
        eval_k = gr.Slider(label="Evaluation K", minimum=1, maximum=8, step=1, value=4)
        eval_samples = gr.Slider(label="Evaluation Max Samples", minimum=50, maximum=1000, step=50, value=300)
    # --- Quality gate thresholds; each default is clamped into its slider's range ---
    with gr.Row():
        enforce_quality_gate = gr.Checkbox(label="Enforce Quality Gate", value=DEFAULT_GATE_ENABLED)
        gate_min_pass_at_1 = gr.Slider(
            label="Gate Min pass@1",
            minimum=0.0,
            maximum=0.5,
            step=0.005,
            value=min(max(DEFAULT_GATE_MIN_PASS_AT_1, 0.0), 0.5),
        )
        gate_min_pass_at_k = gr.Slider(
            label="Gate Min pass@k",
            minimum=0.0,
            maximum=1.0,
            step=0.01,
            value=min(max(DEFAULT_GATE_MIN_PASS_AT_K, 0.0), 1.0),
        )
        gate_min_rows = gr.Slider(
            label="Gate Min Rows",
            minimum=10,
            maximum=2000,
            step=10,
            value=min(max(DEFAULT_GATE_MIN_ROWS, 10), 2000),
        )
    # --- Publishing, dataset cache and dry-run toggles ---
    with gr.Row():
        push_to_hub = gr.Checkbox(label="Push Adapter to Hub", value=True)
        force_redownload = gr.Checkbox(label="Force Dataset Redownload", value=False)
        preflight_only = gr.Checkbox(label="Validation Mode (No Training)", value=False)

    # --- Action buttons ---
    with gr.Row():
        run_button = gr.Button("Execute Training Run", variant="primary")
        stop_button = gr.Button("Abort Active Run", variant="stop")
        clear_button = gr.Button("Reset Console")

    # --- Output widgets, initialised to the idle state ---
    ops_visual = gr.HTML(value=render_ops_visual({}, "Idle", ""))
    status = gr.Textbox(label="Run Status", value="Idle", interactive=False)
    logs = make_copyable_textbox(
        label="Ops Console (Live Log + Mission JSON)",
        lines=26,
        max_lines=36,
        interactive=False,
    )

    # The inputs list must stay in the same order as run_pipeline's parameters,
    # because the component values are passed positionally.
    run_button.click(
        fn=run_pipeline,
        inputs=[
            dataset_repo_id,
            model_repo_id,
            base_model_id,
            autonomous_mode,
            start_stage,
            max_stages,
            run_eval,
            eval_k,
            eval_samples,
            enforce_quality_gate,
            gate_min_pass_at_1,
            gate_min_pass_at_k,
            gate_min_rows,
            push_to_hub,
            force_redownload,
            preflight_only,
        ],
        outputs=[logs, status, ops_visual],
    )
    # Stop and reset are registered with queue=False — presumably so they are not
    # held behind a streaming run in the event queue (confirm against Gradio docs).
    stop_button.click(fn=cancel_pipeline, inputs=None, outputs=[status], queue=False)
    clear_button.click(fn=clear_outputs, inputs=None, outputs=[logs, status, ops_visual], queue=False)
    gr.HTML(TACTICAL_FOOTER_HTML)


if __name__ == "__main__":
    # Enable the event queue with a default concurrency limit of 1, then start the app.
    # NOTE(review): passing `css` to launch() assumes a Gradio release whose
    # launch() accepts it — confirm against the pinned Gradio version.
    queued_demo = demo.queue(default_concurrency_limit=1)
    queued_demo.launch(css=TACTICAL_CSS)