#!/usr/bin/env python3
"""
HF Hub Benchmark Dashboard — Gradio app.
Run: python app.py
"""

import html as _html
import json
import urllib.request
import urllib.error
import concurrent.futures
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Directory containing this script; the cache file is stored next to it.
ROOT = Path(__file__).resolve().parent
# JSON file holding the last aggregated dashboard data (see save_cache / load_cached_data).
CACHE_PATH = ROOT / "dashboard_cache.json"
# A cache whose timestamp is older than this is ignored by load_cached_data().
CACHE_TTL_SECONDS = 6 * 60 * 60  # 6 hours

# Canonical category order: drives the sidebar listing, the "categories"
# summary built by fetch_all_data(), and the sort in categorize_benchmark().
CATEGORY_ORDER = [
    "Knowledge",
    "Math / Reasoning",
    "Code / Engineering",
    "Agents",
    "Vision",
    "Audio / Speech",
    "Document / OCR",
    "Retrieval / Embedding",
    "NLP / Classification",
    "Robotics",
    "Other",
]

# Emoji shown in front of each category name in the sidebar and category header.
CATEGORY_ICONS = {
    "Knowledge": "🧠",
    "Math / Reasoning": "🔢",
    "Code / Engineering": "💻",
    "Agents": "🤖",
    "Vision": "👁️",
    "Audio / Speech": "🔊",
    "Document / OCR": "📄",
    "Retrieval / Embedding": "🔎",
    "NLP / Classification": "🏷️",
    "Robotics": "🦾",
    "Other": "📦",
}

# Display names for known benchmark dataset ids. Ids missing from this table
# fall back to the part after the last "/" (see fetch_all_data).
BENCHMARK_DISPLAY_NAMES = {
    "openai/gsm8k": "GSM8K",
    "Idavidrein/gpqa": "GPQA",
    "allenai/olmOCR-bench": "olmOCR-Bench",
    "llamaindex/ParseBench": "ParseBench",
    "mercor/apex-agents": "APEX-Agents",
    "harborframework/terminal-bench-2.0": "Terminal-Bench 2.0",
    "SWE-bench/SWE-bench_Verified": "SWE-bench Verified",
    "TIGER-Lab/MMLU-Pro": "MMLU-Pro",
    "hf-audio/open-asr-leaderboard": "Open ASR Leaderboard",
    "MathArena/aime_2026": "AIME 2026",
    "claw-eval/Claw-Eval": "Claw-Eval",
    "cais/hle": "HLE",
    "likaixin/ScreenSpot-Pro": "ScreenSpot-Pro",
    "nvidia/compute-eval": "ComputeEval",
    "ScaleAI/SWE-bench_Pro": "SWE-bench Pro",
    "FutureMa/EvasionBench": "EvasionBench",
    "mteb/BRIGHT": "BRIGHT",
    "Delores-Lin/MDPBench": "MDPBench",
    "mteb/arguana": "ArguAna",
    "MMMU/MMMU_Pro": "MMMU-Pro",
    "LEXam-Benchmark/LEXam": "LEXam",
    "mercor/ACE": "ACE",
    "mercor/APEX-v1-extended": "APEX-v1",
    "VLABench/vlabench_primitive_ft_lerobot_video": "VLABench",
    "tiiuae/PBench": "PBench",
    "MathArena/hmmt_feb_2026": "HMMT Feb 2026",
    "collinear-ai/yc-bench": "YC-Bench",
    "internlm/WildClawBench": "WildClawBench",
    "MME-Benchmarks/Video-MME-v2": "Video-MME v2",
    "open-agent-leaderboard/results": "Open Agent Leaderboard",
}

# Stylesheet passed to gr.Blocks(css=...). Each selector group appears exactly
# once; the previously duplicated "Layout columns" and "Filter checkbox"
# sections were identical copies, so dropping one copy does not change the
# resulting styles.
CUSTOM_CSS = """
/* ---- Topbar ---- */
.topbar {
    display: flex; align-items: center; justify-content: space-between;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white; padding: 14px 24px; border-radius: 10px;
    margin-bottom: 8px; flex-wrap: wrap; gap: 12px;
}
.topbar-title { font-size: 18px; font-weight: 700; margin-bottom: 2px; }
.topbar-meta { font-size: 11px; opacity: 0.85; }
.topbar-pills { display: flex; gap: 8px; flex-wrap: wrap; }
.stat-pill {
    background: rgba(255,255,255,0.2); border-radius: 20px;
    padding: 4px 14px; font-size: 12px; white-space: nowrap;
}
.stat-pill b { font-size: 14px; }

/* ---- Layout columns ---- */
#sidebar-col {
    background: white !important; padding: 0 !important;
    border-right: 1px solid #e5e7eb !important;
    border-radius: 10px 0 0 10px !important;
}
#main-col {
    background: #f8fafc !important; padding: 18px 22px !important;
    border-radius: 0 10px 10px 0 !important; min-width: 0 !important;
}
#sidebar-col > .form, #main-col > .form {
    background: transparent !important; box-shadow: none !important;
    border: none !important; padding: 0 !important;
}

/* ---- Sidebar Radio → nav buttons ---- */
#cat_radio {
    background: transparent !important; border: none !important;
    box-shadow: none !important; padding: 0 !important;
}
#cat_radio > .wrap { flex-direction: column !important; gap: 0 !important; padding: 0 !important; }
#cat_radio label {
    display: flex !important; align-items: center !important;
    padding: 8px 12px !important; margin: 0 !important;
    border-left: 3px solid transparent !important; border-radius: 0 !important;
    cursor: pointer !important; font-size: 12px !important;
    color: #374151 !important; background: white !important;
    width: 100% !important; box-sizing: border-box !important; gap: 0 !important;
}
#cat_radio label:hover { background: #f3f4f6 !important; }
#cat_radio label:has(input:checked) {
    background: #ede9fe !important; border-left-color: #7c3aed !important;
    color: #5b21b6 !important; font-weight: 600 !important;
}
#cat_radio input[type="radio"] { display: none !important; }
#cat_radio .wrap span { margin-left: 0 !important; padding-left: 0 !important; }

/* ---- Bench cards Radio ---- */
#bench_radio {
    background: transparent !important; border: none !important;
    box-shadow: none !important; padding: 0 !important;
}
#bench_radio > .wrap {
    flex-direction: row !important; flex-wrap: wrap !important;
    gap: 10px !important; padding: 4px 0 12px !important;
}
#bench_radio label {
    display: flex !important; align-items: center !important;
    padding: 10px 14px !important; border: 2px solid #e5e7eb !important;
    border-radius: 10px !important; cursor: pointer !important;
    font-size: 12px !important; background: white !important;
    color: #374151 !important; min-width: 150px !important;
    margin: 0 !important; gap: 0 !important; transition: border-color 0.15s !important;
}
#bench_radio label:hover { border-color: #a78bfa !important; }
#bench_radio label:has(input:checked) {
    border-color: #7c3aed !important; background: #faf5ff !important;
    font-weight: 600 !important; color: #5b21b6 !important;
}
#bench_radio input[type="radio"] { display: none !important; }
#bench_radio .wrap span { margin-left: 0 !important; padding-left: 0 !important; }

/* ---- Sidebar HTML ---- */
.hf-sidebar { display: flex; flex-direction: column; padding: 10px 0; }
.hf-sidebar-label {
    font-size: 10px; font-weight: 700; color: #9ca3af;
    text-transform: uppercase; letter-spacing: 0.8px; padding: 0 16px 8px;
}
.hf-cat-btn {
    display: flex; align-items: center; gap: 9px; width: 100%;
    padding: 9px 16px; border: none; background: none; cursor: pointer;
    border-left: 3px solid transparent; font-size: 13px; color: #374151;
    text-align: left; transition: background 0.1s;
}
.hf-cat-btn:hover { background: #f3f4f6; }
.hf-cat-active {
    background: #ede9fe !important; border-left-color: #7c3aed !important;
    color: #5b21b6 !important; font-weight: 600;
}
.hf-cat-active .hf-cat-badge { background: #ddd6fe !important; color: #7c3aed !important; }
.hf-cat-icon { font-size: 15px; min-width: 20px; }
.hf-cat-name { flex: 1; }
.hf-cat-badge {
    background: #f3f4f6; border-radius: 12px;
    padding: 1px 8px; font-size: 11px; color: #6b7280;
}

/* ---- Benchmark cards HTML ---- */
.hf-section-head { display: flex; align-items: baseline; gap: 12px; margin-bottom: 12px; }
.hf-section-title { font-size: 16px; font-weight: 700; color: #111827; }
.hf-section-meta { font-size: 12px; color: #9ca3af; }
.hf-cards { display: flex; flex-wrap: wrap; gap: 10px; margin-bottom: 14px; }
.hf-card {
    border: 2px solid #e5e7eb; border-radius: 10px; padding: 10px 14px;
    cursor: pointer; background: white; min-width: 150px;
    transition: border-color 0.15s, box-shadow 0.15s;
}
.hf-card:hover { border-color: #a78bfa; box-shadow: 0 1px 6px rgba(124,58,237,0.1); }
.hf-card-active { border-color: #7c3aed !important; background: #faf5ff !important; }
.hf-card-name { font-size: 13px; font-weight: 600; color: #111827; }
.hf-card-active .hf-card-name { color: #5b21b6; }
.hf-card-count { font-size: 11px; color: #6b7280; margin-top: 3px; }
.hf-card-owner { font-size: 10px; color: #9ca3af; margin-top: 2px; }

/* ---- JS bridge textboxes (rendered but invisible) ---- */
#cat_trigger, #bench_trigger {
    display: none !important;
    position: absolute !important;
    pointer-events: none !important;
}

/* ---- Filter checkbox ---- */
#providers-filter { margin: 2px 0 10px; }
#providers-filter > label { font-size: 12px !important; color: #6b7280 !important; }

/* ---- Leaderboard HTML ---- */
.hf-lb { border: 1px solid #e5e7eb; border-radius: 10px; overflow: hidden; background: white; }
.hf-lb-head {
    display: flex; align-items: center; justify-content: space-between;
    padding: 10px 16px; border-bottom: 1px solid #f3f4f6; background: #f9fafb;
}
.hf-lb-title { font-size: 13px; font-weight: 600; color: #374151; }
.hf-lb-meta { display: flex; align-items: center; gap: 12px; }
.hf-lb-count { font-size: 11px; color: #9ca3af; }
.hf-hub-link { font-size: 11px; color: #7c3aed; text-decoration: none; font-weight: 500; }
.hf-hub-link:hover { text-decoration: underline; }
.hf-lb-scroll { overflow-x: auto; }
.hf-table { width: 100%; border-collapse: collapse; font-size: 12px; }
.hf-table thead th {
    padding: 7px 12px; text-align: left; font-size: 10px; font-weight: 700;
    color: #6b7280; text-transform: uppercase; letter-spacing: 0.4px;
    white-space: nowrap; background: white; border-bottom: 1px solid #f3f4f6;
}
.hf-table td { padding: 7px 12px; border-bottom: 1px solid #f3f4f6; vertical-align: middle; }
.hf-table tbody tr:last-child td { border-bottom: none; }
.hf-table tbody tr:nth-child(even) td { background: #fafafa; }
.hf-table tbody tr:hover td { background: #faf5ff !important; }
.hf-rank { width: 44px; text-align: center; font-size: 17px; }
.hf-rank-num { color: #9ca3af; font-size: 12px; font-variant-numeric: tabular-nums; }
.hf-model a { color: #2563eb; text-decoration: none; font-size: 11px; word-break: break-all; }
.hf-model a:hover { text-decoration: underline; }
.hf-score { font-variant-numeric: tabular-nums; font-weight: 600; color: #111827; white-space: nowrap; }
.hf-price { font-variant-numeric: tabular-nums; color: #059669; white-space: nowrap; }
.hf-ctx, .hf-params, .hf-ttft, .hf-tput { white-space: nowrap; }
.hf-ttft { color: #7c3aed; }
.hf-tput { color: #0369a1; }
.hf-lic { color: #6b7280; font-size: 11px; }
.hf-params { font-weight: 500; }
.hf-provs { display: flex; flex-wrap: wrap; gap: 3px; }
.hf-chip {
    background: #dbeafe; color: #1e40af;
    border-radius: 4px; padding: 1px 6px;
    font-size: 10px; font-weight: 500; white-space: nowrap;
}
.hf-chip-more { background: #f3f4f6 !important; color: #6b7280 !important; }
.hf-na { color: #d1d5db; }
.hf-empty { padding: 48px 24px; text-align: center; color: #9ca3af; font-size: 14px; }
"""

# ---------------------------------------------------------------------------
# HF API helpers
# ---------------------------------------------------------------------------


def _http_get_json(url: str, token: str | None = None, timeout: int = 30):
    req = urllib.request.Request(url, headers={"Accept": "application/json"})
    if token:
        req.add_header("Authorization", f"Bearer {token}")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def _read_token() -> str | None:
    import os
    p = Path(os.path.expanduser("~/.cache/huggingface/token"))
    if p.exists():
        tok = p.read_text().strip()
        if tok:
            return tok
    return os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")


def discover_benchmarks(token=None) -> list[dict]:
    """List datasets on the Hub matching the ``benchmark:official`` filter.

    Args:
        token: Optional bearer token forwarded to ``_http_get_json``.

    Returns:
        One dict per dataset with keys ``id``, ``tags`` and ``description``
        (the description is cut to its first 200 characters). Items in the
        response that are not dicts or lack an ``id`` are skipped.
    """
    payload = _http_get_json(
        "https://huggingface.co/api/datasets?filter=benchmark:official&limit=1000",
        token,
        timeout=30,
    )
    return [
        {
            "id": item["id"],
            "tags": item.get("tags", []),
            "description": (item.get("description") or "")[:200],
        }
        for item in payload
        if isinstance(item, dict) and "id" in item
    ]


def get_leaderboard(dataset_id: str, token=None) -> list[dict]:
    """Fetch the leaderboard entries of one benchmark dataset.

    Args:
        dataset_id: Hub dataset id, e.g. ``"openai/gsm8k"``.
        token: Optional bearer token forwarded to ``_http_get_json``.

    Returns:
        The list of entry dicts. The response may be either a bare list or
        an object with an ``entries`` list; anything else yields ``[]``.
        Items that are not dicts are dropped so callers can rely on
        ``entry.get``.

    This is a best-effort lookup: network failures and unparsable bodies
    yield ``[]`` so a single bad benchmark cannot abort a full refresh.
    """
    url = f"https://huggingface.co/api/datasets/{dataset_id}/leaderboard"
    try:
        data = _http_get_json(url, token, timeout=30)
    except (OSError, ValueError):
        # OSError covers URLError/HTTPError as well as socket timeouts and
        # connection resets raised while reading the body; ValueError covers
        # invalid JSON and undecodable bytes.
        return []
    if isinstance(data, dict) and "entries" in data:
        data = data["entries"]
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]


# ---------------------------------------------------------------------------
# Categorisation
# ---------------------------------------------------------------------------


def categorize_benchmark(bench: dict) -> list[str]:
    """Assign dashboard categories to a benchmark.

    A category is matched either by one of the benchmark's tags or by a
    keyword occurring anywhere in its lower-cased id. A benchmark can land
    in several categories; when nothing matches it is filed under "Other".

    Args:
        bench: Dict with an ``id`` and, optionally, a ``tags`` collection.

    Returns:
        The matched category names ordered as in ``CATEGORY_ORDER``.
    """
    # (category, tags that select it)
    tag_rules = (
        ("Audio / Speech", ("modality:audio", "modality:speech")),
        ("Vision", ("modality:image", "modality:video")),
        ("Document / OCR", ("modality:document",)),
        ("Robotics", ("task_categories:robotics",)),
        ("Retrieval / Embedding", ("task_categories:text-retrieval",)),
    )
    # (category, substrings of the lower-cased id that select it)
    keyword_rules = (
        ("Math / Reasoning", ("math", "aime", "hmmt", "gsm8k")),
        ("Code / Engineering", ("swe", "terminal", "compute-eval")),
        ("Agents", ("agent", "claw", "apex-agent", "wildclaw", "yc-bench")),
        ("Knowledge", ("mmlu", "gpqa", "hle")),
        ("Document / OCR", ("ocr", "parse", "mdp")),
        ("Audio / Speech", ("asr",)),
        ("Vision", ("screen", "mmmu", "video", "pbench")),
        ("NLP / Classification", ("evasion", "lex")),
        ("Retrieval / Embedding", ("bright", "arguana")),
    )

    tags = bench.get("tags", [])
    name = bench["id"].lower()

    matched = {
        category
        for category, wanted in tag_rules
        if any(tag in tags for tag in wanted)
    }
    matched |= {
        category
        for category, needles in keyword_rules
        if any(needle in name for needle in needles)
    }
    if not matched:
        matched = {"Other"}

    def _position(category: str) -> int:
        return CATEGORY_ORDER.index(category) if category in CATEGORY_ORDER else 99

    return sorted(matched, key=_position)


# ---------------------------------------------------------------------------
# Data fetching & aggregation
# ---------------------------------------------------------------------------


def _summarize_benchmark(bench: dict, entries: list) -> dict:
    """Build the ``benchmark_data`` record for one benchmark from its leaderboard entries."""
    bid = bench["id"]
    models: set[str] = set()
    model_details = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        # The model id has been seen under several key spellings.
        mid = entry.get("modelId") or entry.get("model_id") or entry.get("model") or ""
        if not isinstance(mid, str) or not mid:
            continue
        models.add(mid)
        model_details.append({
            "rank": entry.get("rank"),
            "model_id": mid,
            "value": entry.get("value"),
            "verified": entry.get("verified", False),
        })

    # Ranked entries first in ascending order, unranked ones last. The test is
    # `is None` rather than truthiness so that a rank of 0 keeps its place.
    model_details.sort(
        key=lambda d: (d["rank"] is None, 0 if d["rank"] is None else d["rank"])
    )

    return {
        "id": bid,
        "display_name": BENCHMARK_DISPLAY_NAMES.get(bid, bid.split("/")[-1]),
        "categories": categorize_benchmark(bench),
        "num_models": len(models),
        "models": sorted(models),
        "model_details": model_details,
        "description": bench["description"],
    }


def fetch_all_data() -> dict:
    """Download every official benchmark with its leaderboard and aggregate them.

    Returns:
        A JSON-serialisable dict with the keys ``total_benchmarks``,
        ``total_unique_models``, ``benchmarks_with_entries``,
        ``benchmarks_empty``, ``timestamp`` (UTC, ISO-8601, second
        precision), ``all_models``, ``benchmark_data`` (one record per
        benchmark) and ``categories`` (per-category benchmark and
        unique-model counts, in ``CATEGORY_ORDER`` order).

    Errors from ``discover_benchmarks`` propagate; leaderboard lookups are
    as tolerant as ``get_leaderboard`` makes them.
    """
    token = _read_token()
    benchmarks = discover_benchmarks(token)

    # The leaderboard requests are independent and network-bound, so run them
    # concurrently; map() hands results back in the order of `benchmarks`.
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
        leaderboards = list(
            pool.map(lambda bench: get_leaderboard(bench["id"], token), benchmarks)
        )

    benchmark_data = [
        _summarize_benchmark(bench, entries)
        for bench, entries in zip(benchmarks, leaderboards)
    ]

    all_models: set[str] = set()
    cat_benchmarks: dict[str, list] = defaultdict(list)
    cat_models: dict[str, set] = defaultdict(set)
    for bd in benchmark_data:
        all_models.update(bd["models"])
        for cat in bd["categories"]:
            cat_benchmarks[cat].append(bd)
            cat_models[cat].update(bd["models"])

    return {
        "total_benchmarks": len(benchmarks),
        "total_unique_models": len(all_models),
        "benchmarks_with_entries": sum(1 for bd in benchmark_data if bd["num_models"] > 0),
        "benchmarks_empty": sum(1 for bd in benchmark_data if bd["num_models"] == 0),
        "timestamp": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "all_models": sorted(all_models),
        "benchmark_data": benchmark_data,
        "categories": {
            cat: {
                "benchmarks": len(cat_benchmarks[cat]),
                "unique_models": len(cat_models[cat]),
            }
            for cat in CATEGORY_ORDER
            if cat in cat_benchmarks
        },
    }


def load_cached_data() -> dict | None:
    """Return the cached dashboard data if it exists and is still fresh.

    Returns:
        The parsed cache dict when ``CACHE_PATH`` holds a JSON object with a
        ``benchmark_data`` list and a ``timestamp`` younger than
        ``CACHE_TTL_SECONDS``; otherwise ``None``. A missing, unreadable,
        malformed or stale cache is never an error: the caller is expected
        to refetch.
    """
    try:
        cached = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # No cache file, unreadable file, or invalid JSON / encoding.
        return None

    # Reject payloads that cannot be the structure save_cache() persisted,
    # so the UI never starts from a half-valid cache.
    if not isinstance(cached, dict) or not isinstance(cached.get("benchmark_data"), list):
        return None

    stamp = cached.get("timestamp")
    if not isinstance(stamp, str) or not stamp:
        return None
    try:
        age = (datetime.now(timezone.utc) - datetime.fromisoformat(stamp)).total_seconds()
    except (ValueError, TypeError):
        # ValueError: not an ISO timestamp. TypeError: timestamp without a
        # UTC offset, which cannot be subtracted from an aware datetime.
        return None

    return cached if age < CACHE_TTL_SECONDS else None


def save_cache(data: dict) -> None:
    """Persist *data* as indented JSON at ``CACHE_PATH``.

    The JSON is written to a sibling temporary file which is then renamed
    over the cache, so a crash or full disk mid-write cannot leave a
    truncated cache file behind.

    Raises:
        TypeError: if *data* is not JSON-serialisable (nothing is written).
        OSError: if the file cannot be written or renamed.
    """
    payload = json.dumps(data, indent=2)
    tmp_path = CACHE_PATH.with_name(CACHE_PATH.name + ".tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(CACHE_PATH)
    except OSError:
        # Do not leave a stray partial file next to the cache.
        tmp_path.unlink(missing_ok=True)
        raise


# ---------------------------------------------------------------------------
# UI helpers
# ---------------------------------------------------------------------------

# Dashboard data currently served by the UI; assigned in build_app() and
# replaced by the refresh handler.
_app_data: dict = {}
_router_data: dict = {}        # model_id → {providers, cheapest_input, cheapest_output, context_length, fastest_ttft_ms, fastest_throughput}
_model_meta_cache: dict = {}   # model_id → {license, params}; {} is stored when the metadata fetch failed


def _render_topbar(data: dict) -> str:
    ts = data.get("timestamp", "?")[:19]
    total_entries = sum(bd["num_models"] for bd in data.get("benchmark_data", []))
    return (
        f'<div class="topbar">'
        f'<div><div class="topbar-title">🏆 HF Hub Benchmark Dashboard</div>'
        f'<div class="topbar-meta">Last updated: {ts} UTC · auto-refreshes every 6h</div></div>'
        f'<div class="topbar-pills">'
        f'<div class="stat-pill"><b>{data["total_benchmarks"]}</b> benchmarks</div>'
        f'<div class="stat-pill"><b>{data["total_unique_models"]}</b> models</div>'
        f'<div class="stat-pill"><b>{total_entries:,}</b> entries</div>'
        f'<div class="stat-pill"><b>{data["benchmarks_with_entries"]}</b> active</div>'
        f'<div class="stat-pill"><b>{data["benchmarks_empty"]}</b> empty</div>'
        f'</div></div>'
    )


def _load_router_data(token: str | None = None) -> dict:
    """Fetch all inference-available models from the HF router (pricing + context).

    Args:
        token: Optional bearer token forwarded to ``_http_get_json``.

    Returns:
        A dict mapping model id to a summary over that model's providers
        whose status is ``"live"``: ``providers`` (names), ``cheapest_input``
        / ``cheapest_output`` (lowest listed price, or ``None`` when no
        provider lists one), ``context_length`` (largest, 0 if unknown),
        ``fastest_ttft_ms`` and ``fastest_throughput`` (``None`` if
        unknown). Models without any live provider are omitted. Returns
        ``{}`` if the request fails or the payload has an unexpected shape.
    """
    try:
        resp = _http_get_json("https://router.huggingface.co/v1/models", token, timeout=30)
    except Exception:
        # Deliberately broad: provider data is optional decoration and must
        # never prevent the dashboard from rendering.
        return {}

    models = resp.get("data") if isinstance(resp, dict) else None
    if not isinstance(models, list):
        return {}

    def _number(value):
        """Return *value* if it is a real number, else None."""
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
        return value

    def _price(provider: dict, kind: str):
        """Return the provider's price for *kind*, or None if absent or malformed."""
        pricing = provider.get("pricing")
        return _number(pricing.get(kind)) if isinstance(pricing, dict) else None

    out: dict = {}
    for m in models:
        if not isinstance(m, dict):
            continue
        mid = m.get("id", "")
        if not mid:
            continue
        live = [
            p for p in (m.get("providers") or [])
            if isinstance(p, dict) and p.get("status") == "live"
        ]
        if not live:
            continue

        in_prices = [v for p in live if (v := _price(p, "input")) is not None]
        out_prices = [v for p in live if (v := _price(p, "output")) is not None]
        # Zero / missing latency and throughput readings are treated as "unknown".
        ttfts = [v for p in live if (v := _number(p.get("first_token_latency_ms")))]
        throughputs = [v for p in live if (v := _number(p.get("throughput")))]

        out[mid] = {
            "providers": [p["provider"] for p in live if p.get("provider")],
            "cheapest_input": min(in_prices) if in_prices else None,
            "cheapest_output": min(out_prices) if out_prices else None,
            "context_length": max((_number(p.get("context_length")) or 0) for p in live),
            "fastest_ttft_ms": min(ttfts) if ttfts else None,
            "fastest_throughput": max(throughputs) if throughputs else None,
        }
    return out


def _load_model_metas(model_ids: list[str], token: str | None = None) -> None:
    """Fetch license + param count for model_ids not yet cached. Fills _model_meta_cache."""
    to_fetch = [m for m in model_ids if m not in _model_meta_cache]
    if not to_fetch:
        return

    def _fetch_one(mid: str) -> tuple[str, dict]:
        url = f"https://huggingface.co/api/models/{mid}?expand[]=safetensors&expand[]=cardData"
        try:
            d = _http_get_json(url, token, timeout=10)
        except Exception:
            return mid, {}
        lic = (d.get("cardData") or {}).get("license", "")
        if not lic:
            for t in d.get("tags", []):
                if t.startswith("license:"):
                    lic = t[8:]
                    break
        total = (d.get("safetensors") or {}).get("total", 0)
        params = ""
        if total:
            b = total / 1e9
            params = f"{round(b)}B" if b >= 1 else f"{round(total / 1e6)}M"
        return mid, {"license": lic, "params": params}

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as ex:
        for mid, meta in ex.map(_fetch_one, to_fetch):
            _model_meta_cache[mid] = meta


def _sidebar_choices(data: dict) -> list[tuple[str, str]]:
    """Build the ``(label, value)`` choices for the category radio.

    Only categories present in ``data["categories"]`` are listed, in
    ``CATEGORY_ORDER`` order. The label carries the category icon, its name
    and its benchmark count; the value is the bare category name.
    """
    counts = data.get("categories", {})
    return [
        (f"{CATEGORY_ICONS.get(name, '')} {name}  ({counts[name]['benchmarks']})", name)
        for name in CATEGORY_ORDER
        if name in counts
    ]


def _card_choices(data: dict, category: str) -> list[tuple[str, str]]:
    bds = sorted(
        [bd for bd in data["benchmark_data"] if category in bd["categories"]],
        key=lambda x: x["num_models"], reverse=True,
    )
    choices = []
    for bd in bds:
        owner = bd["id"].split("/")[0] if "/" in bd["id"] else ""
        label = f"{bd['display_name']} · {bd['num_models']} models"
        if owner:
            label += f"  [{owner}]"
        choices.append((label, bd["id"]))
    return choices


def _cat_header(data: dict, cat: str) -> str:
    """Return the Markdown heading for *cat*: icon, name, benchmark and model counts.

    Counts default to 0 when the category is absent from ``data["categories"]``.
    """
    icon = CATEGORY_ICONS.get(cat, "")
    stats = data.get("categories", {}).get(cat, {})
    n_benchmarks = stats.get("benchmarks", 0)
    n_models = stats.get("unique_models", 0)
    return f"### {icon} {cat} &nbsp;·&nbsp; {n_benchmarks} benchmarks · {n_models} models"


def _render_leaderboard(data: dict, bid: str, providers_only: bool = False) -> str:
    if not bid:
        return '<div class="hf-empty">Select a benchmark to view its leaderboard.</div>'

    lookup = {bd["id"]: bd for bd in data["benchmark_data"]}
    bd = lookup.get(bid, {})
    safe_bid = _html.escape(bid)
    hub_link = (
        f'<a class="hf-hub-link" href="https://huggingface.co/datasets/{safe_bid}" target="_blank">'
        f'↗ View on Hub</a>'
    )

    rows = _lb_rows(data, bid, providers_only)
    if not rows:
        return (
            f'<div class="hf-lb">'
            f'<div class="hf-lb-head">'
            f'<span class="hf-lb-title">{_html.escape(bd.get("display_name", bid))}</span>'
            f'{hub_link}</div>'
            f'<div class="hf-empty">No entries yet.</div></div>'
        )

    thead = "<tr>" + "".join(
        f"<th>{h}</th>"
        for h in ["", "Model", "Score", "In $/1M", "Out $/1M", "Context", "TTFT", "Throughput", "License", "Params", "Providers"]
    ) + "</tr>"

    tbody = []
    for rank, model_id, score, price_in, price_out, ctx, ttft, tput, lic, params, provs in rows:
        if rank == 1:   rank_html = "🥇"
        elif rank == 2: rank_html = "🥈"
        elif rank == 3: rank_html = "🥉"
        else:           rank_html = f'<span class="hf-rank-num">{rank}</span>'

        safe_mid = _html.escape(model_id)
        model_html = f'<a href="https://huggingface.co/{safe_mid}" target="_blank">{safe_mid}</a>'

        if provs != "—":
            chips = []
            for p in provs.split(","):
                p = p.strip()
                cls = "hf-chip hf-chip-more" if p.startswith("+") else "hf-chip"
                chips.append(f'<span class="{cls}">{_html.escape(p)}</span>')
            prov_html = f'<div class="hf-provs">{"".join(chips)}</div>'
        else:
            prov_html = '<span class="hf-na">—</span>'

        tbody.append(
            f'<tr>'
            f'<td class="hf-rank">{rank_html}</td>'
            f'<td class="hf-model">{model_html}</td>'
            f'<td class="hf-score">{_html.escape(str(score))}</td>'
            f'<td class="hf-price">{_html.escape(str(price_in))}</td>'
            f'<td class="hf-price">{_html.escape(str(price_out))}</td>'
            f'<td class="hf-ctx">{_html.escape(str(ctx))}</td>'
            f'<td class="hf-ttft">{_html.escape(str(ttft))}</td>'
            f'<td class="hf-tput">{_html.escape(str(tput))}</td>'
            f'<td class="hf-lic">{_html.escape(str(lic))}</td>'
            f'<td class="hf-params">{_html.escape(str(params))}</td>'
            f'<td>{prov_html}</td>'
            f'</tr>'
        )

    return (
        f'<div class="hf-lb">'
        f'<div class="hf-lb-head">'
        f'<span class="hf-lb-title">{_html.escape(bd.get("display_name", bid))}</span>'
        f'<div class="hf-lb-meta">'
        f'<span class="hf-lb-count">{len(rows)} entries</span>'
        f'{hub_link}</div></div>'
        f'<div class="hf-lb-scroll">'
        f'<table class="hf-table">'
        f'<thead>{thead}</thead>'
        f'<tbody>{"".join(tbody)}</tbody>'
        f'</table></div></div>'
    )


def _fmt_ctx(n: int) -> str:
    if n >= 1_000_000:
        v = n / 1_000_000
        return f"{v:.0f}M" if v == int(v) else f"{v:.1f}M"
    if n >= 1_000:
        v = n / 1_000
        return f"{v:.0f}K" if v == int(v) else f"{v:.1f}K"
    return str(n) if n else "—"


def _lb_rows(data: dict, bid: str, providers_only: bool = False) -> list[list]:
    lookup = {bd["id"]: bd for bd in data["benchmark_data"]}
    details = lookup.get(bid, {}).get("model_details", [])[:50]
    if not details:
        return []

    model_ids = [m["model_id"] for m in details]
    _load_model_metas(model_ids, _read_token())

    if providers_only:
        details = [m for m in details if _router_data.get(m["model_id"], {}).get("providers")]

    rows = []
    for m in details:
        mid = m["model_id"]
        meta = _model_meta_cache.get(mid, {})
        router = _router_data.get(mid, {})

        providers = router.get("providers", [])
        if providers:
            prov_str = ", ".join(providers[:3])
            if len(providers) > 3:
                prov_str += f" +{len(providers) - 3}"
        else:
            prov_str = "—"

        in_price  = router.get("cheapest_input")
        out_price = router.get("cheapest_output")
        price_in_str  = f"${in_price:.2f}"  if in_price  is not None else "—"
        price_out_str = f"${out_price:.2f}" if out_price is not None else "—"

        ctx_str = _fmt_ctx(router.get("context_length") or 0)

        ttft = router.get("fastest_ttft_ms")
        ttft_str = f"{ttft:,.0f} ms" if ttft is not None else "—"

        tput = router.get("fastest_throughput")
        tput_str = f"{tput:.0f} t/s" if tput is not None else "—"

        rows.append([
            m["rank"] if m["rank"] is not None else "—",
            mid,
            str(m["value"]) if m["value"] is not None else "—",
            price_in_str,
            price_out_str,
            ctx_str,
            ttft_str,
            tput_str,
            meta.get("license") or "—",
            meta.get("params") or "—",
            prov_str,
        ])
    return rows


# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------

def build_app() -> gr.Blocks:
    """Load the dashboard data and assemble the Gradio UI.

    The data comes from the on-disk cache when it is fresh; otherwise it is
    fetched from the Hub and cached. Provider data is always fetched. Both
    are stored in the module-level ``_app_data`` / ``_router_data``, which
    the event handlers read.

    Returns:
        The ``gr.Blocks`` app, ready for ``launch()``.
    """
    global _app_data, _router_data
    _app_data = load_cached_data()
    if _app_data is None:
        _app_data = fetch_all_data()
        save_cache(_app_data)
    _router_data = _load_router_data(_read_token())

    # Initial selection: first category, and the first benchmark within it.
    s_choices = _sidebar_choices(_app_data)
    init_cat  = s_choices[0][1] if s_choices else ""
    c_choices = _card_choices(_app_data, init_cat)
    init_bid  = c_choices[0][1] if c_choices else ""

    with gr.Blocks(
        title="HF Hub Benchmark Dashboard",
        css=CUSTOM_CSS,
        theme=gr.themes.Soft(),
    ) as demo:

        topbar = gr.HTML(_render_topbar(_app_data))

        # Tracks the currently-selected benchmark for the filter toggle
        sel_bid = gr.State(init_bid)

        with gr.Row(equal_height=True):
            with gr.Column(scale=1, min_width=170, elem_id="sidebar-col"):
                cat_radio = gr.Radio(
                    choices=s_choices, value=init_cat,
                    label="Categories", elem_id="cat_radio",
                )

            with gr.Column(scale=5, elem_id="main-col"):
                with gr.Row():
                    cat_header = gr.Markdown(_cat_header(_app_data, init_cat))
                    refresh_btn = gr.Button("🔄 Refresh Now", variant="primary", scale=0, min_width=150)

                bench_radio = gr.Radio(
                    choices=c_choices, value=init_bid,
                    show_label=False, elem_id="bench_radio",
                )

                providers_filter = gr.Checkbox(
                    label="Only show models with inference providers",
                    value=False, elem_id="providers-filter",
                )

                leaderboard = gr.HTML(_render_leaderboard(_app_data, init_bid))

        # ---- Event handlers ----

        def on_cat(cat: str, prov_only: bool):
            """Category changed: list its benchmarks and show the first one."""
            new_c   = _card_choices(_app_data, cat)
            new_bid = new_c[0][1] if new_c else ""
            return (
                _cat_header(_app_data, cat),
                gr.update(choices=new_c, value=new_bid),
                _render_leaderboard(_app_data, new_bid, prov_only),
                new_bid,
            )

        def on_bench(bid: str, prov_only: bool):
            """Benchmark changed: render its leaderboard and remember the id."""
            return _render_leaderboard(_app_data, bid, prov_only), bid

        def on_filter(bid: str, prov_only: bool):
            """Filter toggled: re-render the remembered benchmark."""
            return _render_leaderboard(_app_data, bid, prov_only)

        def on_refresh(prov_only: bool):
            """Refetch everything and reset the selection to the first entries.

            On failure the previous data stays in place and the error is
            shown under the top bar; all other components are left untouched.
            """
            global _app_data, _router_data
            try:
                new_data = fetch_all_data()
                save_cache(new_data)
                _app_data = new_data
                # _load_router_data() reports failure as {}; keep the
                # previous provider data rather than blanking every column.
                new_router = _load_router_data(_read_token())
                if new_router:
                    _router_data = new_router
            except Exception as e:
                # Escape the message: URLError text looks like
                # "<urlopen error ...>" and would otherwise be parsed as an
                # HTML tag and vanish from the page.
                err = f'<p style="color:#dc2626;padding:8px">⚠️ Refresh failed: {_html.escape(str(e))}</p>'
                return _render_topbar(_app_data) + err, gr.update(), gr.update(), gr.update(), gr.update(), gr.update()

            new_s   = _sidebar_choices(_app_data)
            new_cat = new_s[0][1] if new_s else ""
            new_c   = _card_choices(_app_data, new_cat)
            new_bid = new_c[0][1] if new_c else ""
            return (
                _render_topbar(_app_data),
                gr.update(choices=new_s, value=new_cat),
                _cat_header(_app_data, new_cat),
                gr.update(choices=new_c, value=new_bid),
                _render_leaderboard(_app_data, new_bid, prov_only),
                new_bid,
            )

        cat_radio.change(
            fn=on_cat,
            inputs=[cat_radio, providers_filter],
            outputs=[cat_header, bench_radio, leaderboard, sel_bid],
        )
        bench_radio.change(
            fn=on_bench,
            inputs=[bench_radio, providers_filter],
            outputs=[leaderboard, sel_bid],
        )
        providers_filter.change(
            fn=on_filter,
            inputs=[sel_bid, providers_filter],
            outputs=[leaderboard],
        )
        refresh_btn.click(
            fn=on_refresh,
            inputs=[providers_filter],
            outputs=[topbar, cat_radio, cat_header, bench_radio, leaderboard, sel_bid],
            show_progress="full",
        )

    return demo


if __name__ == "__main__":
    # Script entry point: build the UI (loading or fetching the data first)
    # and start the Gradio server.
    demo = build_app()
    demo.launch()