Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| HF Hub Benchmark Dashboard — Gradio app. | |
| Run: python app.py | |
| """ | |
| import html as _html | |
| import json | |
| import urllib.request | |
| import urllib.error | |
| import concurrent.futures | |
| from collections import defaultdict | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| import gradio as gr | |
| # --------------------------------------------------------------------------- | |
| # Constants | |
| # --------------------------------------------------------------------------- | |
# Directory that contains this script; the JSON cache file lives next to it.
ROOT = Path(__file__).resolve().parent
CACHE_PATH = ROOT.joinpath("dashboard_cache.json")
# Maximum age, in seconds, of a cache file that is still considered fresh (6 hours).
CACHE_TTL_SECONDS = 6 * 3600
# Category name -> icon shown next to it in the UI.  The insertion order of
# this mapping is also the display order of the categories.
CATEGORY_ICONS = {
    "Knowledge": "π§ ",
    "Math / Reasoning": "π’",
    "Code / Engineering": "π»",
    "Agents": "π€",
    "Vision": "ποΈ",
    "Audio / Speech": "π",
    "Document / OCR": "π",
    "Retrieval / Embedding": "π",
    "NLP / Classification": "π·οΈ",
    "Robotics": "π¦Ύ",
    "Other": "π¦",
}
# Display order of the categories (first entry is shown first).
CATEGORY_ORDER = list(CATEGORY_ICONS)
# Hub dataset id ("owner/name") -> human-friendly benchmark title.
# fetch_all_data() looks ids up here and falls back to the part of the id
# after the last "/" when a dataset is not listed.
BENCHMARK_DISPLAY_NAMES = {
    "openai/gsm8k": "GSM8K",
    "Idavidrein/gpqa": "GPQA",
    "allenai/olmOCR-bench": "olmOCR-Bench",
    "llamaindex/ParseBench": "ParseBench",
    "mercor/apex-agents": "APEX-Agents",
    "harborframework/terminal-bench-2.0": "Terminal-Bench 2.0",
    "SWE-bench/SWE-bench_Verified": "SWE-bench Verified",
    "TIGER-Lab/MMLU-Pro": "MMLU-Pro",
    "hf-audio/open-asr-leaderboard": "Open ASR Leaderboard",
    "MathArena/aime_2026": "AIME 2026",
    "claw-eval/Claw-Eval": "Claw-Eval",
    "cais/hle": "HLE",
    "likaixin/ScreenSpot-Pro": "ScreenSpot-Pro",
    "nvidia/compute-eval": "ComputeEval",
    "ScaleAI/SWE-bench_Pro": "SWE-bench Pro",
    "FutureMa/EvasionBench": "EvasionBench",
    "mteb/BRIGHT": "BRIGHT",
    "Delores-Lin/MDPBench": "MDPBench",
    "mteb/arguana": "ArguAna",
    "MMMU/MMMU_Pro": "MMMU-Pro",
    "LEXam-Benchmark/LEXam": "LEXam",
    "mercor/ACE": "ACE",
    "mercor/APEX-v1-extended": "APEX-v1",
    "VLABench/vlabench_primitive_ft_lerobot_video": "VLABench",
    "tiiuae/PBench": "PBench",
    "MathArena/hmmt_feb_2026": "HMMT Feb 2026",
    "collinear-ai/yc-bench": "YC-Bench",
    "internlm/WildClawBench": "WildClawBench",
    "MME-Benchmarks/Video-MME-v2": "Video-MME v2",
    "open-agent-leaderboard/results": "Open Agent Leaderboard",
}
# Stylesheet passed to gr.Blocks(css=...).  Selectors starting with "#" target
# the elem_id values assigned in build_app(); the ".hf-*" classes style the
# HTML fragments produced by the _render_* helpers.  Each rule appears once:
# the "Layout columns" and "Filter checkbox" sections used to be repeated
# verbatim further down, which had no effect on the cascade.
CUSTOM_CSS = """
/* ---- Topbar ---- */
.topbar {
    display: flex; align-items: center; justify-content: space-between;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white; padding: 14px 24px; border-radius: 10px;
    margin-bottom: 8px; flex-wrap: wrap; gap: 12px;
}
.topbar-title { font-size: 18px; font-weight: 700; margin-bottom: 2px; }
.topbar-meta { font-size: 11px; opacity: 0.85; }
.topbar-pills { display: flex; gap: 8px; flex-wrap: wrap; }
.stat-pill {
    background: rgba(255,255,255,0.2); border-radius: 20px;
    padding: 4px 14px; font-size: 12px; white-space: nowrap;
}
.stat-pill b { font-size: 14px; }
/* ---- Layout columns ---- */
#sidebar-col {
    background: white !important; padding: 0 !important;
    border-right: 1px solid #e5e7eb !important;
    border-radius: 10px 0 0 10px !important;
}
#main-col {
    background: #f8fafc !important; padding: 18px 22px !important;
    border-radius: 0 10px 10px 0 !important; min-width: 0 !important;
}
#sidebar-col > .form, #main-col > .form {
    background: transparent !important; box-shadow: none !important;
    border: none !important; padding: 0 !important;
}
/* ---- Sidebar Radio β nav buttons ---- */
#cat_radio {
    background: transparent !important; border: none !important;
    box-shadow: none !important; padding: 0 !important;
}
#cat_radio > .wrap { flex-direction: column !important; gap: 0 !important; padding: 0 !important; }
#cat_radio label {
    display: flex !important; align-items: center !important;
    padding: 8px 12px !important; margin: 0 !important;
    border-left: 3px solid transparent !important; border-radius: 0 !important;
    cursor: pointer !important; font-size: 12px !important;
    color: #374151 !important; background: white !important;
    width: 100% !important; box-sizing: border-box !important; gap: 0 !important;
}
#cat_radio label:hover { background: #f3f4f6 !important; }
#cat_radio label:has(input:checked) {
    background: #ede9fe !important; border-left-color: #7c3aed !important;
    color: #5b21b6 !important; font-weight: 600 !important;
}
#cat_radio input[type="radio"] { display: none !important; }
#cat_radio .wrap span { margin-left: 0 !important; padding-left: 0 !important; }
/* ---- Bench cards Radio ---- */
#bench_radio {
    background: transparent !important; border: none !important;
    box-shadow: none !important; padding: 0 !important;
}
#bench_radio > .wrap {
    flex-direction: row !important; flex-wrap: wrap !important;
    gap: 10px !important; padding: 4px 0 12px !important;
}
#bench_radio label {
    display: flex !important; align-items: center !important;
    padding: 10px 14px !important; border: 2px solid #e5e7eb !important;
    border-radius: 10px !important; cursor: pointer !important;
    font-size: 12px !important; background: white !important;
    color: #374151 !important; min-width: 150px !important;
    margin: 0 !important; gap: 0 !important; transition: border-color 0.15s !important;
}
#bench_radio label:hover { border-color: #a78bfa !important; }
#bench_radio label:has(input:checked) {
    border-color: #7c3aed !important; background: #faf5ff !important;
    font-weight: 600 !important; color: #5b21b6 !important;
}
#bench_radio input[type="radio"] { display: none !important; }
#bench_radio .wrap span { margin-left: 0 !important; padding-left: 0 !important; }
/* ---- Filter checkbox ---- */
#providers-filter { margin: 2px 0 10px; }
#providers-filter > label { font-size: 12px !important; color: #6b7280 !important; }
/* ---- Sidebar HTML ---- */
.hf-sidebar { display: flex; flex-direction: column; padding: 10px 0; }
.hf-sidebar-label {
    font-size: 10px; font-weight: 700; color: #9ca3af;
    text-transform: uppercase; letter-spacing: 0.8px; padding: 0 16px 8px;
}
.hf-cat-btn {
    display: flex; align-items: center; gap: 9px; width: 100%;
    padding: 9px 16px; border: none; background: none; cursor: pointer;
    border-left: 3px solid transparent; font-size: 13px; color: #374151;
    text-align: left; transition: background 0.1s;
}
.hf-cat-btn:hover { background: #f3f4f6; }
.hf-cat-active {
    background: #ede9fe !important; border-left-color: #7c3aed !important;
    color: #5b21b6 !important; font-weight: 600;
}
.hf-cat-active .hf-cat-badge { background: #ddd6fe !important; color: #7c3aed !important; }
.hf-cat-icon { font-size: 15px; min-width: 20px; }
.hf-cat-name { flex: 1; }
.hf-cat-badge {
    background: #f3f4f6; border-radius: 12px;
    padding: 1px 8px; font-size: 11px; color: #6b7280;
}
/* ---- Benchmark cards HTML ---- */
.hf-section-head { display: flex; align-items: baseline; gap: 12px; margin-bottom: 12px; }
.hf-section-title { font-size: 16px; font-weight: 700; color: #111827; }
.hf-section-meta { font-size: 12px; color: #9ca3af; }
.hf-cards { display: flex; flex-wrap: wrap; gap: 10px; margin-bottom: 14px; }
.hf-card {
    border: 2px solid #e5e7eb; border-radius: 10px; padding: 10px 14px;
    cursor: pointer; background: white; min-width: 150px;
    transition: border-color 0.15s, box-shadow 0.15s;
}
.hf-card:hover { border-color: #a78bfa; box-shadow: 0 1px 6px rgba(124,58,237,0.1); }
.hf-card-active { border-color: #7c3aed !important; background: #faf5ff !important; }
.hf-card-name { font-size: 13px; font-weight: 600; color: #111827; }
.hf-card-active .hf-card-name { color: #5b21b6; }
.hf-card-count { font-size: 11px; color: #6b7280; margin-top: 3px; }
.hf-card-owner { font-size: 10px; color: #9ca3af; margin-top: 2px; }
/* ---- JS bridge textboxes (rendered but invisible) ---- */
#cat_trigger, #bench_trigger {
    display: none !important;
    position: absolute !important;
    pointer-events: none !important;
}
/* ---- Leaderboard HTML ---- */
.hf-lb { border: 1px solid #e5e7eb; border-radius: 10px; overflow: hidden; background: white; }
.hf-lb-head {
    display: flex; align-items: center; justify-content: space-between;
    padding: 10px 16px; border-bottom: 1px solid #f3f4f6; background: #f9fafb;
}
.hf-lb-title { font-size: 13px; font-weight: 600; color: #374151; }
.hf-lb-meta { display: flex; align-items: center; gap: 12px; }
.hf-lb-count { font-size: 11px; color: #9ca3af; }
.hf-hub-link { font-size: 11px; color: #7c3aed; text-decoration: none; font-weight: 500; }
.hf-hub-link:hover { text-decoration: underline; }
.hf-lb-scroll { overflow-x: auto; }
.hf-table { width: 100%; border-collapse: collapse; font-size: 12px; }
.hf-table thead th {
    padding: 7px 12px; text-align: left; font-size: 10px; font-weight: 700;
    color: #6b7280; text-transform: uppercase; letter-spacing: 0.4px;
    white-space: nowrap; background: white; border-bottom: 1px solid #f3f4f6;
}
.hf-table td { padding: 7px 12px; border-bottom: 1px solid #f3f4f6; vertical-align: middle; }
.hf-table tbody tr:last-child td { border-bottom: none; }
.hf-table tbody tr:nth-child(even) td { background: #fafafa; }
.hf-table tbody tr:hover td { background: #faf5ff !important; }
.hf-rank { width: 44px; text-align: center; font-size: 17px; }
.hf-rank-num { color: #9ca3af; font-size: 12px; font-variant-numeric: tabular-nums; }
.hf-model a { color: #2563eb; text-decoration: none; font-size: 11px; word-break: break-all; }
.hf-model a:hover { text-decoration: underline; }
.hf-score { font-variant-numeric: tabular-nums; font-weight: 600; color: #111827; white-space: nowrap; }
.hf-price { font-variant-numeric: tabular-nums; color: #059669; white-space: nowrap; }
.hf-ctx, .hf-params, .hf-ttft, .hf-tput { white-space: nowrap; }
.hf-ttft { color: #7c3aed; }
.hf-tput { color: #0369a1; }
.hf-lic { color: #6b7280; font-size: 11px; }
.hf-params { font-weight: 500; }
.hf-provs { display: flex; flex-wrap: wrap; gap: 3px; }
.hf-chip {
    background: #dbeafe; color: #1e40af;
    border-radius: 4px; padding: 1px 6px;
    font-size: 10px; font-weight: 500; white-space: nowrap;
}
.hf-chip-more { background: #f3f4f6 !important; color: #6b7280 !important; }
.hf-na { color: #d1d5db; }
.hf-empty { padding: 48px 24px; text-align: center; color: #9ca3af; font-size: 14px; }
"""
| # --------------------------------------------------------------------------- | |
| # HF API helpers | |
| # --------------------------------------------------------------------------- | |
| def _http_get_json(url: str, token: str | None = None, timeout: int = 30): | |
| req = urllib.request.Request(url, headers={"Accept": "application/json"}) | |
| if token: | |
| req.add_header("Authorization", f"Bearer {token}") | |
| with urllib.request.urlopen(req, timeout=timeout) as resp: | |
| return json.loads(resp.read().decode("utf-8")) | |
| def _read_token() -> str | None: | |
| import os | |
| p = Path(os.path.expanduser("~/.cache/huggingface/token")) | |
| if p.exists(): | |
| tok = p.read_text().strip() | |
| if tok: | |
| return tok | |
| return os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") | |
def discover_benchmarks(token=None) -> list[dict]:
    """List datasets tagged ``benchmark:official`` on the Hugging Face Hub.

    Queries the datasets API (at most 1000 results) and returns one dict per
    dataset with the keys ``id``, ``tags`` and ``description`` (truncated to
    200 characters).  Items that are not dicts or have no ``id`` are skipped.
    """
    listing = _http_get_json(
        "https://huggingface.co/api/datasets?filter=benchmark:official&limit=1000",
        token,
        timeout=30,
    )
    return [
        {
            "id": item["id"],
            "tags": item.get("tags", []),
            "description": (item.get("description") or "")[:200],
        }
        for item in listing
        if isinstance(item, dict) and "id" in item
    ]
def get_leaderboard(dataset_id: str, token=None) -> list[dict]:
    """Fetch the leaderboard entries of one benchmark dataset (best effort).

    Returns the list of entries, unwrapping an ``{"entries": [...]}``
    envelope when present.  Any transport failure or unparsable body yields
    an empty list so that one bad leaderboard cannot abort a full refresh.
    """
    url = f"https://huggingface.co/api/datasets/{dataset_id}/leaderboard"
    try:
        data = _http_get_json(url, token, timeout=30)
    except (OSError, ValueError):
        # OSError covers HTTPError/URLError as well as socket timeouts raised
        # while reading the body; ValueError covers invalid JSON or UTF-8.
        return []
    if isinstance(data, dict) and "entries" in data:
        data = data["entries"]
    return data if isinstance(data, list) else []
| # --------------------------------------------------------------------------- | |
| # Categorisation | |
| # --------------------------------------------------------------------------- | |
def categorize_benchmark(bench: dict) -> list[str]:
    """Assign a benchmark to one or more dashboard categories.

    Two rule sets are applied: exact matches against the dataset's ``tags``
    and substring matches against the lower-cased dataset ``id``.  A
    benchmark matching no rule is filed under ``"Other"``.  The result is
    ordered according to ``CATEGORY_ORDER``.
    """
    # (tags, category): the category applies when any of the tags is present.
    tag_rules = (
        (("modality:audio", "modality:speech"), "Audio / Speech"),
        (("modality:image", "modality:video"), "Vision"),
        (("modality:document",), "Document / OCR"),
        (("task_categories:robotics",), "Robotics"),
        (("task_categories:text-retrieval",), "Retrieval / Embedding"),
    )
    # (substrings, category): the category applies when any substring occurs
    # in the lower-cased dataset id.
    keyword_rules = (
        (("math", "aime", "hmmt", "gsm8k"), "Math / Reasoning"),
        (("swe", "terminal", "compute-eval"), "Code / Engineering"),
        (("agent", "claw", "apex-agent", "wildclaw", "yc-bench"), "Agents"),
        (("mmlu", "gpqa", "hle"), "Knowledge"),
        (("ocr", "parse", "mdp"), "Document / OCR"),
        (("asr",), "Audio / Speech"),
        (("screen", "mmmu", "video", "pbench"), "Vision"),
        (("evasion", "lex"), "NLP / Classification"),
        (("bright", "arguana"), "Retrieval / Embedding"),
    )

    tags = bench.get("tags", [])
    lowered_id = bench["id"].lower()

    matched = {
        category
        for wanted, category in tag_rules
        if any(tag in tags for tag in wanted)
    }
    matched.update(
        category
        for needles, category in keyword_rules
        if any(needle in lowered_id for needle in needles)
    )
    if not matched:
        matched.add("Other")

    def display_position(category: str) -> int:
        # Unknown categories sort after every known one.
        return CATEGORY_ORDER.index(category) if category in CATEGORY_ORDER else 99

    return sorted(matched, key=display_position)
| # --------------------------------------------------------------------------- | |
| # Data fetching & aggregation | |
| # --------------------------------------------------------------------------- | |
def fetch_all_data() -> dict:
    """Download every official benchmark and its leaderboard, then aggregate.

    Returns a JSON-serialisable dict with overall counters
    (``total_benchmarks``, ``total_unique_models``,
    ``benchmarks_with_entries``, ``benchmarks_empty``), a UTC ``timestamp``,
    the sorted list ``all_models``, per-benchmark records in
    ``benchmark_data`` and per-category counters in ``categories``.

    Leaderboard entries that are not dicts or carry no model id are skipped.
    Entries are ordered by rank, with unranked entries last.
    """
    token = _read_token()
    benchmarks = discover_benchmarks(token)

    all_models: set[str] = set()
    benchmark_data = []
    for bench in benchmarks:
        bid = bench["id"]
        model_details = []
        for entry in get_leaderboard(bid, token):
            if not isinstance(entry, dict):
                # The leaderboard payload is untrusted; ignore malformed rows.
                continue
            mid = entry.get("modelId") or entry.get("model_id") or entry.get("model") or ""
            if not mid:
                continue
            model_details.append({
                "rank": entry.get("rank"),
                "model_id": mid,
                "value": entry.get("value"),
                "verified": entry.get("verified", False),
            })
        # Ranked entries first in ascending rank; a rank of 0 is a real rank
        # and must not be pushed to the end by a truthiness test.
        model_details.sort(
            key=lambda d: (d["rank"] is None, 0 if d["rank"] is None else d["rank"])
        )
        models = {d["model_id"] for d in model_details}
        all_models.update(models)
        benchmark_data.append({
            "id": bid,
            "display_name": BENCHMARK_DISPLAY_NAMES.get(bid, bid.split("/")[-1]),
            "categories": categorize_benchmark(bench),
            "num_models": len(models),
            "models": sorted(models),
            "model_details": model_details,
            "description": bench["description"],
        })

    # Group benchmarks (and the models they cover) by category.
    cat_benchmarks: dict[str, list] = defaultdict(list)
    cat_models: dict[str, set] = defaultdict(set)
    for bd in benchmark_data:
        for cat in bd["categories"]:
            cat_benchmarks[cat].append(bd)
            cat_models[cat].update(bd["models"])

    return {
        "total_benchmarks": len(benchmarks),
        "total_unique_models": len(all_models),
        "benchmarks_with_entries": sum(1 for bd in benchmark_data if bd["num_models"] > 0),
        "benchmarks_empty": sum(1 for bd in benchmark_data if bd["num_models"] == 0),
        "timestamp": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "all_models": sorted(all_models),
        "benchmark_data": benchmark_data,
        "categories": {
            cat: {
                "benchmarks": len(cat_benchmarks[cat]),
                "unique_models": len(cat_models[cat]),
            }
            for cat in CATEGORY_ORDER
            if cat in cat_benchmarks
        },
    }
def load_cached_data() -> dict | None:
    """Return the cached dashboard data when it exists and is still fresh.

    The cache at ``CACHE_PATH`` is accepted only if it is a JSON object whose
    ``timestamp`` is an ISO-8601 string less than ``CACHE_TTL_SECONDS`` old.
    Any other situation (missing or unreadable file, invalid JSON,
    missing or malformed timestamp, expired cache) yields ``None`` so the
    caller fetches fresh data.
    """
    try:
        raw = CACHE_PATH.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None  # no cache yet, or it cannot be read
    try:
        cached = json.loads(raw)
    except ValueError:
        return None  # truncated or corrupt cache file
    if not isinstance(cached, dict):
        return None
    stamp = cached.get("timestamp")
    if not stamp or not isinstance(stamp, str):
        return None
    try:
        age = (datetime.now(timezone.utc) - datetime.fromisoformat(stamp)).total_seconds()
    except (ValueError, TypeError):
        # ValueError: not ISO-8601; TypeError: timestamp without a timezone.
        return None
    return cached if age < CACHE_TTL_SECONDS else None
def save_cache(data: dict) -> None:
    """Persist *data* as indented JSON at ``CACHE_PATH``.

    The JSON is first written to a sibling ``.tmp`` file and then moved over
    the cache path, so an interrupted write never leaves a truncated cache
    behind.  ``OSError`` from writing or renaming propagates to the caller.
    """
    tmp_path = CACHE_PATH.with_name(CACHE_PATH.name + ".tmp")
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp_path.replace(CACHE_PATH)
| # --------------------------------------------------------------------------- | |
| # UI helpers | |
| # --------------------------------------------------------------------------- | |
# Module-level state shared by the render helpers and the Gradio callbacks.
# Aggregated dashboard data; assigned in build_app() and replaced on refresh.
_app_data: dict = {}
# model_id -> {providers, cheapest_input, cheapest_output, context_length,
#              fastest_ttft_ms, fastest_throughput}; filled by _load_router_data().
_router_data: dict = {}
# model_id -> {license, params}; filled lazily by _load_model_metas().
_model_meta_cache: dict = {}
def _render_topbar(data: dict) -> str:
    """Build the HTML banner with the title, update time and summary pills.

    Reads ``timestamp`` (first 19 characters, ``"?"`` when absent), the
    overall counters and the per-benchmark ``num_models`` values from *data*.
    """
    stamp = data.get("timestamp", "?")[:19]
    entry_total = sum(bd["num_models"] for bd in data.get("benchmark_data", []))
    pills = (
        (data["total_benchmarks"], "benchmarks"),
        (data["total_unique_models"], "models"),
        (f"{entry_total:,}", "entries"),
        (data["benchmarks_with_entries"], "active"),
        (data["benchmarks_empty"], "empty"),
    )
    pills_html = "".join(
        f'<div class="stat-pill"><b>{value}</b> {label}</div>' for value, label in pills
    )
    heading = (
        '<div><div class="topbar-title">π HF Hub Benchmark Dashboard</div>'
        f'<div class="topbar-meta">Last updated: {stamp} UTC Β· auto-refreshes every 6h</div></div>'
    )
    return f'<div class="topbar">{heading}<div class="topbar-pills">{pills_html}</div></div>'
def _load_router_data(token: str | None = None) -> dict:
    """Fetch all inference-available models from the HF router (pricing + context).

    Returns ``model_id -> info`` for every model with at least one provider
    whose ``status`` is ``"live"``.  ``info`` holds ``providers`` (names),
    ``cheapest_input`` / ``cheapest_output`` (lowest listed price or
    ``None``), ``context_length`` (largest value, 0 when unknown),
    ``fastest_ttft_ms`` and ``fastest_throughput`` (``None`` when unknown).

    Router data is optional: any fetch failure or unexpected payload shape
    results in an empty (or partial) mapping rather than an exception.
    """

    def lowest_price(providers: list[dict], kind: str):
        # Missing or null pricing simply does not take part in the minimum.
        prices = [
            price
            for p in providers
            if (price := (p.get("pricing") or {}).get(kind)) is not None
        ]
        return min(prices) if prices else None

    try:
        resp = _http_get_json("https://router.huggingface.co/v1/models", token, timeout=30)
    except Exception:
        # Deliberate best effort: the dashboard works without router data.
        return {}

    models = resp.get("data") if isinstance(resp, dict) else None
    out: dict = {}
    for m in models or []:
        if not isinstance(m, dict):
            continue
        mid = m.get("id", "")
        if not mid:
            continue
        live = [
            p
            for p in m.get("providers") or []
            if isinstance(p, dict) and p.get("status") == "live" and p.get("provider")
        ]
        if not live:
            continue
        ttfts = [p["first_token_latency_ms"] for p in live if p.get("first_token_latency_ms")]
        throughputs = [p["throughput"] for p in live if p.get("throughput")]
        out[mid] = {
            "providers": [p["provider"] for p in live],
            "cheapest_input": lowest_price(live, "input"),
            "cheapest_output": lowest_price(live, "output"),
            "context_length": max((p.get("context_length") or 0) for p in live),
            "fastest_ttft_ms": min(ttfts) if ttfts else None,
            "fastest_throughput": max(throughputs) if throughputs else None,
        }
    return out
def _load_model_metas(model_ids: list[str], token: str | None = None) -> None:
    """Fetch license + param count for model_ids not yet cached. Fills _model_meta_cache.

    Each uncached id is requested once (duplicates in *model_ids* are
    collapsed), using up to 20 worker threads.  Cached values are dicts with
    ``license`` and ``params`` (e.g. ``"8B"`` / ``"350M"``, empty string when
    unknown).

    A definitive miss (HTTP 4xx other than 429) is cached as ``{}`` so it is
    not requested again.  Transient failures (timeouts, connection errors,
    429, 5xx) are *not* cached, so the model is retried on a later call
    instead of staying blank for the lifetime of the process.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    to_fetch = [m for m in dict.fromkeys(model_ids) if m not in _model_meta_cache]
    if not to_fetch:
        return

    def _fetch_one(mid: str) -> tuple[str, dict | None]:
        url = f"https://huggingface.co/api/models/{mid}?expand[]=safetensors&expand[]=cardData"
        try:
            d = _http_get_json(url, token, timeout=10)
        except urllib.error.HTTPError as err:
            definitive = 400 <= err.code < 500 and err.code != 429
            return mid, ({} if definitive else None)
        except Exception:
            return mid, None
        if not isinstance(d, dict):
            return mid, {}
        lic = (d.get("cardData") or {}).get("license", "")
        if not lic:
            # Fall back to a "license:<name>" tag when the card has no license.
            for t in d.get("tags", []):
                if t.startswith("license:"):
                    lic = t[8:]
                    break
        total = (d.get("safetensors") or {}).get("total", 0)
        params = ""
        if total:
            b = total / 1e9
            params = f"{round(b)}B" if b >= 1 else f"{round(total / 1e6)}M"
        return mid, {"license": lic, "params": params}

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as ex:
        # Results are written to the shared cache from this thread only.
        for mid, meta in ex.map(_fetch_one, to_fetch):
            if meta is not None:
                _model_meta_cache[mid] = meta
def _sidebar_choices(data: dict) -> list[tuple[str, str]]:
    """Return ``(label, category)`` radio choices for the category sidebar.

    Only categories present in ``data["categories"]`` are listed, in
    ``CATEGORY_ORDER``; each label shows the icon, the category name and its
    benchmark count.
    """
    per_category = data.get("categories", {})
    return [
        (
            f"{CATEGORY_ICONS.get(name, '')} {name} ({per_category[name]['benchmarks']})",
            name,
        )
        for name in CATEGORY_ORDER
        if name in per_category
    ]
def _card_choices(data: dict, category: str) -> list[tuple[str, str]]:
    """Return ``(label, benchmark_id)`` radio choices for one category.

    Benchmarks are ordered by descending model count.  The label shows the
    display name and model count, followed by ``[owner]`` when the dataset id
    has an owner prefix.
    """
    in_category = [bd for bd in data["benchmark_data"] if category in bd["categories"]]
    in_category.sort(key=lambda bd: bd["num_models"], reverse=True)

    def label_for(bd: dict) -> str:
        text = f"{bd['display_name']} Β· {bd['num_models']} models"
        owner, slash, _ = bd["id"].partition("/")
        return f"{text} [{owner}]" if slash and owner else text

    return [(label_for(bd), bd["id"]) for bd in in_category]
def _cat_header(data: dict, cat: str) -> str:
    """Return the Markdown heading for *cat* with its benchmark and model counts.

    Counts default to 0 when the category is not present in *data*.
    """
    stats = data.get("categories", {}).get(cat, {})
    segments = [
        f"### {CATEGORY_ICONS.get(cat, '')} {cat}",
        f"{stats.get('benchmarks', 0)} benchmarks",
        f"{stats.get('unique_models', 0)} models",
    ]
    return " Β· ".join(segments)
def _rank_cell_html(rank) -> str:
    """Return the rank cell content: a medal for ranks 1-3, else the escaped rank."""
    medals = {1: "\U0001F947", 2: "\U0001F948", 3: "\U0001F949"}  # gold, silver, bronze
    # Non-int ranks (e.g. the placeholder string) never count as a medal rank.
    if isinstance(rank, int) and not isinstance(rank, bool) and rank in medals:
        return medals[rank]
    # The rank comes from the leaderboard API and must be escaped like any other cell.
    return f'<span class="hf-rank-num">{_html.escape(str(rank))}</span>'


def _provider_chips_html(provs: str) -> str:
    """Render a provider summary such as ``"a, b, c +2"`` as a row of chips.

    The trailing ``+N`` overflow marker is emitted as its own chip with the
    ``hf-chip-more`` class, whether it arrives as a separate comma-separated
    item or attached to the last provider name.
    """
    chips: list[str] = []

    def add(label: str, overflow: bool = False) -> None:
        cls = "hf-chip hf-chip-more" if overflow else "hf-chip"
        chips.append(f'<span class="{cls}">{_html.escape(label)}</span>')

    for part in provs.split(","):
        part = part.strip()
        if not part:
            continue
        name, sep, extra = part.rpartition(" +")
        if sep and extra.isdigit():
            # "name +N": one normal chip for the name, one overflow chip.
            if name.strip():
                add(name.strip())
            add(f"+{extra}", overflow=True)
        else:
            add(part, overflow=part.startswith("+"))
    return f'<div class="hf-provs">{"".join(chips)}</div>'


def _render_leaderboard(data: dict, bid: str, providers_only: bool = False) -> str:
    """Render the leaderboard of benchmark *bid* as an HTML fragment.

    Args:
        data: Aggregated dashboard data containing ``benchmark_data``.
        bid: Dataset id of the benchmark; an empty value yields a prompt to
            select a benchmark.
        providers_only: Passed through to ``_lb_rows`` to keep only models
            that have inference providers.

    Returns:
        A card with the benchmark title, a link to the dataset on the Hub
        and either the results table or a "No entries yet." notice.  All
        values taken from the data are HTML-escaped.
    """
    if not bid:
        return '<div class="hf-empty">Select a benchmark to view its leaderboard.</div>'

    lookup = {bd["id"]: bd for bd in data["benchmark_data"]}
    bd = lookup.get(bid, {})
    title_html = f'<span class="hf-lb-title">{_html.escape(bd.get("display_name", bid))}</span>'
    safe_bid = _html.escape(bid)
    hub_link = (
        f'<a class="hf-hub-link" href="https://huggingface.co/datasets/{safe_bid}" target="_blank">'
        f'β View on Hub</a>'
    )

    rows = _lb_rows(data, bid, providers_only)
    if not rows:
        return (
            f'<div class="hf-lb">'
            f'<div class="hf-lb-head">'
            f'{title_html}'
            f'{hub_link}</div>'
            f'<div class="hf-empty">No entries yet.</div></div>'
        )

    headers = ["", "Model", "Score", "In $/1M", "Out $/1M", "Context", "TTFT",
               "Throughput", "License", "Params", "Providers"]
    thead = "<tr>" + "".join(f"<th>{h}</th>" for h in headers) + "</tr>"

    tbody = []
    for rank, model_id, score, price_in, price_out, ctx, ttft, tput, lic, params, provs in rows:
        safe_mid = _html.escape(model_id)
        model_html = f'<a href="https://huggingface.co/{safe_mid}" target="_blank">{safe_mid}</a>'
        # "β" is the placeholder _lb_rows uses for "no providers".
        if provs != "β":
            prov_html = _provider_chips_html(provs)
        else:
            prov_html = '<span class="hf-na">β</span>'
        tbody.append(
            f'<tr>'
            f'<td class="hf-rank">{_rank_cell_html(rank)}</td>'
            f'<td class="hf-model">{model_html}</td>'
            f'<td class="hf-score">{_html.escape(str(score))}</td>'
            f'<td class="hf-price">{_html.escape(str(price_in))}</td>'
            f'<td class="hf-price">{_html.escape(str(price_out))}</td>'
            f'<td class="hf-ctx">{_html.escape(str(ctx))}</td>'
            f'<td class="hf-ttft">{_html.escape(str(ttft))}</td>'
            f'<td class="hf-tput">{_html.escape(str(tput))}</td>'
            f'<td class="hf-lic">{_html.escape(str(lic))}</td>'
            f'<td class="hf-params">{_html.escape(str(params))}</td>'
            f'<td>{prov_html}</td>'
            f'</tr>'
        )

    return (
        f'<div class="hf-lb">'
        f'<div class="hf-lb-head">'
        f'{title_html}'
        f'<div class="hf-lb-meta">'
        f'<span class="hf-lb-count">{len(rows)} entries</span>'
        f'{hub_link}</div></div>'
        f'<div class="hf-lb-scroll">'
        f'<table class="hf-table">'
        f'<thead>{thead}</thead>'
        f'<tbody>{"".join(tbody)}</tbody>'
        f'</table></div></div>'
    )
def _fmt_ctx(n: int) -> str:
    """Format a context length compactly, e.g. ``128000 -> "128K"``.

    Values below 1000 are shown as-is, larger ones with one decimal and a
    ``K``/``M`` suffix; a trailing ``.0`` is dropped so ``1048576`` reads
    ``"1M"`` rather than ``"1.0M"``.  Values that would round to ``1000.0K``
    are promoted to the ``M`` scale.  A falsy *n* yields the placeholder used
    for missing values.
    """
    if not n:
        return "β"
    if n < 1_000:
        return str(n)
    if n < 1_000_000:
        thousands = f"{n / 1_000:.1f}"
        # 999_950 and above round up to "1000.0"; show those as megas instead.
        if float(thousands) < 1_000:
            return thousands.removesuffix(".0") + "K"
    return f"{n / 1_000_000:.1f}".removesuffix(".0") + "M"
def _lb_rows(data: dict, bid: str, providers_only: bool = False) -> list[list]:
    """Build the table rows for the leaderboard of benchmark *bid*.

    Only the first 50 entries of the benchmark are considered.  With
    *providers_only*, entries whose model has no provider in ``_router_data``
    are dropped.  Each row is ``[rank, model_id, score, input price,
    output price, context, TTFT, throughput, license, params, providers]``
    with a placeholder string for unknown values; at most three provider
    names are listed, followed by ``+N`` for the remainder.

    Model metadata is loaded (and cached) only for the models that will
    actually be shown.
    """
    lookup = {bd["id"]: bd for bd in data["benchmark_data"]}
    details = lookup.get(bid, {}).get("model_details", [])[:50]
    if providers_only:
        # Filter first so no metadata requests are spent on hidden models.
        details = [m for m in details if _router_data.get(m["model_id"], {}).get("providers")]
    if not details:
        return []
    _load_model_metas([m["model_id"] for m in details], _read_token())

    rows = []
    for m in details:
        mid = m["model_id"]
        meta = _model_meta_cache.get(mid, {})
        router = _router_data.get(mid, {})

        providers = router.get("providers", [])
        if providers:
            prov_str = ", ".join(providers[:3])
            if len(providers) > 3:
                prov_str += f" +{len(providers) - 3}"
        else:
            prov_str = "β"

        in_price = router.get("cheapest_input")
        out_price = router.get("cheapest_output")
        ttft = router.get("fastest_ttft_ms")
        tput = router.get("fastest_throughput")

        rows.append([
            m["rank"] if m["rank"] is not None else "β",
            mid,
            str(m["value"]) if m["value"] is not None else "β",
            f"${in_price:.2f}" if in_price is not None else "β",
            f"${out_price:.2f}" if out_price is not None else "β",
            _fmt_ctx(router.get("context_length") or 0),
            f"{ttft:,.0f} ms" if ttft is not None else "β",
            f"{tput:.0f} t/s" if tput is not None else "β",
            meta.get("license") or "β",
            meta.get("params") or "β",
            prov_str,
        ])
    return rows
| # --------------------------------------------------------------------------- | |
| # Gradio app | |
| # --------------------------------------------------------------------------- | |
def build_app() -> gr.Blocks:
    """Load the dashboard data and assemble the Gradio Blocks UI.

    Data comes from the on-disk cache when it is fresh; otherwise it is
    fetched from the Hub and the cache is rewritten.  Router (provider and
    pricing) data is always fetched.  Both are stored in the module globals
    ``_app_data`` and ``_router_data``, which the callbacks read.

    Layout: a top bar, a category radio in the sidebar, and in the main
    column the category heading, a refresh button, the benchmark radio, the
    providers-only checkbox and the leaderboard HTML.

    Returns:
        The configured (not yet launched) ``gr.Blocks`` instance.
    """
    global _app_data, _router_data
    _app_data = load_cached_data()
    if _app_data is None:
        _app_data = fetch_all_data()
        save_cache(_app_data)
    _router_data = _load_router_data(_read_token())

    # Initial selection: first category and its first benchmark, if any.
    s_choices = _sidebar_choices(_app_data)
    init_cat = s_choices[0][1] if s_choices else ""
    c_choices = _card_choices(_app_data, init_cat)
    init_bid = c_choices[0][1] if c_choices else ""

    with gr.Blocks(
        title="HF Hub Benchmark Dashboard",
        css=CUSTOM_CSS,
        theme=gr.themes.Soft(),
    ) as demo:
        topbar = gr.HTML(_render_topbar(_app_data))
        # Tracks the currently-selected benchmark for the filter toggle
        sel_bid = gr.State(init_bid)
        with gr.Row(equal_height=True):
            with gr.Column(scale=1, min_width=170, elem_id="sidebar-col"):
                cat_radio = gr.Radio(
                    choices=s_choices, value=init_cat,
                    label="Categories", elem_id="cat_radio",
                )
            with gr.Column(scale=5, elem_id="main-col"):
                with gr.Row():
                    cat_header = gr.Markdown(_cat_header(_app_data, init_cat))
                    refresh_btn = gr.Button("π Refresh Now", variant="primary", scale=0, min_width=150)
                bench_radio = gr.Radio(
                    choices=c_choices, value=init_bid,
                    show_label=False, elem_id="bench_radio",
                )
                providers_filter = gr.Checkbox(
                    label="Only show models with inference providers",
                    value=False, elem_id="providers-filter",
                )
                leaderboard = gr.HTML(_render_leaderboard(_app_data, init_bid))

        # ---- Event handlers ----
        def on_cat(cat: str, prov_only: bool):
            """Category changed: show its benchmarks and select the first one."""
            new_c = _card_choices(_app_data, cat)
            new_bid = new_c[0][1] if new_c else ""
            return (
                _cat_header(_app_data, cat),
                gr.update(choices=new_c, value=new_bid),
                _render_leaderboard(_app_data, new_bid, prov_only),
                new_bid,
            )

        def on_bench(bid: str, prov_only: bool):
            """Benchmark changed: re-render the table and remember the selection."""
            return _render_leaderboard(_app_data, bid, prov_only), bid

        def on_filter(bid: str, prov_only: bool):
            """Providers-only checkbox toggled: re-render the current benchmark."""
            return _render_leaderboard(_app_data, bid, prov_only)

        def on_refresh(prov_only: bool):
            """Re-fetch everything; on failure keep the old data and show the error."""
            global _app_data, _router_data
            try:
                new_data = fetch_all_data()
                save_cache(new_data)
                _app_data = new_data
                _router_data = _load_router_data(_read_token())
            except Exception as e:
                # Escape the message: texts such as "<urlopen error ...>" would
                # otherwise be parsed as an HTML tag and vanish from the page.
                err = (
                    f'<p style="color:#dc2626;padding:8px">β οΈ Refresh failed: '
                    f'{_html.escape(str(e))}</p>'
                )
                return _render_topbar(_app_data) + err, gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
            new_s = _sidebar_choices(_app_data)
            new_cat = new_s[0][1] if new_s else ""
            new_c = _card_choices(_app_data, new_cat)
            new_bid = new_c[0][1] if new_c else ""
            return (
                _render_topbar(_app_data),
                gr.update(choices=new_s, value=new_cat),
                _cat_header(_app_data, new_cat),
                gr.update(choices=new_c, value=new_bid),
                _render_leaderboard(_app_data, new_bid, prov_only),
                new_bid,
            )

        cat_radio.change(
            fn=on_cat,
            inputs=[cat_radio, providers_filter],
            outputs=[cat_header, bench_radio, leaderboard, sel_bid],
        )
        bench_radio.change(
            fn=on_bench,
            inputs=[bench_radio, providers_filter],
            outputs=[leaderboard, sel_bid],
        )
        providers_filter.change(
            fn=on_filter,
            inputs=[sel_bid, providers_filter],
            outputs=[leaderboard],
        )
        refresh_btn.click(
            fn=on_refresh,
            inputs=[providers_filter],
            outputs=[topbar, cat_radio, cat_header, bench_radio, leaderboard, sel_bid],
            show_progress="full",
        )
    return demo
# Script entry point: build the UI (loading cached or freshly fetched data)
# and start the Gradio server.
if __name__ == "__main__":
    demo = build_app()
    demo.launch()