"""ViTeX-Bench public leaderboard. Three tabs: * Leaderboard — full thirteen-metric vector for every approved method, rendered as styled HTML with TextScore highlighted as the explicit sort key. * Submit — upload eval.json from a successful `bash scripts/run_benchmark.sh` run; saved as a pending entry awaiting maintainer review. * Admin — passphrase-gated panel listing pending submissions with approve / reject actions. State lives in `submissions.jsonl` inside this Space repo. Each line is one JSON object; `status` is `pending` / `approved` / `rejected`. Pre-populated with the paper baselines (status=approved). Owner-only writes via HF_TOKEN (set as a Space secret). """ import html as _html import json import math import os import time from typing import Dict, List, Optional, Tuple import gradio as gr import pandas as pd from huggingface_hub import HfApi, hf_hub_download from huggingface_hub.utils import HfHubHTTPError REPO_ID = "ViTeX-Bench/ViTeX-Bench-Leaderboard" SUBMISSIONS_FILE = "submissions.jsonl" HF_TOKEN = os.environ.get("HF_TOKEN", "") ADMIN_PASSPHRASE = os.environ.get("ADMIN_PASSPHRASE", "") api = HfApi(token=HF_TOKEN) if HF_TOKEN else HfApi() METRIC_KEYS = [ "SeqAcc", "CharAcc", "TTS", "Flicker_full", "Flicker_crop", "Warp_full", "Warp_crop", "MUSIQ_full", "MUSIQ_crop", "PSNR_loc", "SSIM_loc", "LPIPS_loc", "DreamSim_loc", ] TEXT_KEYS = ["SeqAcc", "CharAcc", "TTS"] def _text_score(seq, char, tts): if seq is None or char is None or tts is None: return None if seq <= 0.0 or char <= 0.0 or tts <= 0.0: return 0.0 return math.exp((math.log(seq) + math.log(char) + math.log(tts)) / 3.0) # ---------- I/O ---------- def fetch_submissions() -> List[Dict]: """Read submissions.jsonl from the Space repo (or local fallback). Returns an empty list on any error so the leaderboard can still render instead of breaking the whole Gradio app. """ try: if os.path.exists(SUBMISSIONS_FILE): with open(SUBMISSIONS_FILE) as f: return [json.loads(line) for line in f if line.strip()] except Exception as e: print(f"[fetch_submissions] local read failed: {e}", flush=True) try: path = hf_hub_download(REPO_ID, SUBMISSIONS_FILE, repo_type="space", token=HF_TOKEN or None) with open(path) as f: return [json.loads(line) for line in f if line.strip()] except HfHubHTTPError as e: print(f"[fetch_submissions] hub download HTTP error: {e}", flush=True) return [] except Exception as e: print(f"[fetch_submissions] hub download failed: {e}", flush=True) return [] def write_submissions(items: List[Dict]) -> None: """Persist submissions.jsonl to the Space repo + local cache.""" content = "\n".join(json.dumps(x, ensure_ascii=False) for x in items) + "\n" with open(SUBMISSIONS_FILE, "w") as f: f.write(content) if HF_TOKEN: api.upload_file( path_or_fileobj=content.encode(), path_in_repo=SUBMISSIONS_FILE, repo_id=REPO_ID, repo_type="space", commit_message=f"Update submissions ({len(items)} entries)", ) # ---------- HTML leaderboard rendering ---------- LEADERBOARD_CSS = """ """ # Global JS injected via Blocks(js=...). It hooks every .vbench-tbl table # in the DOM (immediately + via MutationObserver as Gradio re-renders the # HTML component) and attaches click-to-sort + above/below-mean shading. # Inline