"""ViTeX-Bench public leaderboard.
Three tabs:
* Leaderboard — full thirteen-metric vector for every approved method,
rendered as styled HTML with TextScore highlighted as the
explicit sort key.
* Submit — upload eval.json from a successful `bash scripts/run_benchmark.sh`
run; saved as a pending entry awaiting maintainer review.
* Admin — passphrase-gated panel listing pending submissions with
approve / reject actions.
State lives in `submissions.jsonl` inside this Space repo. Each line is one
JSON object; `status` is `pending` / `approved` / `rejected`. Pre-populated
with the paper baselines (status=approved). Owner-only writes via HF_TOKEN
(set as a Space secret).
"""
import html as _html
import json
import math
import os
import time
from typing import Dict, List, Optional, Tuple
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import HfHubHTTPError
# Hub repo id and the name of the JSONL state file used by the I/O helpers.
REPO_ID = "ViTeX-Bench/ViTeX-Bench-Leaderboard"
SUBMISSIONS_FILE = "submissions.jsonl"

# Secrets are read from the environment; each falls back to "" when unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")
ADMIN_PASSPHRASE = os.getenv("ADMIN_PASSPHRASE", "")

# Build the Hub client with the token only when one was actually provided;
# an empty token means "construct the client with its defaults".
if HF_TOKEN:
    api = HfApi(token=HF_TOKEN)
else:
    api = HfApi()
# The three text metrics. Their names match the arguments of _text_score
# (seq / char / tts) -- presumably these are the inputs to TextScore;
# confirm against the callers.
TEXT_KEYS = ["SeqAcc", "CharAcc", "TTS"]

# Every metric key, in a fixed order: the text metrics first, followed by
# the remaining ten (thirteen in total).
METRIC_KEYS = [
    *TEXT_KEYS,
    "Flicker_full",
    "Flicker_crop",
    "Warp_full",
    "Warp_crop",
    "MUSIQ_full",
    "MUSIQ_crop",
    "PSNR_loc",
    "SSIM_loc",
    "LPIPS_loc",
    "DreamSim_loc",
]
def _text_score(seq, char, tts):
if seq is None or char is None or tts is None:
return None
if seq <= 0.0 or char <= 0.0 or tts <= 0.0:
return 0.0
return math.exp((math.log(seq) + math.log(char) + math.log(tts)) / 3.0)
# ---------- I/O ----------
def fetch_submissions() -> List[Dict]:
    """Load every submission record from ``submissions.jsonl``.

    The local copy of the file is tried first. If it does not exist, or
    reading/parsing it fails, the file is downloaded from the Space repo on
    the Hub instead.

    Returns:
        One dict per non-blank line of the file. An empty list is returned
        when the Hub download or its parsing fails, so the leaderboard can
        still render instead of breaking the whole Gradio app.
    """

    def _read_jsonl(path: str) -> List[Dict]:
        # Read as UTF-8 explicitly: records are serialised with
        # ensure_ascii=False and uploaded as UTF-8 bytes, so relying on the
        # locale's default encoding can fail or mis-decode non-ASCII text.
        with open(path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]

    try:
        if os.path.exists(SUBMISSIONS_FILE):
            return _read_jsonl(SUBMISSIONS_FILE)
    except Exception as e:
        # Best effort: an unreadable local copy falls through to the Hub.
        print(f"[fetch_submissions] local read failed: {e}", flush=True)

    try:
        path = hf_hub_download(REPO_ID, SUBMISSIONS_FILE, repo_type="space",
                               token=HF_TOKEN or None)
        return _read_jsonl(path)
    except HfHubHTTPError as e:
        print(f"[fetch_submissions] hub download HTTP error: {e}", flush=True)
        return []
    except Exception as e:
        print(f"[fetch_submissions] hub download failed: {e}", flush=True)
        return []
def write_submissions(items: List[Dict]) -> None:
    """Persist ``items`` as ``submissions.jsonl`` locally and on the Hub.

    Each item is serialised as one JSON object per line (non-ASCII text is
    kept verbatim). The local file is always rewritten; the upload to the
    Space repo only happens when ``HF_TOKEN`` is set.

    Args:
        items: The complete list of submission records to store.

    Raises:
        Whatever ``open`` or ``api.upload_file`` raises; errors are not
        caught here.
    """
    content = "\n".join(json.dumps(x, ensure_ascii=False) for x in items) + "\n"

    # Write the local copy as UTF-8 so it carries the same encoding as the
    # bytes uploaded below, rather than the locale's default encoding.
    with open(SUBMISSIONS_FILE, "w", encoding="utf-8") as f:
        f.write(content)

    if HF_TOKEN:
        api.upload_file(
            path_or_fileobj=content.encode("utf-8"),
            path_in_repo=SUBMISSIONS_FILE,
            repo_id=REPO_ID,
            repo_type="space",
            commit_message=f"Update submissions ({len(items)} entries)",
        )
# ---------- HTML leaderboard rendering ----------
# Stylesheet string for the leaderboard; as written here it holds only a
# single newline.
LEADERBOARD_CSS = "\n"
# Global JS injected via Blocks(js=...). It hooks every .vbench-tbl table
# in the DOM (immediately + via MutationObserver as Gradio re-renders the
# HTML component) and attaches click-to-sort + above/below-mean shading.
# Inline