Spaces:
Sleeping
Sleeping
| # scripts/sync_lmarena.py | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import re | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from typing import Optional, Tuple | |
| from huggingface_hub import HfApi, hf_hub_download | |
# Hugging Face Space that the dated leaderboard CSVs are listed and downloaded from.
UPSTREAM_SPACE = "lmarena-ai/lmarena-leaderboard"  # source Space

# Local destination: the synced CSV is always written under one fixed name, with a
# JSON sidecar describing which upstream file it came from.
LOCAL_DIR = Path("data/lmarena")
LOCAL_CSV = LOCAL_DIR / "leaderboard_table_latest.csv"
LOCAL_META = LOCAL_DIR / "sync_meta.json"

# Matches leaderboard_table_YYYYMMDD.csv; group 1 captures the eight-digit date stamp.
# The pattern is anchored at the start, so names with a directory prefix do not match.
LEADERBOARD_RE = re.compile(r"^leaderboard_table_(\d{8})\.csv$")
def _pick_latest_leaderboard_file(files: list[str]) -> Optional[Tuple[str, str]]:
    """
    Return (filename, yyyymmdd) for the newest leaderboard_table_YYYYMMDD.csv found.

    Args:
        files: File names to search; entries that do not match LEADERBOARD_RE
            are ignored.

    Returns:
        The matching entry with the greatest date stamp, paired with that
        stamp, or None when no entry matches.
    """
    latest: Optional[Tuple[str, str]] = None
    for name in files:
        matched = LEADERBOARD_RE.match(name)
        if matched is None:
            continue
        stamp = matched.group(1)
        # YYYYMMDD stamps have a fixed width, so comparing them as strings orders
        # them chronologically. `>=` lets a later entry win a tie, which is the
        # same result as a stable sort followed by taking the last element.
        if latest is None or stamp >= latest[1]:
            latest = (name, stamp)
    return latest
def main() -> int:
    """
    Sync the newest upstream leaderboard CSV into the local data directory.

    Lists the files of UPSTREAM_SPACE, selects the newest
    leaderboard_table_YYYYMMDD.csv, downloads it, writes its bytes to LOCAL_CSV
    and records where it came from in LOCAL_META. The metadata is also printed.

    Returns:
        0 on success.

    Raises:
        RuntimeError: If the upstream Space has no leaderboard_table_YYYYMMDD.csv.
    """
    token = os.getenv("HF_TOKEN")  # optional for public, but recommended for rate limits
    api = HfApi(token=token)

    # List files from the upstream *space repo*.
    files = api.list_repo_files(repo_id=UPSTREAM_SPACE, repo_type="space")
    latest = _pick_latest_leaderboard_file(files)
    if latest is None:
        raise RuntimeError(
            f"No leaderboard_table_YYYYMMDD.csv found in upstream Space: {UPSTREAM_SPACE}"
        )
    filename, yyyymmdd = latest

    # Download the raw file to a temp location (hub cache) before copying it over.
    downloaded_path = hf_hub_download(
        repo_id=UPSTREAM_SPACE,
        repo_type="space",
        filename=filename,
        token=token,
    )

    LOCAL_DIR.mkdir(parents=True, exist_ok=True)
    # Copy the bytes instead of moving the file, so the downloaded copy stays in place.
    LOCAL_CSV.write_bytes(Path(downloaded_path).read_bytes())

    meta = {
        "source_space": UPSTREAM_SPACE,
        "source_filename": filename,
        "source_date": yyyymmdd,
        "synced_at_utc": datetime.now(timezone.utc).isoformat(),
    }
    # Serialize once and reuse the same text for the sidecar file and the console.
    meta_json = json.dumps(meta, indent=2)
    LOCAL_META.write_text(meta_json, encoding="utf-8")

    print(f"[OK] Synced {filename} -> {LOCAL_CSV}")
    print(meta_json)
    return 0
# Run the sync only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())