davanstrien HF Staff committed on
Commit
05a4bf2
·
verified ·
1 Parent(s): 7f36893

Add scheduled data update script

Browse files
Files changed (1) hide show
  1. update_data.py +223 -0
update_data.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.11"
3
+ # dependencies = [
4
+ # "httpx",
5
+ # "huggingface_hub",
6
+ # ]
7
+ # ///
8
+ """
9
+ Scheduled job: regenerate data.json and upload to the benchmark-race Space.
10
+
11
+ Run locally:
12
+ uv run update_data.py
13
+
14
+ Schedule on HF Jobs (twice daily):
15
+ hf jobs scheduled uv run "0 8,20 * * *" \
16
+ --secrets HF_TOKEN \
17
+ https://huggingface.co/spaces/davanstrien/benchmark-race/resolve/main/update_data.py
18
+ """
19
+
20
+ import json
21
+ import os
22
+ import re
23
+ import tempfile
24
+ from concurrent.futures import ThreadPoolExecutor, as_completed
25
+ from datetime import datetime, timezone
26
+ from pathlib import Path
27
+
28
+ import httpx
29
+ from huggingface_hub import HfApi
30
+
31
# Space repository that receives the regenerated data.json.
SPACE_REPO = "davanstrien/benchmark-race"

# One entry per leaderboard to pull. "gated" marks datasets whose leaderboard
# is only requested when a token is available.
BENCHMARK_CONFIGS = [
    dict(
        dataset="SWE-bench/SWE-bench_Verified",
        key="sweVerified",
        name="SWE-bench Verified",
        gated=False,
    ),
    dict(
        dataset="ScaleAI/SWE-bench_Pro",
        key="swePro",
        name="SWE-bench Pro",
        gated=False,
    ),
    dict(
        dataset="TIGER-Lab/MMLU-Pro",
        key="mmluPro",
        name="MMLU-Pro",
        gated=False,
    ),
    dict(
        dataset="Idavidrein/gpqa",
        key="gpqa",
        name="GPQA Diamond",
        gated=True,
    ),
    dict(
        dataset="cais/hle",
        key="hle",
        name="HLE",
        gated=True,
    ),
    dict(
        dataset="MathArena/aime_2026",
        key="aime2026",
        name="AIME 2026",
        gated=False,
    ),
    dict(
        dataset="MathArena/hmmt_feb_2026",
        key="hmmt2026",
        name="HMMT Feb 2026",
        gated=False,
    ),
    dict(
        dataset="allenai/olmOCR-bench",
        key="olmOcr",
        name="olmOCR-bench",
        gated=False,
    ),
    dict(
        dataset="harborframework/terminal-bench-2.0",
        key="terminalBench",
        name="Terminal-Bench 2.0",
        gated=False,
    ),
    dict(
        dataset="FutureMa/EvasionBench",
        key="evasionBench",
        name="EvasionBench",
        gated=False,
    ),
]

# Colors handed out to providers in order; reused cyclically by the caller.
PALETTE = [
    "#6366f1",
    "#0d9488",
    "#d97706",
    "#e11d48",
    "#7c3aed",
    "#16a34a",
    "#2563eb",
    "#ea580c",
    "#8b5cf6",
    "#0891b2",
    "#c026d3",
    "#65a30d",
    "#dc2626",
    "#0284c7",
    "#a21caf",
    "#059669",
    "#9333ea",
    "#ca8a04",
    "#be185d",
    "#0369a1",
]
52
+
53
+
54
+ def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
55
+ url = f"https://huggingface.co/api/datasets/{config['dataset']}/leaderboard"
56
+ headers = {}
57
+ if config["gated"] and hf_token:
58
+ headers["Authorization"] = f"Bearer {hf_token}"
59
+ elif config["gated"]:
60
+ print(f" {config['name']}: skipped (gated, no token)")
61
+ return []
62
+
63
+ print(f" {config['name']}: fetching scores...")
64
+ try:
65
+ resp = httpx.get(url, headers=headers, timeout=30)
66
+ if resp.status_code != 200:
67
+ print(f" skip (status {resp.status_code})")
68
+ return []
69
+ data = resp.json()
70
+ if not isinstance(data, list):
71
+ return []
72
+ except Exception as e:
73
+ print(f" error: {e}")
74
+ return []
75
+
76
+ seen = {}
77
+ for entry in data:
78
+ model_id = entry.get("modelId")
79
+ score = entry.get("value")
80
+ if model_id and score is not None:
81
+ score = float(score)
82
+ if model_id not in seen or score > seen[model_id]:
83
+ seen[model_id] = score
84
+
85
+ print(f" {len(seen)} models")
86
+ return [{"model_id": mid, "score": s} for mid, s in seen.items()]
87
+
88
+
89
+ def fetch_model_dates(model_ids: list[str], hf_token: str | None) -> dict[str, dict]:
90
+ api = HfApi()
91
+ results = {}
92
+
93
+ def _get_info(mid):
94
+ try:
95
+ info = api.model_info(mid, token=hf_token)
96
+ params_b = None
97
+ if info.safetensors and hasattr(info.safetensors, "total"):
98
+ params_b = round(info.safetensors.total / 1_000_000_000, 1)
99
+ if params_b is None:
100
+ m = re.findall(r"[-_/](\d+\.?\d*)[Bb](?:[-_/]|$)", mid)
101
+ if m:
102
+ params_b = max(float(x) for x in m)
103
+ return mid, info.created_at.strftime("%Y-%m-%d"), params_b
104
+ except Exception:
105
+ return mid, None, None
106
+
107
+ with ThreadPoolExecutor(max_workers=8) as pool:
108
+ futures = {pool.submit(_get_info, mid): mid for mid in model_ids}
109
+ for f in as_completed(futures):
110
+ mid, date, params = f.result()
111
+ if date:
112
+ results[mid] = {"date": date, "parameters_b": params}
113
+
114
+ return results
115
+
116
+
117
+ def fetch_logo(provider: str) -> str | None:
118
+ try:
119
+ resp = httpx.get(
120
+ f"https://huggingface.co/api/organizations/{provider}/avatar",
121
+ timeout=5,
122
+ )
123
+ if resp.status_code == 200:
124
+ return resp.json().get("avatarUrl")
125
+ except Exception:
126
+ pass
127
+ return None
128
+
129
+
130
def fetch_all_logos(providers: set[str]) -> dict[str, str]:
    """Fetch avatar URLs for all *providers* concurrently.

    Args:
        providers: Provider names passed one by one to ``fetch_logo``.

    Returns:
        ``{provider: avatar_url}`` for every provider with a logo. Keys are
        in sorted order, so the mapping does not depend on which request
        happens to finish first.
    """
    ordered = sorted(providers)
    if not ordered:
        return {}

    with ThreadPoolExecutor(max_workers=8) as pool:
        # Executor.map yields results in input order, unlike as_completed.
        urls = pool.map(fetch_logo, ordered)
        return {name: url for name, url in zip(ordered, urls) if url}
140
+
141
+
142
def main():
    """Regenerate ``data.json`` from the leaderboards and upload it to the Space.

    Reads ``HF_TOKEN`` from the environment, collects scores for every entry
    in ``BENCHMARK_CONFIGS``, keeps only models with a known creation date,
    attaches provider logos and colors, and uploads the result as
    ``data.json`` to ``SPACE_REPO``.

    Raises:
        SystemExit: If no benchmark produced any usable model. Uploading in
            that case would replace the published file with an empty one.
    """
    hf_token = os.environ.get("HF_TOKEN")
    print("Generating data.json for bar chart race\n")

    # key -> {"name": display name, "rows": [{"model_id", "score"}, ...]}
    all_scores: dict[str, dict] = {}
    all_model_ids: set[str] = set()

    for config in BENCHMARK_CONFIGS:
        rows = fetch_leaderboard(config, hf_token)
        if rows:
            all_scores[config["key"]] = {"name": config["name"], "rows": rows}
            all_model_ids.update(r["model_id"] for r in rows)

    print(f"\n{len(all_model_ids)} unique models across {len(all_scores)} benchmarks")
    print("Fetching model dates...")
    model_dates = fetch_model_dates(sorted(all_model_ids), hf_token)
    print(f" got dates for {len(model_dates)}/{len(all_model_ids)} models")

    all_providers: set[str] = set()
    benchmarks = {}

    for key, info in all_scores.items():
        models = []
        for row in info["rows"]:
            mid = row["model_id"]
            if mid not in model_dates:
                # Without a date the model cannot be placed in the output.
                continue
            # "org/name" -> provider "org"; an ID without "/" is its own provider.
            provider = mid.split("/")[0]
            all_providers.add(provider)
            models.append({
                "model_id": mid,
                "short_name": mid.split("/")[-1],
                "provider": provider,
                "score": round(row["score"], 2),
                "date": model_dates[mid]["date"],
            })
        if models:
            benchmarks[key] = {"name": info["name"], "models": models}

    if not benchmarks:
        # Typically a network or API outage: keep the previously published data.
        raise SystemExit("No benchmark data collected; refusing to overwrite data.json")

    print(f"\nFetching logos for {len(all_providers)} providers...")
    logos = fetch_all_logos(all_providers)
    print(f" got {len(logos)} logos")

    # Sorting keeps the provider -> color assignment independent of set order.
    color_map = {
        provider: PALETTE[i % len(PALETTE)]
        for i, provider in enumerate(sorted(all_providers))
    }

    output = {
        "benchmarks": benchmarks,
        "logos": logos,
        "colors": color_map,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }

    data_json = json.dumps(output, indent=2)
    print(f"\nGenerated {len(data_json) / 1024:.1f} KB")
    for bm in benchmarks.values():
        print(f" {bm['name']}: {len(bm['models'])} models")

    # Upload to Space. upload_file accepts raw bytes, so no temp file is needed.
    print(f"\nUploading data.json to {SPACE_REPO}...")
    api = HfApi(token=hf_token)
    api.upload_file(
        path_or_fileobj=data_json.encode("utf-8"),
        path_in_repo="data.json",
        repo_id=SPACE_REPO,
        repo_type="space",
        commit_message=f"Update data.json ({datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')})",
    )
    print("Done!")


if __name__ == "__main__":
    main()