elevow committed on
Commit
1d58c43
·
verified ·
1 Parent(s): 06c9f5d

Update update_data.py

Browse files
Files changed (1) hide show
  1. update_data.py +56 -58
update_data.py CHANGED
@@ -1,4 +1,4 @@
1
- # /// script
2
  # requires-python = ">=3.11"
3
  # dependencies = [
4
  # "httpx",
@@ -6,17 +6,19 @@
6
  # ]
7
  # ///
8
  """
9
- Scheduled job: regenerate data.json and upload to the benchmark-race Space.
10
-
11
- Run locally:
12
- uv run update_data.py
13
-
14
- Schedule on HF Jobs (twice daily):
15
- hf jobs scheduled uv run "0 8,20 * * *" \
16
- --secrets HF_TOKEN \
17
- https://huggingface.co/spaces/davanstrien/benchmark-race/resolve/main/update_data.py
 
 
18
  """
19
-
20
  import json
21
  import os
22
  import re
@@ -24,12 +26,24 @@ import tempfile
24
  from concurrent.futures import ThreadPoolExecutor, as_completed
25
  from datetime import datetime, timezone
26
  from pathlib import Path
27
-
28
  import httpx
29
  from huggingface_hub import HfApi
30
-
31
- SPACE_REPO = "davanstrien/benchmark-race"
32
-
 
 
 
 
 
 
 
 
 
 
 
 
33
  BENCHMARK_CONFIGS = [
34
  {"dataset": "SWE-bench/SWE-bench_Verified", "key": "sweVerified", "name": "SWE-bench Verified", "gated": False},
35
  {"dataset": "ScaleAI/SWE-bench_Pro", "key": "swePro", "name": "SWE-bench Pro", "gated": False},
@@ -42,15 +56,25 @@ BENCHMARK_CONFIGS = [
42
  {"dataset": "harborframework/terminal-bench-2.0", "key": "terminalBench", "name": "Terminal-Bench 2.0", "gated": False},
43
  {"dataset": "FutureMa/EvasionBench", "key": "evasionBench", "name": "EvasionBench", "gated": False},
44
  ]
45
-
46
  PALETTE = [
47
  "#6366f1", "#0d9488", "#d97706", "#e11d48", "#7c3aed",
48
  "#16a34a", "#2563eb", "#ea580c", "#8b5cf6", "#0891b2",
49
  "#c026d3", "#65a30d", "#dc2626", "#0284c7", "#a21caf",
50
  "#059669", "#9333ea", "#ca8a04", "#be185d", "#0369a1",
51
  ]
52
-
53
-
 
 
 
 
 
 
 
 
 
 
 
54
  def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
55
  url = f"https://huggingface.co/api/datasets/{config['dataset']}/leaderboard"
56
  headers = {}
@@ -59,7 +83,6 @@ def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
59
  elif config["gated"]:
60
  print(f" {config['name']}: skipped (gated, no token)")
61
  return []
62
-
63
  print(f" {config['name']}: fetching scores...")
64
  try:
65
  resp = httpx.get(url, headers=headers, timeout=30)
@@ -72,8 +95,7 @@ def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
72
  except Exception as e:
73
  print(f" error: {e}")
74
  return []
75
-
76
- seen = {}
77
  for entry in data:
78
  model_id = entry.get("modelId")
79
  score = entry.get("value")
@@ -81,16 +103,12 @@ def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
81
  score = float(score)
82
  if model_id not in seen or score > seen[model_id]:
83
  seen[model_id] = score
84
-
85
  print(f" {len(seen)} models")
86
  return [{"model_id": mid, "score": s} for mid, s in seen.items()]
87
-
88
-
89
  def fetch_model_dates(model_ids: list[str], hf_token: str | None) -> dict[str, dict]:
90
  api = HfApi()
91
- results = {}
92
-
93
- def _get_info(mid):
94
  try:
95
  info = api.model_info(mid, token=hf_token)
96
  params_b = None
@@ -103,17 +121,13 @@ def fetch_model_dates(model_ids: list[str], hf_token: str | None) -> dict[str, d
103
  return mid, info.created_at.strftime("%Y-%m-%d"), params_b
104
  except Exception:
105
  return mid, None, None
106
-
107
  with ThreadPoolExecutor(max_workers=8) as pool:
108
  futures = {pool.submit(_get_info, mid): mid for mid in model_ids}
109
  for f in as_completed(futures):
110
  mid, date, params = f.result()
111
  if date:
112
  results[mid] = {"date": date, "parameters_b": params}
113
-
114
  return results
115
-
116
-
117
  def fetch_logo(provider: str) -> str | None:
118
  try:
119
  resp = httpx.get(
@@ -125,10 +139,8 @@ def fetch_logo(provider: str) -> str | None:
125
  except Exception:
126
  pass
127
  return None
128
-
129
-
130
  def fetch_all_logos(providers: set[str]) -> dict[str, str]:
131
- logos = {}
132
  with ThreadPoolExecutor(max_workers=8) as pool:
133
  futures = {pool.submit(fetch_logo, p): p for p in providers}
134
  for f in as_completed(futures):
@@ -137,31 +149,24 @@ def fetch_all_logos(providers: set[str]) -> dict[str, str]:
137
  if url:
138
  logos[p] = url
139
  return logos
140
-
141
-
142
- def main():
143
  hf_token = os.environ.get("HF_TOKEN")
144
- print("Generating data.json for bar chart race\n")
145
-
146
- all_scores: dict[str, list[dict]] = {}
147
  all_model_ids: set[str] = set()
148
-
149
  for config in BENCHMARK_CONFIGS:
150
  rows = fetch_leaderboard(config, hf_token)
151
  if rows:
152
  all_scores[config["key"]] = {"name": config["name"], "rows": rows}
153
  all_model_ids.update(r["model_id"] for r in rows)
154
-
155
  print(f"\n{len(all_model_ids)} unique models across {len(all_scores)} benchmarks")
156
  print("Fetching model dates...")
157
  model_dates = fetch_model_dates(list(all_model_ids), hf_token)
158
  print(f" got dates for {len(model_dates)}/{len(all_model_ids)} models")
159
-
160
  all_providers: set[str] = set()
161
- benchmarks = {}
162
-
163
  for key, info in all_scores.items():
164
- models = []
165
  for row in info["rows"]:
166
  mid = row["model_id"]
167
  if mid not in model_dates:
@@ -178,34 +183,29 @@ def main():
178
  })
179
  if models:
180
  benchmarks[key] = {"name": info["name"], "models": models}
181
-
182
  print(f"\nFetching logos for {len(all_providers)} providers...")
183
  logos = fetch_all_logos(all_providers)
184
  print(f" got {len(logos)} logos")
185
-
186
- color_map = {}
187
  for i, provider in enumerate(sorted(all_providers)):
188
  color_map[provider] = PALETTE[i % len(PALETTE)]
189
-
 
190
  output = {
191
  "benchmarks": benchmarks,
192
  "logos": logos,
193
  "colors": color_map,
194
  "generated_at": datetime.now(timezone.utc).isoformat(),
195
  }
196
-
197
  data_json = json.dumps(output, indent=2)
198
  print(f"\nGenerated {len(data_json) / 1024:.1f} KB")
199
  for key, bm in benchmarks.items():
200
  print(f" {bm['name']}: {len(bm['models'])} models")
201
-
202
- # Upload to Space
203
  print(f"\nUploading data.json to {SPACE_REPO}...")
204
  api = HfApi()
205
- with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
206
  f.write(data_json)
207
  tmp_path = f.name
208
-
209
  try:
210
  api.upload_file(
211
  path_or_fileobj=tmp_path,
@@ -217,7 +217,5 @@ def main():
217
  print("Done!")
218
  finally:
219
  Path(tmp_path).unlink(missing_ok=True)
220
-
221
-
222
  if __name__ == "__main__":
223
- main()
 
1
+ # /// script
2
  # requires-python = ">=3.11"
3
  # dependencies = [
4
  # "httpx",
 
6
  # ]
7
  # ///
8
  """
9
+ Regenerate data.json and upload to the elevow/benchmarks Space.
10
+ Source template: duplicated from davanstrien/benchmark-race
11
+ https://huggingface.co/spaces/elevow/benchmarks
12
+ Run locally (from repo root or this folder):
13
+ export HF_TOKEN=hf_...
14
+ uv run scripts/elevow-benchmarks/update_data.py
15
+ Or copy this file to your Space repo root on Hugging Face and run there.
16
+ Schedule on HF Jobs (example — point to YOUR raw file):
17
+ hf jobs scheduled uv run "0 8,20 * * *" \\
18
+ --secrets HF_TOKEN \\
19
+ https://huggingface.co/spaces/elevow/benchmarks/resolve/main/update_data.py
20
  """
21
+ from __future__ import annotations
22
  import json
23
  import os
24
  import re
 
26
  from concurrent.futures import ThreadPoolExecutor, as_completed
27
  from datetime import datetime, timezone
28
  from pathlib import Path
29
+ from typing import Any
30
  import httpx
31
  from huggingface_hub import HfApi
32
# Upload target: your fork (was davanstrien/benchmark-race in upstream).
# Overridable at runtime via the BENCHMARK_SPACE_REPO environment variable.
SPACE_REPO = os.environ.get("BENCHMARK_SPACE_REPO", "elevow/benchmarks")
# Favicon-service URL used as the Aligned brand logo (128px, domain percent-encoded).
ALIGNED_LOGO_URL = (
    "https://www.google.com/s2/favicons?sz=128&domain_url="
    "https%3A%2F%2Ftryaligned.ai"
)
# Key under which the Aligned logo URL and bar color are stored in the output JSON.
ALIGNED_LOGOS_KEY = "AlignedAI"
# Bar color applied to Aligned-branded entries (emerald; this hex also appears in PALETTE).
ALIGNED_COLOR = "#059669"
# Full HF model_id strings from leaderboard APIs — add any row that should show Aligned branding.
MODEL_IDS_USE_ALIGNED_LOGO: frozenset[str] = frozenset(
    {
        # Populate from live leaderboard responses, e.g.:
        # "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
)
47
  BENCHMARK_CONFIGS = [
48
  {"dataset": "SWE-bench/SWE-bench_Verified", "key": "sweVerified", "name": "SWE-bench Verified", "gated": False},
49
  {"dataset": "ScaleAI/SWE-bench_Pro", "key": "swePro", "name": "SWE-bench Pro", "gated": False},
 
56
  {"dataset": "harborframework/terminal-bench-2.0", "key": "terminalBench", "name": "Terminal-Bench 2.0", "gated": False},
57
  {"dataset": "FutureMa/EvasionBench", "key": "evasionBench", "name": "EvasionBench", "gated": False},
58
  ]
 
59
  PALETTE = [
60
  "#6366f1", "#0d9488", "#d97706", "#e11d48", "#7c3aed",
61
  "#16a34a", "#2563eb", "#ea580c", "#8b5cf6", "#0891b2",
62
  "#c026d3", "#65a30d", "#dc2626", "#0284c7", "#a21caf",
63
  "#059669", "#9333ea", "#ca8a04", "#be185d", "#0369a1",
64
  ]
65
+ def inject_aligned_race_branding(
66
+ benchmarks: dict[str, Any],
67
+ logos: dict[str, str],
68
+ color_map: dict[str, str],
69
+ ) -> None:
70
+ """Add Aligned logo URL, optional per-model race_logo_key, and bar color."""
71
+ logos[ALIGNED_LOGOS_KEY] = ALIGNED_LOGO_URL
72
+ color_map[ALIGNED_LOGOS_KEY] = ALIGNED_COLOR
73
+ for _key, bm in benchmarks.items():
74
+ for m in bm.get("models") or []:
75
+ mid = m.get("model_id") or ""
76
+ if mid in MODEL_IDS_USE_ALIGNED_LOGO:
77
+ m["race_logo_key"] = ALIGNED_LOGOS_KEY
78
  def fetch_leaderboard(config: dict, hf_token: str | None) -> list[dict]:
79
  url = f"https://huggingface.co/api/datasets/{config['dataset']}/leaderboard"
80
  headers = {}
 
83
  elif config["gated"]:
84
  print(f" {config['name']}: skipped (gated, no token)")
85
  return []
 
86
  print(f" {config['name']}: fetching scores...")
87
  try:
88
  resp = httpx.get(url, headers=headers, timeout=30)
 
95
  except Exception as e:
96
  print(f" error: {e}")
97
  return []
98
+ seen: dict[str, float] = {}
 
99
  for entry in data:
100
  model_id = entry.get("modelId")
101
  score = entry.get("value")
 
103
  score = float(score)
104
  if model_id not in seen or score > seen[model_id]:
105
  seen[model_id] = score
 
106
  print(f" {len(seen)} models")
107
  return [{"model_id": mid, "score": s} for mid, s in seen.items()]
 
 
108
  def fetch_model_dates(model_ids: list[str], hf_token: str | None) -> dict[str, dict]:
109
  api = HfApi()
110
+ results: dict[str, dict] = {}
111
+ def _get_info(mid: str):
 
112
  try:
113
  info = api.model_info(mid, token=hf_token)
114
  params_b = None
 
121
  return mid, info.created_at.strftime("%Y-%m-%d"), params_b
122
  except Exception:
123
  return mid, None, None
 
124
  with ThreadPoolExecutor(max_workers=8) as pool:
125
  futures = {pool.submit(_get_info, mid): mid for mid in model_ids}
126
  for f in as_completed(futures):
127
  mid, date, params = f.result()
128
  if date:
129
  results[mid] = {"date": date, "parameters_b": params}
 
130
  return results
 
 
131
  def fetch_logo(provider: str) -> str | None:
132
  try:
133
  resp = httpx.get(
 
139
  except Exception:
140
  pass
141
  return None
 
 
142
  def fetch_all_logos(providers: set[str]) -> dict[str, str]:
143
+ logos: dict[str, str] = {}
144
  with ThreadPoolExecutor(max_workers=8) as pool:
145
  futures = {pool.submit(fetch_logo, p): p for p in providers}
146
  for f in as_completed(futures):
 
149
  if url:
150
  logos[p] = url
151
  return logos
152
+ def main() -> None:
 
 
153
  hf_token = os.environ.get("HF_TOKEN")
154
+ print(f"Generating data.json upload to {SPACE_REPO}\n")
155
+ all_scores: dict[str, dict] = {}
 
156
  all_model_ids: set[str] = set()
 
157
  for config in BENCHMARK_CONFIGS:
158
  rows = fetch_leaderboard(config, hf_token)
159
  if rows:
160
  all_scores[config["key"]] = {"name": config["name"], "rows": rows}
161
  all_model_ids.update(r["model_id"] for r in rows)
 
162
  print(f"\n{len(all_model_ids)} unique models across {len(all_scores)} benchmarks")
163
  print("Fetching model dates...")
164
  model_dates = fetch_model_dates(list(all_model_ids), hf_token)
165
  print(f" got dates for {len(model_dates)}/{len(all_model_ids)} models")
 
166
  all_providers: set[str] = set()
167
+ benchmarks: dict[str, Any] = {}
 
168
  for key, info in all_scores.items():
169
+ models: list[dict] = []
170
  for row in info["rows"]:
171
  mid = row["model_id"]
172
  if mid not in model_dates:
 
183
  })
184
  if models:
185
  benchmarks[key] = {"name": info["name"], "models": models}
 
186
  print(f"\nFetching logos for {len(all_providers)} providers...")
187
  logos = fetch_all_logos(all_providers)
188
  print(f" got {len(logos)} logos")
189
+ color_map: dict[str, str] = {}
 
190
  for i, provider in enumerate(sorted(all_providers)):
191
  color_map[provider] = PALETTE[i % len(PALETTE)]
192
+ inject_aligned_race_branding(benchmarks, logos, color_map)
193
+ print(f" injected {ALIGNED_LOGOS_KEY} logo + color; race_logo_key on {len(MODEL_IDS_USE_ALIGNED_LOGO)} id(s) configured")
194
  output = {
195
  "benchmarks": benchmarks,
196
  "logos": logos,
197
  "colors": color_map,
198
  "generated_at": datetime.now(timezone.utc).isoformat(),
199
  }
 
200
  data_json = json.dumps(output, indent=2)
201
  print(f"\nGenerated {len(data_json) / 1024:.1f} KB")
202
  for key, bm in benchmarks.items():
203
  print(f" {bm['name']}: {len(bm['models'])} models")
 
 
204
  print(f"\nUploading data.json to {SPACE_REPO}...")
205
  api = HfApi()
206
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
207
  f.write(data_json)
208
  tmp_path = f.name
 
209
  try:
210
  api.upload_file(
211
  path_or_fileobj=tmp_path,
 
217
  print("Done!")
218
  finally:
219
  Path(tmp_path).unlink(missing_ok=True)
 
 
220
  if __name__ == "__main__":
221
+ main()