benchmarks

Running

App Files Files Community

elevow committed on Mar 27

Commit

5b123b0

verified ·

1 Parent(s): 0918fd5

Update update_data.py

Browse files

Files changed (1) hide show

update_data.py +49 -20

update_data.py CHANGED Viewed

@@ -14,8 +14,11 @@ https://huggingface.co/spaces/elevow/benchmarks
 **Single file:** All Aligned race branding, axis relabeling, optional org-groq tagging, and
 offline ``patch_output_dict`` live here (no separate inject script).
-Populate ``MODEL_IDS_ALIGNED_AXIS_LABEL`` with full HF ``model_id`` strings (as leaderboards
-return them) to show **Aligned AI — {lane} · …** on race bar labels via rewritten ``short_name``.
 Run locally (from repo root or this folder):
     export HF_TOKEN=hf_...
@@ -27,6 +30,9 @@ Schedule on HF Jobs (example — point to YOUR raw file):
     hf jobs scheduled uv run "0 8,20 * * *" \\
         --secrets HF_TOKEN \\
         https://huggingface.co/spaces/elevow/benchmarks/resolve/main/update_data.py
 """
 from __future__ import annotations
@@ -53,26 +59,23 @@ ALIGNED_LOGO_URL = (
 ALIGNED_LOGOS_KEY = "AlignedAI"
 ALIGNED_COLOR = "#059669"
-# Full HF model_id strings from leaderboard APIs — add any row that should show Aligned branding.
-MODEL_IDS_USE_ALIGNED_LOGO: frozenset[str] = frozenset(
     {
-        # Populate from live leaderboard responses, e.g.:
-        # "Qwen/Qwen2.5-Coder-32B-Instruct",
-    }
-)
-# HF benchmark-race charts label bars with `short_name`. For models you treat as Groq-hosted
-# Aligned references, rewrite that field to "Aligned AI — {lane} · {checkpoint}" (same lanes as
-# client GMCQ charts). Stock Space UI ignores `race_logo_key` unless you fork index.html; it
-# always uses `short_name` for the bar text.
-MODEL_IDS_ALIGNED_AXIS_LABEL: frozenset[str] = frozenset(
-    {
-        # Same strings as leaderboards return, e.g.:
         # "meta-llama/Llama-3.3-70B-Instruct",
         # "meta-llama/Llama-4-Scout-17B-16E-Instruct",
     }
 )
 # If True, tag every row whose HF org is literally "groq" with race_logo_key (rare on leaderboards).
 USE_ALIGNED_FOR_ORG_GROQ = False
@@ -154,13 +157,12 @@ def inject_aligned_race_branding(
         for m in bm.get("models") or []:
             mid = m.get("model_id") or ""
             provider = mid.split("/")[0] if "/" in mid else mid
-            use_logo = mid in MODEL_IDS_USE_ALIGNED_LOGO
-            use_axis = mid in MODEL_IDS_ALIGNED_AXIS_LABEL
             use_groq_org = USE_ALIGNED_FOR_ORG_GROQ and provider.lower() == "groq"
-            if use_logo or use_axis or use_groq_org:
                 m["race_logo_key"] = ALIGNED_LOGOS_KEY
                 logo_n += 1
-            if use_axis:
                 orig_sn = m.get("short_name") or (mid.split("/")[-1] if "/" in mid else mid)
                 m["chart_full_name"] = f"Published HF model: {orig_sn.replace('-', ' ')}"
                 m["short_name"] = aligned_axis_label_from_model_id(mid)
@@ -169,6 +171,25 @@ def inject_aligned_race_branding(
     return logo_n, axis_n
 def patch_output_dict(output: dict[str, Any]) -> dict[str, Any]:
     """Deep-copy a loaded data.json dict, apply Aligned branding in place, return the copy."""
     out = json.loads(json.dumps(output))
@@ -281,6 +302,12 @@ def main() -> None:
             all_model_ids.update(r["model_id"] for r in rows)
     print(f"\n{len(all_model_ids)} unique models across {len(all_scores)} benchmarks")
     print("Fetching model dates...")
     model_dates = fetch_model_dates(list(all_model_ids), hf_token)
     print(f"  got dates for {len(model_dates)}/{len(all_model_ids)} models")
@@ -352,6 +379,8 @@ def main() -> None:
     finally:
         Path(tmp_path).unlink(missing_ok=True)
 if __name__ == "__main__":
     main()

 **Single file:** All Aligned race branding, axis relabeling, optional org-groq tagging, and
 offline ``patch_output_dict`` live here (no separate inject script).
+1. Add HF ``model_id`` strings to ``MODEL_IDS_ALIGNED_ON_RACE`` (exact strings — use
+   ``DUMP_MODEL_IDS=1`` once to list them). That rewrites ``short_name`` and sets ``race_logo_key``.
+2. **Upload the forked** ``scripts/elevow-benchmarks/index.html`` **to your Space** (same folder as
+   ``data.json``). Upstream benchmark-race ignores ``race_logo_key``; without this file you will
+   not see the Aligned logo or Aligned bar color.
 Run locally (from repo root or this folder):
     export HF_TOKEN=hf_...
     hf jobs scheduled uv run "0 8,20 * * *" \\
         --secrets HF_TOKEN \\
         https://huggingface.co/spaces/elevow/benchmarks/resolve/main/update_data.py
+Upload the forked UI in the same run as the data upload (one shot):
+    UPLOAD_INDEX_HTML=1 uv run scripts/elevow-benchmarks/update_data.py
 """
 from __future__ import annotations
 ALIGNED_LOGOS_KEY = "AlignedAI"
 ALIGNED_COLOR = "#059669"
+# Preferred: one set that drives the **Aligned bar label**, **race_logo_key**, and Aligned bar color.
+# Run with DUMP_MODEL_IDS=1 once to print every model_id the script saw (copy exact strings).
+MODEL_IDS_ALIGNED_ON_RACE: frozenset[str] = frozenset(
     {
         # "meta-llama/Llama-3.3-70B-Instruct",
         # "meta-llama/Llama-4-Scout-17B-16E-Instruct",
     }
 )
+# Legacy: unioned with MODEL_IDS_ALIGNED_ON_RACE (you can use any of these three sets).
+MODEL_IDS_USE_ALIGNED_LOGO: frozenset[str] = frozenset()
+MODEL_IDS_ALIGNED_AXIS_LABEL: frozenset[str] = frozenset()
+def _all_branded_model_ids() -> frozenset[str]:
+    return MODEL_IDS_ALIGNED_ON_RACE | MODEL_IDS_USE_ALIGNED_LOGO | MODEL_IDS_ALIGNED_AXIS_LABEL
 # If True, tag every row whose HF org is literally "groq" with race_logo_key (rare on leaderboards).
 USE_ALIGNED_FOR_ORG_GROQ = False
         for m in bm.get("models") or []:
             mid = m.get("model_id") or ""
             provider = mid.split("/")[0] if "/" in mid else mid
+            branded = mid in _all_branded_model_ids()
             use_groq_org = USE_ALIGNED_FOR_ORG_GROQ and provider.lower() == "groq"
+            if branded or use_groq_org:
                 m["race_logo_key"] = ALIGNED_LOGOS_KEY
                 logo_n += 1
+            if branded:
                 orig_sn = m.get("short_name") or (mid.split("/")[-1] if "/" in mid else mid)
                 m["chart_full_name"] = f"Published HF model: {orig_sn.replace('-', ' ')}"
                 m["short_name"] = aligned_axis_label_from_model_id(mid)
     return logo_n, axis_n
+def _upload_index_html_fork(api: HfApi) -> None:
+    """Stock benchmark-race ignores race_logo_key; upload sibling index.html when asked."""
+    flag = os.environ.get("UPLOAD_INDEX_HTML", "").lower()
+    if flag not in ("1", "true", "yes"):
+        return
+    index_path = Path(__file__).resolve().parent / "index.html"
+    if not index_path.is_file():
+        print("UPLOAD_INDEX_HTML set but scripts/elevow-benchmarks/index.html is missing.")
+        return
+    api.upload_file(
+        path_or_fileobj=str(index_path),
+        path_in_repo="index.html",
+        repo_id=SPACE_REPO,
+        repo_type="space",
+        commit_message=f"Update index.html Aligned fork ({datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')})",
+    )
+    print(f"Uploaded index.html → {SPACE_REPO}")
 def patch_output_dict(output: dict[str, Any]) -> dict[str, Any]:
     """Deep-copy a loaded data.json dict, apply Aligned branding in place, return the copy."""
     out = json.loads(json.dumps(output))
             all_model_ids.update(r["model_id"] for r in rows)
     print(f"\n{len(all_model_ids)} unique models across {len(all_scores)} benchmarks")
+    if os.environ.get("DUMP_MODEL_IDS"):
+        print("\n-- DUMP_MODEL_IDS (copy into MODEL_IDS_ALIGNED_ON_RACE) --")
+        for mid in sorted(all_model_ids):
+            print(mid)
+        print("-- end --\n")
     print("Fetching model dates...")
     model_dates = fetch_model_dates(list(all_model_ids), hf_token)
     print(f"  got dates for {len(model_dates)}/{len(all_model_ids)} models")
     finally:
         Path(tmp_path).unlink(missing_ok=True)
+    _upload_index_html_fork(api)
 if __name__ == "__main__":
     main()