Michael Rabinovich committed on
Commit ·
c4e21b3
1
Parent(s): 6e3ab50
leaderboard: format submitted_at as `YYYY-MM-DD HH:MM UTC`; lock tables read-only
Browse files
Table cells were editable (gradio_leaderboard inherits gr.Dataframe's
interactive default); add interactive=False to both Validated and
Unvalidated widgets so cell contents aren't accidentally typed over.
submitted_at column rendered as raw ISO-8601 ("2026-05-28T07:13:16Z");
apply the same _fmt_timestamp helper the detail panel already uses so
the table reads as "2026-05-28 07:13 UTC". Helper moved into
leaderboard.py (formatter module) and re-imported by app.py.
- app.py +3 -19
- leaderboard.py +24 -0
- tests/test_leaderboard.py +22 -1
app.py
CHANGED
|
@@ -13,7 +13,6 @@ from __future__ import annotations
|
|
| 13 |
import html
|
| 14 |
import logging
|
| 15 |
import os
|
| 16 |
-
import re
|
| 17 |
from functools import lru_cache
|
| 18 |
from pathlib import Path
|
| 19 |
|
|
@@ -31,6 +30,7 @@ from leaderboard import (
|
|
| 31 |
LEADERBOARD_DATATYPES,
|
| 32 |
LEADERBOARD_HIDE_COLUMNS,
|
| 33 |
VALIDATED_LEADERBOARD_DATATYPES,
|
|
|
|
| 34 |
build_combined_csv,
|
| 35 |
load_leaderboard_split,
|
| 36 |
)
|
|
@@ -92,8 +92,6 @@ Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
|
|
| 92 |
|
| 93 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
| 94 |
|
| 95 |
-
_ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
|
| 96 |
-
|
| 97 |
|
| 98 |
def _has(value) -> bool:
|
| 99 |
"""True for values that should show up in the detail panel."""
|
|
@@ -104,22 +102,6 @@ def _has(value) -> bool:
|
|
| 104 |
return str(value).strip() != ""
|
| 105 |
|
| 106 |
|
| 107 |
-
def _fmt_timestamp(ts) -> str:
|
| 108 |
-
"""Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.
|
| 109 |
-
|
| 110 |
-
The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
|
| 111 |
-
minute-level UTC form is plenty for the detail panel, drops the
|
| 112 |
-
``T``/``Z`` punctuation, and renders timezone explicitly so a
|
| 113 |
-
reader doesn't have to know that "Z" means UTC.
|
| 114 |
-
"""
|
| 115 |
-
if not _has(ts):
|
| 116 |
-
return ""
|
| 117 |
-
m = _ISO_TS_RE.match(str(ts))
|
| 118 |
-
if m:
|
| 119 |
-
return f"{m.group(1)} {m.group(2)} UTC"
|
| 120 |
-
return str(ts)
|
| 121 |
-
|
| 122 |
-
|
| 123 |
def _build_report_iframe(html_bytes: bytes) -> str:
|
| 124 |
"""Wrap a fetched report's HTML bytes into a self-contained iframe.
|
| 125 |
|
|
@@ -301,6 +283,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
|
|
| 301 |
search_columns=["submission_name", "submitter_name"],
|
| 302 |
hide_columns=LEADERBOARD_HIDE_COLUMNS,
|
| 303 |
label="Validated Leaderboard",
|
|
|
|
| 304 |
)
|
| 305 |
unvalidated_view = Leaderboard(
|
| 306 |
value=initial_unvalidated,
|
|
@@ -308,6 +291,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
|
|
| 308 |
search_columns=["submission_name", "submitter_name"],
|
| 309 |
hide_columns=LEADERBOARD_HIDE_COLUMNS,
|
| 310 |
label="Unvalidated Leaderboard",
|
|
|
|
| 311 |
)
|
| 312 |
with gr.Row():
|
| 313 |
refresh_btn = gr.Button("Refresh", size="sm")
|
|
|
|
| 13 |
import html
|
| 14 |
import logging
|
| 15 |
import os
|
|
|
|
| 16 |
from functools import lru_cache
|
| 17 |
from pathlib import Path
|
| 18 |
|
|
|
|
| 30 |
LEADERBOARD_DATATYPES,
|
| 31 |
LEADERBOARD_HIDE_COLUMNS,
|
| 32 |
VALIDATED_LEADERBOARD_DATATYPES,
|
| 33 |
+
_fmt_timestamp,
|
| 34 |
build_combined_csv,
|
| 35 |
load_leaderboard_split,
|
| 36 |
)
|
|
|
|
| 92 |
|
| 93 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
| 94 |
|
|
|
|
|
|
|
| 95 |
|
| 96 |
def _has(value) -> bool:
|
| 97 |
"""True for values that should show up in the detail panel."""
|
|
|
|
| 102 |
return str(value).strip() != ""
|
| 103 |
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
def _build_report_iframe(html_bytes: bytes) -> str:
|
| 106 |
"""Wrap a fetched report's HTML bytes into a self-contained iframe.
|
| 107 |
|
|
|
|
| 283 |
search_columns=["submission_name", "submitter_name"],
|
| 284 |
hide_columns=LEADERBOARD_HIDE_COLUMNS,
|
| 285 |
label="Validated Leaderboard",
|
| 286 |
+
interactive=False,
|
| 287 |
)
|
| 288 |
unvalidated_view = Leaderboard(
|
| 289 |
value=initial_unvalidated,
|
|
|
|
| 291 |
search_columns=["submission_name", "submitter_name"],
|
| 292 |
hide_columns=LEADERBOARD_HIDE_COLUMNS,
|
| 293 |
label="Unvalidated Leaderboard",
|
| 294 |
+
interactive=False,
|
| 295 |
)
|
| 296 |
with gr.Row():
|
| 297 |
refresh_btn = gr.Button("Refresh", size="sm")
|
leaderboard.py
CHANGED
|
@@ -10,6 +10,7 @@ from __future__ import annotations
|
|
| 10 |
import json
|
| 11 |
import logging
|
| 12 |
import os
|
|
|
|
| 13 |
import tempfile
|
| 14 |
import time
|
| 15 |
import uuid
|
|
@@ -111,6 +112,27 @@ VALIDATED_LEADERBOARD_DATATYPES = _datatypes_for(VALIDATED_LEADERBOARD_COLS)
|
|
| 111 |
PENDING_CELL_TAG = "⏳ evaluating..."
|
| 112 |
FAILED_CELL_TAG = "✗ failed"
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
def _load_rows_from_hub() -> list[dict] | None:
|
| 116 |
"""Pull results.jsonl from the submissions dataset via raw HTTPS.
|
|
@@ -369,6 +391,8 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
|
|
| 369 |
out["model details (optional)"] = out["model details (optional)"].apply(
|
| 370 |
_agent_url_md
|
| 371 |
)
|
|
|
|
|
|
|
| 372 |
return out
|
| 373 |
|
| 374 |
|
|
|
|
| 10 |
import json
|
| 11 |
import logging
|
| 12 |
import os
|
| 13 |
+
import re
|
| 14 |
import tempfile
|
| 15 |
import time
|
| 16 |
import uuid
|
|
|
|
| 112 |
PENDING_CELL_TAG = "⏳ evaluating..."
|
| 113 |
FAILED_CELL_TAG = "✗ failed"
|
| 114 |
|
| 115 |
+
_ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _fmt_timestamp(ts) -> str:
|
| 119 |
+
"""Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.
|
| 120 |
+
|
| 121 |
+
The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
|
| 122 |
+
minute-level UTC form is plenty for the table + detail panel,
|
| 123 |
+
drops the ``T``/``Z`` punctuation, and renders the timezone
|
| 124 |
+
explicitly so a reader doesn't have to know that "Z" means UTC.
|
| 125 |
+
"""
|
| 126 |
+
if ts is None or (isinstance(ts, float) and pd.isna(ts)):
|
| 127 |
+
return ""
|
| 128 |
+
s = str(ts).strip()
|
| 129 |
+
if not s:
|
| 130 |
+
return ""
|
| 131 |
+
m = _ISO_TS_RE.match(s)
|
| 132 |
+
if m:
|
| 133 |
+
return f"{m.group(1)} {m.group(2)} UTC"
|
| 134 |
+
return s
|
| 135 |
+
|
| 136 |
|
| 137 |
def _load_rows_from_hub() -> list[dict] | None:
|
| 138 |
"""Pull results.jsonl from the submissions dataset via raw HTTPS.
|
|
|
|
| 391 |
out["model details (optional)"] = out["model details (optional)"].apply(
|
| 392 |
_agent_url_md
|
| 393 |
)
|
| 394 |
+
if "submitted_at" in out.columns:
|
| 395 |
+
out["submitted_at"] = out["submitted_at"].apply(_fmt_timestamp)
|
| 396 |
return out
|
| 397 |
|
| 398 |
|
tests/test_leaderboard.py
CHANGED
|
@@ -114,7 +114,9 @@ def test_field_passthrough(monkeypatch):
|
|
| 114 |
# Use submitter_name to identify Beta (its submission_name is now
|
| 115 |
# wrapped as a markdown link).
|
| 116 |
beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0]
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
assert beta["cadgenbench_version"] == "0.1.0"
|
| 119 |
|
| 120 |
assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
|
|
@@ -239,6 +241,25 @@ def test_build_combined_csv_orders_validated_first(monkeypatch):
|
|
| 239 |
assert s == "validated", f"unexpected status before unvalidated tier: {s!r}"
|
| 240 |
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
def test_datatypes_align_with_columns():
|
| 243 |
"""Per-column datatype lists track the column-list lengths.
|
| 244 |
|
|
|
|
| 114 |
# Use submitter_name to identify Beta (its submission_name is now
|
| 115 |
# wrapped as a markdown link).
|
| 116 |
beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0]
|
| 117 |
+
# submitted_at is rendered as `YYYY-MM-DD HH:MM UTC` by
|
| 118 |
+
# _fmt_timestamp; the underlying ISO-8601 string is the input.
|
| 119 |
+
assert beta["submitted_at"] == "2026-05-02 10:00 UTC"
|
| 120 |
assert beta["cadgenbench_version"] == "0.1.0"
|
| 121 |
|
| 122 |
assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
|
|
|
|
| 241 |
assert s == "validated", f"unexpected status before unvalidated tier: {s!r}"
|
| 242 |
|
| 243 |
|
| 244 |
+
def test_fmt_timestamp_formats_iso_and_passes_through_garbage():
|
| 245 |
+
"""ISO ``YYYY-MM-DDTHH:MM:SSZ`` -> ``YYYY-MM-DD HH:MM UTC``; garbage stays.
|
| 246 |
+
|
| 247 |
+
Empty / None / NaN render as the empty string (the cell is
|
| 248 |
+
rendered blank rather than as a literal placeholder).
|
| 249 |
+
"""
|
| 250 |
+
import math
|
| 251 |
+
|
| 252 |
+
assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
|
| 253 |
+
assert leaderboard._fmt_timestamp(None) == ""
|
| 254 |
+
assert leaderboard._fmt_timestamp("") == ""
|
| 255 |
+
assert leaderboard._fmt_timestamp(" ") == ""
|
| 256 |
+
assert leaderboard._fmt_timestamp(float("nan")) == ""
|
| 257 |
+
# Anything that doesn't match the canonical shape passes through
|
| 258 |
+
# unchanged (e.g., a manually-edited cell or a legacy timestamp
|
| 259 |
+
# format) so the visible cell is at least not blank-replaced.
|
| 260 |
+
assert leaderboard._fmt_timestamp("not-a-timestamp") == "not-a-timestamp"
|
| 261 |
+
|
| 262 |
+
|
| 263 |
def test_datatypes_align_with_columns():
|
| 264 |
"""Per-column datatype lists track the column-list lengths.
|
| 265 |
|