Michael Rabinovich committed on
Commit
c4e21b3
·
1 Parent(s): 6e3ab50

leaderboard: format submitted_at as `YYYY-MM-DD HH:MM UTC`; lock tables read-only

Browse files

Table cells were editable (gradio_leaderboard inherits gr.Dataframe's
interactive default); add interactive=False to both Validated and
Unvalidated widgets so cell contents aren't accidentally typed over.

The submitted_at column was rendered as raw ISO-8601 ("2026-05-28T07:13:16Z");
apply the same _fmt_timestamp helper the detail panel already uses so
the table reads "2026-05-28 07:13 UTC". The helper was moved into
leaderboard.py (the formatter module) and is now imported by app.py.

Files changed (3) hide show
  1. app.py +3 -19
  2. leaderboard.py +24 -0
  3. tests/test_leaderboard.py +22 -1
app.py CHANGED
@@ -13,7 +13,6 @@ from __future__ import annotations
13
  import html
14
  import logging
15
  import os
16
- import re
17
  from functools import lru_cache
18
  from pathlib import Path
19
 
@@ -31,6 +30,7 @@ from leaderboard import (
31
  LEADERBOARD_DATATYPES,
32
  LEADERBOARD_HIDE_COLUMNS,
33
  VALIDATED_LEADERBOARD_DATATYPES,
 
34
  build_combined_csv,
35
  load_leaderboard_split,
36
  )
@@ -92,8 +92,6 @@ Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
92
 
93
  DETAIL_PLACEHOLDER = "_Click a row above for details._"
94
 
95
- _ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
96
-
97
 
98
  def _has(value) -> bool:
99
  """True for values that should show up in the detail panel."""
@@ -104,22 +102,6 @@ def _has(value) -> bool:
104
  return str(value).strip() != ""
105
 
106
 
107
- def _fmt_timestamp(ts) -> str:
108
- """Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.
109
-
110
- The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
111
- minute-level UTC form is plenty for the detail panel, drops the
112
- ``T``/``Z`` punctuation, and renders timezone explicitly so a
113
- reader doesn't have to know that "Z" means UTC.
114
- """
115
- if not _has(ts):
116
- return ""
117
- m = _ISO_TS_RE.match(str(ts))
118
- if m:
119
- return f"{m.group(1)} {m.group(2)} UTC"
120
- return str(ts)
121
-
122
-
123
  def _build_report_iframe(html_bytes: bytes) -> str:
124
  """Wrap a fetched report's HTML bytes into a self-contained iframe.
125
 
@@ -301,6 +283,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
301
  search_columns=["submission_name", "submitter_name"],
302
  hide_columns=LEADERBOARD_HIDE_COLUMNS,
303
  label="Validated Leaderboard",
 
304
  )
305
  unvalidated_view = Leaderboard(
306
  value=initial_unvalidated,
@@ -308,6 +291,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
308
  search_columns=["submission_name", "submitter_name"],
309
  hide_columns=LEADERBOARD_HIDE_COLUMNS,
310
  label="Unvalidated Leaderboard",
 
311
  )
312
  with gr.Row():
313
  refresh_btn = gr.Button("Refresh", size="sm")
 
13
  import html
14
  import logging
15
  import os
 
16
  from functools import lru_cache
17
  from pathlib import Path
18
 
 
30
  LEADERBOARD_DATATYPES,
31
  LEADERBOARD_HIDE_COLUMNS,
32
  VALIDATED_LEADERBOARD_DATATYPES,
33
+ _fmt_timestamp,
34
  build_combined_csv,
35
  load_leaderboard_split,
36
  )
 
92
 
93
  DETAIL_PLACEHOLDER = "_Click a row above for details._"
94
 
 
 
95
 
96
  def _has(value) -> bool:
97
  """True for values that should show up in the detail panel."""
 
102
  return str(value).strip() != ""
103
 
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def _build_report_iframe(html_bytes: bytes) -> str:
106
  """Wrap a fetched report's HTML bytes into a self-contained iframe.
107
 
 
283
  search_columns=["submission_name", "submitter_name"],
284
  hide_columns=LEADERBOARD_HIDE_COLUMNS,
285
  label="Validated Leaderboard",
286
+ interactive=False,
287
  )
288
  unvalidated_view = Leaderboard(
289
  value=initial_unvalidated,
 
291
  search_columns=["submission_name", "submitter_name"],
292
  hide_columns=LEADERBOARD_HIDE_COLUMNS,
293
  label="Unvalidated Leaderboard",
294
+ interactive=False,
295
  )
296
  with gr.Row():
297
  refresh_btn = gr.Button("Refresh", size="sm")
leaderboard.py CHANGED
@@ -10,6 +10,7 @@ from __future__ import annotations
10
  import json
11
  import logging
12
  import os
 
13
  import tempfile
14
  import time
15
  import uuid
@@ -111,6 +112,27 @@ VALIDATED_LEADERBOARD_DATATYPES = _datatypes_for(VALIDATED_LEADERBOARD_COLS)
111
  PENDING_CELL_TAG = "⏳ evaluating..."
112
  FAILED_CELL_TAG = "✗ failed"
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  def _load_rows_from_hub() -> list[dict] | None:
116
  """Pull results.jsonl from the submissions dataset via raw HTTPS.
@@ -369,6 +391,8 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
369
  out["model details (optional)"] = out["model details (optional)"].apply(
370
  _agent_url_md
371
  )
 
 
372
  return out
373
 
374
 
 
10
  import json
11
  import logging
12
  import os
13
+ import re
14
  import tempfile
15
  import time
16
  import uuid
 
112
  PENDING_CELL_TAG = "⏳ evaluating..."
113
  FAILED_CELL_TAG = "✗ failed"
114
 
115
+ _ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
116
+
117
+
118
+ def _fmt_timestamp(ts) -> str:
119
+ """Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.
120
+
121
+ The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
122
+ minute-level UTC form is plenty for the table + detail panel,
123
+ drops the ``T``/``Z`` punctuation, and renders the timezone
124
+ explicitly so a reader doesn't have to know that "Z" means UTC.
125
+ """
126
+ if ts is None or (isinstance(ts, float) and pd.isna(ts)):
127
+ return ""
128
+ s = str(ts).strip()
129
+ if not s:
130
+ return ""
131
+ m = _ISO_TS_RE.match(s)
132
+ if m:
133
+ return f"{m.group(1)} {m.group(2)} UTC"
134
+ return s
135
+
136
 
137
  def _load_rows_from_hub() -> list[dict] | None:
138
  """Pull results.jsonl from the submissions dataset via raw HTTPS.
 
391
  out["model details (optional)"] = out["model details (optional)"].apply(
392
  _agent_url_md
393
  )
394
+ if "submitted_at" in out.columns:
395
+ out["submitted_at"] = out["submitted_at"].apply(_fmt_timestamp)
396
  return out
397
 
398
 
tests/test_leaderboard.py CHANGED
@@ -114,7 +114,9 @@ def test_field_passthrough(monkeypatch):
114
  # Use submitter_name to identify Beta (its submission_name is now
115
  # wrapped as a markdown link).
116
  beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0]
117
- assert beta["submitted_at"] == "2026-05-02T10:00:00Z"
 
 
118
  assert beta["cadgenbench_version"] == "0.1.0"
119
 
120
  assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
@@ -239,6 +241,25 @@ def test_build_combined_csv_orders_validated_first(monkeypatch):
239
  assert s == "validated", f"unexpected status before unvalidated tier: {s!r}"
240
 
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def test_datatypes_align_with_columns():
243
  """Per-column datatype lists track the column-list lengths.
244
 
 
114
  # Use submitter_name to identify Beta (its submission_name is now
115
  # wrapped as a markdown link).
116
  beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0]
117
+ # submitted_at is rendered as `YYYY-MM-DD HH:MM UTC` by
118
+ # _fmt_timestamp; the underlying ISO-8601 string is the input.
119
+ assert beta["submitted_at"] == "2026-05-02 10:00 UTC"
120
  assert beta["cadgenbench_version"] == "0.1.0"
121
 
122
  assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
 
241
  assert s == "validated", f"unexpected status before unvalidated tier: {s!r}"
242
 
243
 
244
+ def test_fmt_timestamp_formats_iso_and_passes_through_garbage():
245
+ """ISO ``YYYY-MM-DDTHH:MM:SSZ`` -> ``YYYY-MM-DD HH:MM UTC``; garbage stays.
246
+
247
+ Empty / None / NaN render as the empty string (the cell is
248
+ rendered blank rather than as a literal placeholder).
249
+ """
250
+ import math
251
+
252
+ assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
253
+ assert leaderboard._fmt_timestamp(None) == ""
254
+ assert leaderboard._fmt_timestamp("") == ""
255
+ assert leaderboard._fmt_timestamp(" ") == ""
256
+ assert leaderboard._fmt_timestamp(float("nan")) == ""
257
+ # Anything that doesn't match the canonical shape passes through
258
+ # unchanged (e.g., a manually-edited cell or a legacy timestamp
259
+ # format) so the visible cell is at least not blank-replaced.
260
+ assert leaderboard._fmt_timestamp("not-a-timestamp") == "not-a-timestamp"
261
+
262
+
263
  def test_datatypes_align_with_columns():
264
  """Per-column datatype lists track the column-list lengths.
265