Michael Rabinovich committed on
Commit
1a8f331
·
1 Parent(s): 3112173

app+leaderboard: detail panel polish (rename link columns, fix broken report links)

Browse files

Polish follow-up on C6: three fixes made after the first detail-panel
commit landed:

1. Rename display columns from snake_case schema names to clean
short headers. `agent_url` -> `agent`, `submission_blob_url` ->
`zip`, `report_url` -> `report`. The rename happens at projection
time via `df.rename`, so the schema in results.jsonl is
unchanged. Headers now read as plain English ("agent", "zip",
"report") rather than as Python dict keys.

2. The `agent` column shows the URL itself (scheme stripped,
truncated past 40 chars) as link text instead of a fixed "code"
label. The schema says agent_url can be code OR paper; the
shortened URL is the only honest hint about what's behind the
click. Missing values render as italic `_None_` rather than a
blank cell, so the optionality is explicit.

3. The `report` link is now gated on the row's `submission_sha256`
being non-null in addition to status == "completed". Legacy seed
rows pre-date the modern submit pipeline (which uploads
reports/<id>.html); the per-row sha256 is the schema's
"modern pipeline" sentinel ("null only on legacy rows that
pre-date this field"). Before this fix, the legacy rows rendered a
/resolve/ link that 404'd, which is exactly the bug that surfaced on
the live Space.

Detail panel (app.py):
- Timestamp formatting: 2026-05-26T12:02:31Z -> 2026-05-26 12:02 UTC.
ISO punctuation isn't useful in a human-readable card; dropping
the T/Z and showing the timezone explicitly is plenty.
- Always shows "Agent: ..." (with `_None_` when missing) so the
optionality is visible in the rendered card.
- Reads the renamed display columns.

Tests:
- _stub_rows() now carries submission_sha256 on rows that should
emit the report link; the legacy stub deliberately leaves it out.
- test_link_columns_render_as_markdown updated for the renamed
columns + shortened-URL agent text + the `_None_` placeholder.
- New test_legacy_row_omits_report_link guards the sentinel gate
so a future refactor that drops it gets caught.

10/10 unit tests green. Live read on the submissions dataset:
legacy baseline rows render `agent` as a shortened github URL,
`zip` as [zip](...), `report` as empty (no submission_sha256 ->
no report file would exist). Modern submission rows pick up a
real [report](...) link to a reports/<id>.html that actually
exists on the dataset.

Files changed (3) hide show
  1. app.py +32 -8
  2. leaderboard.py +74 -27
  3. tests/test_leaderboard.py +49 -19
app.py CHANGED
@@ -6,6 +6,7 @@ Read path lives in :mod:`leaderboard`. Submit-tab validation lives in
6
  from __future__ import annotations
7
 
8
  import logging
 
9
 
10
  import gradio as gr
11
  from gradio_leaderboard import Leaderboard
@@ -50,6 +51,8 @@ correct 3D model.
50
 
51
  DETAIL_PLACEHOLDER = "_Click a row above for details._"
52
 
 
 
53
 
54
  def _has(value) -> bool:
55
  """True for values that should show up in the detail panel."""
@@ -60,6 +63,22 @@ def _has(value) -> bool:
60
  return str(value).strip() != ""
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
64
  """Build the row-detail markdown for the clicked submission.
65
 
@@ -68,7 +87,10 @@ def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
68
  the visible link cells (already pre-formatted as ``[label](url)``
69
  by ``leaderboard.py``'s ``_project_and_format``).
70
  ``failure_reason`` only shows on ``failed`` rows;
71
- ``report_url`` is only non-empty for ``completed`` rows.
 
 
 
72
  """
73
  if df is None or len(df) == 0 or evt is None or evt.index is None:
74
  return DETAIL_PLACEHOLDER
@@ -84,15 +106,17 @@ def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
84
  if _has(row.get("status")):
85
  lines.append(f"- **Status**: {row['status']}")
86
  if _has(row.get("submitted_at")):
87
- lines.append(f"- **Submitted**: {row['submitted_at']}")
88
  if _has(row.get("notes")):
89
  lines.append(f"- **Notes**: {row['notes']}")
90
- if _has(row.get("agent_url")):
91
- lines.append(f"- **Agent code**: {row['agent_url']}")
92
- if _has(row.get("submission_blob_url")):
93
- lines.append(f"- **Submission**: {row['submission_blob_url']}")
94
- if _has(row.get("report_url")):
95
- lines.append(f"- **Report**: {row['report_url']}")
 
 
96
  if row.get("status") == "failed" and _has(row.get("failure_reason")):
97
  lines.append(f"- **Failure reason**: {row['failure_reason']}")
98
  return "\n".join(lines)
 
6
  from __future__ import annotations
7
 
8
  import logging
9
+ import re
10
 
11
  import gradio as gr
12
  from gradio_leaderboard import Leaderboard
 
51
 
52
  DETAIL_PLACEHOLDER = "_Click a row above for details._"
53
 
54
+ _ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
55
+
56
 
57
  def _has(value) -> bool:
58
  """True for values that should show up in the detail panel."""
 
63
  return str(value).strip() != ""
64
 
65
 
66
def _fmt_timestamp(ts) -> str:
    """Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.

    The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
    minute-level UTC form is plenty for the detail panel, drops the
    ``T``/``Z`` punctuation, and renders the timezone explicitly so a
    reader doesn't have to know that "Z" means UTC.

    Args:
        ts: The raw ``submitted_at`` cell. Anything ``_has`` rejects
            (missing / blank) yields an empty string.

    Returns:
        The reformatted timestamp when the value matches the schema
        shape; otherwise the value's text, stripped of surrounding
        whitespace, so unexpected formats still show up in the card.
    """
    if not _has(ts):
        return ""
    # `_has` judges the value by its stripped text, so normalise the
    # same way here; otherwise a padded timestamp slips past the
    # anchored pattern and is echoed back unformatted.
    text = str(ts).strip()
    m = _ISO_TS_RE.match(text)
    if m:
        return f"{m.group(1)} {m.group(2)} UTC"
    return text
80
+
81
+
82
  def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
83
  """Build the row-detail markdown for the clicked submission.
84
 
 
87
  the visible link cells (already pre-formatted as ``[label](url)``
88
  by ``leaderboard.py``'s ``_project_and_format``).
89
  ``failure_reason`` only shows on ``failed`` rows;
90
+ ``report_url`` is only non-empty for completed rows from the
91
+ modern submit pipeline (leaderboard.py gates on
92
+ ``submission_sha256`` so legacy rows don't render a broken
93
+ /resolve/ link).
94
  """
95
  if df is None or len(df) == 0 or evt is None or evt.index is None:
96
  return DETAIL_PLACEHOLDER
 
106
  if _has(row.get("status")):
107
  lines.append(f"- **Status**: {row['status']}")
108
  if _has(row.get("submitted_at")):
109
+ lines.append(f"- **Submitted**: {_fmt_timestamp(row['submitted_at'])}")
110
  if _has(row.get("notes")):
111
  lines.append(f"- **Notes**: {row['notes']}")
112
+ # Display columns from leaderboard.py's _project_and_format:
113
+ # `agent` carries the markdown link (or "_None_" when missing);
114
+ # `zip` and `report` are empty strings when not applicable.
115
+ lines.append(f"- **Agent**: {row.get('agent') or '_None_'}")
116
+ if _has(row.get("zip")):
117
+ lines.append(f"- **Submission**: {row['zip']}")
118
+ if _has(row.get("report")):
119
+ lines.append(f"- **Report**: {row['report']}")
120
  if row.get("status") == "failed" and _has(row.get("failure_reason")):
121
  lines.append(f"- **Failure reason**: {row['failure_reason']}")
122
  return "\n".join(lines)
leaderboard.py CHANGED
@@ -33,6 +33,11 @@ HUB_FETCH_TIMEOUT_SECONDS = 30
33
  # by hidden-but-data-present columns the row-click detail panel pulls from.
34
  # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
35
  # can read them out without a separate state-cache or re-fetch.
 
 
 
 
 
36
  LEADERBOARD_COLS = [
37
  "status",
38
  "submission_name",
@@ -41,9 +46,9 @@ LEADERBOARD_COLS = [
41
  "validity_rate",
42
  "submitted_at",
43
  "cadgenbench_version",
44
- "agent_url",
45
- "submission_blob_url",
46
- "report_url",
47
  # Detail-panel-only (hidden via `hide_columns` on the widget):
48
  "submission_id",
49
  "notes",
@@ -62,9 +67,9 @@ VALIDATED_LEADERBOARD_COLS = [
62
  "validation_method",
63
  "submitted_at",
64
  "cadgenbench_version",
65
- "agent_url",
66
- "submission_blob_url",
67
- "report_url",
68
  "submission_id",
69
  "notes",
70
  "failure_reason",
@@ -80,7 +85,7 @@ LEADERBOARD_HIDE_COLUMNS = ["submission_id", "notes", "failure_reason"]
80
  # cells get pending / failed status tags applied by _fmt_pct /
81
  # _fmt_score so they're string-shaped by the time the widget sees
82
  # them).
83
- _LINK_COLUMNS = frozenset({"agent_url", "submission_blob_url", "report_url"})
84
 
85
 
86
  def _datatypes_for(columns: list[str]) -> list[str]:
@@ -181,31 +186,61 @@ def _is_empty(v) -> bool:
181
  return False
182
 
183
 
 
 
 
 
 
 
 
 
 
 
 
184
  def _agent_url_md(url) -> str:
185
- """Render an `agent_url` cell as a markdown link (empty string if absent)."""
 
 
 
 
 
 
 
 
186
  if _is_empty(url):
187
- return ""
188
- return f"[code]({url})"
189
 
190
 
191
  def _submission_blob_md(url) -> str:
192
- """Render a `submission_blob_url` cell as a markdown link."""
 
 
 
 
193
  if _is_empty(url):
194
  return ""
195
  return f"[zip]({url})"
196
 
197
 
198
- def _report_url_md(submission_id, status) -> str:
199
- """Build the report URL from `submission_id`, only for completed rows.
200
 
201
- `reports/<id>.html` lives on the submissions dataset alongside the
202
- submission zip. ``/resolve/main/`` (matching the convention used
203
- by the submit handler for ``submission_blob_url``) serves the
204
- file with its content type so the browser renders the HTML
205
- report directly. Pending and failed rows have no report yet.
 
 
 
 
 
206
  """
207
  if status != "completed" or _is_empty(submission_id):
208
  return ""
 
 
209
  return (
210
  f"[report](https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
211
  f"/resolve/main/reports/{submission_id}.html)"
@@ -261,13 +296,25 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
261
  if df.empty:
262
  return pd.DataFrame(columns=columns)
263
  df = df.copy()
264
- # Derive `report_url` before projection drops `submission_id`.
265
- # Computed (not stored on the row) so a path change doesn't
266
- # require a results.jsonl rewrite.
267
  if "submission_id" in df.columns and "status" in df.columns:
268
- df["report_url"] = df.apply(
269
- lambda r: _report_url_md(r["submission_id"], r["status"]), axis=1,
 
 
 
 
 
270
  )
 
 
 
 
 
 
 
271
  # Make sure every declared column exists (legacy rows can be
272
  # missing optional fields). Detail-panel reads expect the
273
  # column-set to be stable regardless of which source rows had
@@ -288,8 +335,8 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
288
  out["aggregate_score"] = out.apply(
289
  lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
290
  )
291
- if "agent_url" in out.columns:
292
- out["agent_url"] = out["agent_url"].apply(_agent_url_md)
293
- if "submission_blob_url" in out.columns:
294
- out["submission_blob_url"] = out["submission_blob_url"].apply(_submission_blob_md)
295
  return out
 
33
  # by hidden-but-data-present columns the row-click detail panel pulls from.
34
  # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
35
  # can read them out without a separate state-cache or re-fetch.
36
+ # Display column names (these are what the rendered table headers
37
+ # read). The schema fields `agent_url` and `submission_blob_url` get
38
+ # renamed at projection time to `agent` / `zip` so the rendered
39
+ # headers stay short and uniform; the `report` column is computed
40
+ # fresh from `submission_id`.
41
  LEADERBOARD_COLS = [
42
  "status",
43
  "submission_name",
 
46
  "validity_rate",
47
  "submitted_at",
48
  "cadgenbench_version",
49
+ "agent",
50
+ "zip",
51
+ "report",
52
  # Detail-panel-only (hidden via `hide_columns` on the widget):
53
  "submission_id",
54
  "notes",
 
67
  "validation_method",
68
  "submitted_at",
69
  "cadgenbench_version",
70
+ "agent",
71
+ "zip",
72
+ "report",
73
  "submission_id",
74
  "notes",
75
  "failure_reason",
 
85
  # cells get pending / failed status tags applied by _fmt_pct /
86
  # _fmt_score so they're string-shaped by the time the widget sees
87
  # them).
88
+ _LINK_COLUMNS = frozenset({"agent", "zip", "report"})
89
 
90
 
91
  def _datatypes_for(columns: list[str]) -> list[str]:
 
186
  return False
187
 
188
 
189
+ _AGENT_URL_MAX_LINK_TEXT = 40
190
+
191
+
192
+ def _shorten_url_for_display(url: str) -> str:
193
+ """Strip scheme + trailing slash; truncate to keep the table cell tidy."""
194
+ s = url.replace("https://", "").replace("http://", "").rstrip("/")
195
+ if len(s) > _AGENT_URL_MAX_LINK_TEXT:
196
+ s = s[: _AGENT_URL_MAX_LINK_TEXT - 1] + "…"
197
+ return s
198
+
199
+
200
  def _agent_url_md(url) -> str:
201
+ """Render the `agent` cell as a markdown link.
202
+
203
+ Uses a shortened version of the URL itself as the link text:
204
+ `agent_url` is a free-form "URL pointing at the agent code or
205
+ paper" per the schema, so the URL itself carries the only
206
+ honest hint about what's behind the click. Missing cells render
207
+ as italic `_None_` so a reader sees the field is optional and
208
+ just wasn't filled, rather than a blank.
209
+ """
210
  if _is_empty(url):
211
+ return "_None_"
212
+ return f"[{_shorten_url_for_display(str(url))}]({url})"
213
 
214
 
215
  def _submission_blob_md(url) -> str:
216
+ """Render the `zip` cell as a markdown link.
217
+
218
+ Link text stays "zip" (the URL points at our own infrastructure
219
+ and adds no extra information for the reader).
220
+ """
221
  if _is_empty(url):
222
  return ""
223
  return f"[zip]({url})"
224
 
225
 
226
+ def _report_url_md(submission_id, status, submission_sha256) -> str:
227
+ """Build the report URL, only when the report file is known to exist.
228
 
229
+ `reports/<id>.html` is uploaded by the post-eval worker in the
230
+ modern submit pipeline. Legacy rows that pre-date that pipeline
231
+ (the three baseline seed rows; identifiable by ``submission_sha256``
232
+ being null, per the schema's compatibility note) never had a
233
+ report uploaded, so the link would 404. Gate on
234
+ ``submission_sha256`` to keep the rendered link honest.
235
+
236
+ ``/resolve/main/`` (matching the convention used by the submit
237
+ handler for ``submission_blob_url``) serves the file with its
238
+ content type so the browser renders the HTML report directly.
239
  """
240
  if status != "completed" or _is_empty(submission_id):
241
  return ""
242
+ if _is_empty(submission_sha256):
243
+ return ""
244
  return (
245
  f"[report](https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
246
  f"/resolve/main/reports/{submission_id}.html)"
 
296
  if df.empty:
297
  return pd.DataFrame(columns=columns)
298
  df = df.copy()
299
+ # Compute `report` (not stored on the row) before projection drops
300
+ # the source columns it needs. Path is derived from `submission_id`
301
+ # so a layout change doesn't require a results.jsonl rewrite.
302
  if "submission_id" in df.columns and "status" in df.columns:
303
+ df["report"] = df.apply(
304
+ lambda r: _report_url_md(
305
+ r["submission_id"],
306
+ r["status"],
307
+ r.get("submission_sha256"),
308
+ ),
309
+ axis=1,
310
  )
311
+ # Schema-field names -> display-column names. Keeps the rendered
312
+ # headers short and uniform without renaming anything in
313
+ # results.jsonl.
314
+ df = df.rename(columns={
315
+ "agent_url": "agent",
316
+ "submission_blob_url": "zip",
317
+ })
318
  # Make sure every declared column exists (legacy rows can be
319
  # missing optional fields). Detail-panel reads expect the
320
  # column-set to be stable regardless of which source rows had
 
335
  out["aggregate_score"] = out.apply(
336
  lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
337
  )
338
+ if "agent" in out.columns:
339
+ out["agent"] = out["agent"].apply(_agent_url_md)
340
+ if "zip" in out.columns:
341
+ out["zip"] = out["zip"].apply(_submission_blob_md)
342
  return out
tests/test_leaderboard.py CHANGED
@@ -34,6 +34,9 @@ def _stub_rows():
34
  "submission_blob_url": (
35
  "https://huggingface.co/datasets/test/sub-a.zip"
36
  ),
 
 
 
37
  },
38
  {
39
  "submission_id": "sub-b",
@@ -51,9 +54,13 @@ def _stub_rows():
51
  "submission_blob_url": (
52
  "https://huggingface.co/datasets/test/sub-b.zip"
53
  ),
 
54
  },
55
  # Legacy row: pre-schema-bump shape. No `validation_status` key,
56
- # no `status` key. Both should be defaulted by the reader.
 
 
 
57
  {
58
  "submission_id": "sub-c-legacy",
59
  "submitter_name": "team-gamma",
@@ -122,32 +129,55 @@ def test_empty_input_returns_two_empty_frames(monkeypatch):
122
 
123
 
124
  def test_link_columns_render_as_markdown(monkeypatch):
125
- """agent_url / submission_blob_url / report_url render as markdown links.
126
-
127
- Covers C5: link cells should be ``[label](url)`` strings (so the
128
- Leaderboard widget rendering them under ``datatype="markdown"``
129
- produces clickable anchors), null/missing agent_urls are empty,
130
- and report_url is built from submission_id but only for
131
- ``status == "completed"`` rows.
 
 
 
 
 
132
  """
133
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
134
  validated, unvalidated = leaderboard.load_leaderboard_split()
135
 
136
  alpha = validated[validated["submission_name"] == "Alpha Agent v1"].iloc[0]
137
- assert alpha["agent_url"] == "[code](https://github.com/example/alpha-agent)"
138
- assert alpha["submission_blob_url"] == (
 
 
139
  "[zip](https://huggingface.co/datasets/test/sub-a.zip)"
140
  )
141
- # Report URL is computed from submission_id and points at the
142
- # submissions dataset's `reports/<id>.html` via /resolve/main/.
143
- assert alpha["report_url"].startswith("[report](")
144
- assert "reports/sub-a.html" in alpha["report_url"]
145
 
146
- # Null agent_url renders as empty cell, not a broken anchor.
147
  beta = unvalidated[unvalidated["submission_name"] == "Beta Agent v2"].iloc[0]
148
- assert beta["agent_url"] == ""
149
- assert beta["submission_blob_url"].startswith("[zip](")
150
- assert beta["report_url"].startswith("[report](")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
 
153
  def test_datatypes_align_with_columns():
@@ -169,7 +199,7 @@ def test_datatypes_align_with_columns():
169
  for col, dt in zip(
170
  leaderboard.LEADERBOARD_COLS, leaderboard.LEADERBOARD_DATATYPES
171
  ):
172
- if col in ("agent_url", "submission_blob_url", "report_url"):
173
  assert dt == "markdown"
174
  else:
175
  assert dt == "str"
 
34
  "submission_blob_url": (
35
  "https://huggingface.co/datasets/test/sub-a.zip"
36
  ),
37
+ # Modern submit pipeline: sha256 is populated, so report
38
+ # links should be emitted on completed rows.
39
+ "submission_sha256": "a" * 64,
40
  },
41
  {
42
  "submission_id": "sub-b",
 
54
  "submission_blob_url": (
55
  "https://huggingface.co/datasets/test/sub-b.zip"
56
  ),
57
+ "submission_sha256": "b" * 64,
58
  },
59
  # Legacy row: pre-schema-bump shape. No `validation_status` key,
60
+ # no `status` key, no `submission_sha256`. Both `status` and
61
+ # `validation_status` should be defaulted by the reader; the
62
+ # missing sha256 must suppress the report link (the
63
+ # corresponding reports/<id>.html doesn't exist on the dataset).
64
  {
65
  "submission_id": "sub-c-legacy",
66
  "submitter_name": "team-gamma",
 
129
 
130
 
131
  def test_link_columns_render_as_markdown(monkeypatch):
132
+ """`agent` / `zip` / `report` columns render as markdown links.
133
+
134
+ Link cells are ``[label](url)`` strings so the Leaderboard widget
135
+ under ``datatype="markdown"`` produces clickable anchors. The
136
+ ``agent`` cell uses the URL itself (scheme stripped) as link
137
+ text so a reader can tell what's behind the click; ``zip`` and
138
+ ``report`` use the short fixed labels because they always point
139
+ at our own infrastructure.
140
+
141
+ Missing ``agent_url`` renders as ``_None_`` (italic placeholder
142
+ so the reader sees the field exists but wasn't filled), not a
143
+ blank cell.
144
  """
145
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
146
  validated, unvalidated = leaderboard.load_leaderboard_split()
147
 
148
  alpha = validated[validated["submission_name"] == "Alpha Agent v1"].iloc[0]
149
+ assert alpha["agent"] == (
150
+ "[github.com/example/alpha-agent](https://github.com/example/alpha-agent)"
151
+ )
152
+ assert alpha["zip"] == (
153
  "[zip](https://huggingface.co/datasets/test/sub-a.zip)"
154
  )
155
+ assert alpha["report"].startswith("[report](")
156
+ assert "reports/sub-a.html" in alpha["report"]
 
 
157
 
 
158
  beta = unvalidated[unvalidated["submission_name"] == "Beta Agent v2"].iloc[0]
159
+ assert beta["agent"] == "_None_"
160
+ assert beta["zip"].startswith("[zip](")
161
+ # Beta has submission_sha256, so the report link is emitted.
162
+ assert beta["report"].startswith("[report](")
163
+
164
+
165
def test_legacy_row_omits_report_link(monkeypatch):
    """Legacy seed rows (no ``submission_sha256``) must not get a report link.

    Only the modern submit pipeline uploads ``reports/<id>.html``. Rows
    that pre-date it never had a report, so a /resolve/ URL for them
    would 404. ``submission_sha256`` is the schema's "modern pipeline"
    sentinel, and the leaderboard gates the link on it to avoid
    rendering a broken anchor.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
    _validated, unvalidated = leaderboard.load_leaderboard_split()
    is_gamma = unvalidated["submission_name"] == "Gamma baseline"
    legacy_row = unvalidated[is_gamma].iloc[0]
    assert legacy_row["report"] == ""
    # Sanity: the other two link cells are unaffected on the legacy row.
    assert legacy_row["agent"].startswith("[github.com/example/gamma-baseline](")
    assert legacy_row["zip"].startswith("[zip](")
181
 
182
 
183
  def test_datatypes_align_with_columns():
 
199
  for col, dt in zip(
200
  leaderboard.LEADERBOARD_COLS, leaderboard.LEADERBOARD_DATATYPES
201
  ):
202
+ if col in ("agent", "zip", "report"):
203
  assert dt == "markdown"
204
  else:
205
  assert dt == "str"