app+leaderboard: detail panel polish (rename link columns, fix broken report links)
Polish follow-up on C6, three fixes after the first detail-panel
commit landed:
1. Rename display columns from snake_case schema names to clean
short headers. `agent_url` -> `agent`, `submission_blob_url` ->
`zip`, `report_url` -> `report`. The rename happens at projection
time via `df.rename`, so the schema in results.jsonl is
unchanged. Headers now read as plain English ("agent", "zip",
"report") rather than a python dict.
2. The `agent` column shows the URL itself (scheme stripped,
truncated past 40 chars) as link text instead of a fixed "code"
label. The schema says agent_url can be code OR paper; the
shortened URL is the only honest hint about what's behind the
click. Missing values render as italic `_None_` rather than a
blank cell, so the optionality is explicit.
3. The `report` link is now gated on the row's `submission_sha256`
being non-null in addition to status == "completed". Legacy seed
rows pre-date the modern submit pipeline (which uploads
reports/<id>.html); the per-row sha256 is the schema's
"modern pipeline" sentinel ("null only on legacy rows that
pre-date this field"). Before this fix, the legacy rows rendered a
/resolve/ link that 404'd, which is exactly the bug surfaced on the
live Space.
Detail panel (app.py):
- Timestamp formatting: 2026-05-26T12:02:31Z -> 2026-05-26 12:02 UTC.
ISO punctuation isn't useful in a human-readable card; dropping
the T/Z and showing the timezone explicitly is plenty.
- Always shows "Agent: ..." (with `_None_` when missing) so the
optionality is visible in the rendered card.
- Reads the renamed display columns.
Tests:
- _stub_rows() now carries submission_sha256 on rows that should
emit the report link; the legacy stub deliberately leaves it out.
- test_link_columns_render_as_markdown updated for the renamed
columns + shortened-URL agent text + the `_None_` placeholder.
- New test_legacy_row_omits_report_link guards the sentinel gate
so a future refactor that drops it gets caught.
10/10 unit tests green. Live read on the submissions dataset:
legacy baseline rows render `agent` as a shortened github URL,
`zip` as [zip](...), `report` as empty (no submission_sha256 ->
no report file would exist). Modern submission rows pick up a
real [report](...) link to a reports/<id>.html that actually
exists on the dataset.
- app.py +32 -8
- leaderboard.py +74 -27
- tests/test_leaderboard.py +49 -19
|
@@ -6,6 +6,7 @@ Read path lives in :mod:`leaderboard`. Submit-tab validation lives in
|
|
| 6 |
from __future__ import annotations
|
| 7 |
|
| 8 |
import logging
|
|
|
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
from gradio_leaderboard import Leaderboard
|
|
@@ -50,6 +51,8 @@ correct 3D model.
|
|
| 50 |
|
| 51 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
| 52 |
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def _has(value) -> bool:
|
| 55 |
"""True for values that should show up in the detail panel."""
|
|
@@ -60,6 +63,22 @@ def _has(value) -> bool:
|
|
| 60 |
return str(value).strip() != ""
|
| 61 |
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
|
| 64 |
"""Build the row-detail markdown for the clicked submission.
|
| 65 |
|
|
@@ -68,7 +87,10 @@ def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
|
|
| 68 |
the visible link cells (already pre-formatted as ``[label](url)``
|
| 69 |
by ``leaderboard.py``'s ``_project_and_format``).
|
| 70 |
``failure_reason`` only shows on ``failed`` rows;
|
| 71 |
-
``report_url`` is only non-empty for
|
|
|
|
|
|
|
|
|
|
| 72 |
"""
|
| 73 |
if df is None or len(df) == 0 or evt is None or evt.index is None:
|
| 74 |
return DETAIL_PLACEHOLDER
|
|
@@ -84,15 +106,17 @@ def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
|
|
| 84 |
if _has(row.get("status")):
|
| 85 |
lines.append(f"- **Status**: {row['status']}")
|
| 86 |
if _has(row.get("submitted_at")):
|
| 87 |
-
lines.append(f"- **Submitted**: {row['submitted_at']}")
|
| 88 |
if _has(row.get("notes")):
|
| 89 |
lines.append(f"- **Notes**: {row['notes']}")
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
if _has(row.get("
|
| 95 |
-
lines.append(f"- **
|
|
|
|
|
|
|
| 96 |
if row.get("status") == "failed" and _has(row.get("failure_reason")):
|
| 97 |
lines.append(f"- **Failure reason**: {row['failure_reason']}")
|
| 98 |
return "\n".join(lines)
|
|
|
|
| 6 |
from __future__ import annotations
|
| 7 |
|
| 8 |
import logging
|
| 9 |
+
import re
|
| 10 |
|
| 11 |
import gradio as gr
|
| 12 |
from gradio_leaderboard import Leaderboard
|
|
|
|
| 51 |
|
| 52 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
| 53 |
|
| 54 |
+
_ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")
|
| 55 |
+
|
| 56 |
|
| 57 |
def _has(value) -> bool:
|
| 58 |
"""True for values that should show up in the detail panel."""
|
|
|
|
| 63 |
return str(value).strip() != ""
|
| 64 |
|
| 65 |
|
| 66 |
+
def _fmt_timestamp(ts) -> str:
    """Format a ``submitted_at`` value as ``YYYY-MM-DD HH:MM UTC``.

    Timestamps are expected in the ``YYYY-MM-DDTHH:MM:SSZ`` form matched
    by ``_ISO_TS_RE``. The seconds and the ``T``/``Z`` punctuation are
    dropped and the timezone is spelled out, so a reader doesn't have
    to know that "Z" means UTC.

    Returns an empty string when ``_has(ts)`` is false, and the
    unmodified ``str(ts)`` when the value does not match the expected
    ISO shape.
    """
    if not _has(ts):
        return ""
    raw = str(ts)
    parsed = _ISO_TS_RE.match(raw)
    if parsed is None:
        # Unknown shape: show it as-is rather than guessing.
        return raw
    day, hour_minute = parsed.groups()
    return f"{day} {hour_minute} UTC"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
def _format_detail(df: pd.DataFrame | None, evt: gr.SelectData) -> str:
|
| 83 |
"""Build the row-detail markdown for the clicked submission.
|
| 84 |
|
|
|
|
| 87 |
the visible link cells (already pre-formatted as ``[label](url)``
|
| 88 |
by ``leaderboard.py``'s ``_project_and_format``).
|
| 89 |
``failure_reason`` only shows on ``failed`` rows;
|
| 90 |
+
``report_url`` is only non-empty for completed rows from the
|
| 91 |
+
modern submit pipeline (leaderboard.py gates on
|
| 92 |
+
``submission_sha256`` so legacy rows don't render a broken
|
| 93 |
+
/resolve/ link).
|
| 94 |
"""
|
| 95 |
if df is None or len(df) == 0 or evt is None or evt.index is None:
|
| 96 |
return DETAIL_PLACEHOLDER
|
|
|
|
| 106 |
if _has(row.get("status")):
|
| 107 |
lines.append(f"- **Status**: {row['status']}")
|
| 108 |
if _has(row.get("submitted_at")):
|
| 109 |
+
lines.append(f"- **Submitted**: {_fmt_timestamp(row['submitted_at'])}")
|
| 110 |
if _has(row.get("notes")):
|
| 111 |
lines.append(f"- **Notes**: {row['notes']}")
|
| 112 |
+
# Display columns from leaderboard.py's _project_and_format:
|
| 113 |
+
# `agent` carries the markdown link (or "_None_" when missing);
|
| 114 |
+
# `zip` and `report` are empty strings when not applicable.
|
| 115 |
+
lines.append(f"- **Agent**: {row.get('agent') or '_None_'}")
|
| 116 |
+
if _has(row.get("zip")):
|
| 117 |
+
lines.append(f"- **Submission**: {row['zip']}")
|
| 118 |
+
if _has(row.get("report")):
|
| 119 |
+
lines.append(f"- **Report**: {row['report']}")
|
| 120 |
if row.get("status") == "failed" and _has(row.get("failure_reason")):
|
| 121 |
lines.append(f"- **Failure reason**: {row['failure_reason']}")
|
| 122 |
return "\n".join(lines)
|
|
@@ -33,6 +33,11 @@ HUB_FETCH_TIMEOUT_SECONDS = 30
|
|
| 33 |
# by hidden-but-data-present columns the row-click detail panel pulls from.
|
| 34 |
# Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
|
| 35 |
# can read them out without a separate state-cache or re-fetch.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
LEADERBOARD_COLS = [
|
| 37 |
"status",
|
| 38 |
"submission_name",
|
|
@@ -41,9 +46,9 @@ LEADERBOARD_COLS = [
|
|
| 41 |
"validity_rate",
|
| 42 |
"submitted_at",
|
| 43 |
"cadgenbench_version",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
# Detail-panel-only (hidden via `hide_columns` on the widget):
|
| 48 |
"submission_id",
|
| 49 |
"notes",
|
|
@@ -62,9 +67,9 @@ VALIDATED_LEADERBOARD_COLS = [
|
|
| 62 |
"validation_method",
|
| 63 |
"submitted_at",
|
| 64 |
"cadgenbench_version",
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
"submission_id",
|
| 69 |
"notes",
|
| 70 |
"failure_reason",
|
|
@@ -80,7 +85,7 @@ LEADERBOARD_HIDE_COLUMNS = ["submission_id", "notes", "failure_reason"]
|
|
| 80 |
# cells get pending / failed status tags applied by _fmt_pct /
|
| 81 |
# _fmt_score so they're string-shaped by the time the widget sees
|
| 82 |
# them).
|
| 83 |
-
_LINK_COLUMNS = frozenset({"
|
| 84 |
|
| 85 |
|
| 86 |
def _datatypes_for(columns: list[str]) -> list[str]:
|
|
@@ -181,31 +186,61 @@ def _is_empty(v) -> bool:
|
|
| 181 |
return False
|
| 182 |
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
def _agent_url_md(url) -> str:
|
| 185 |
-
"""Render
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
if _is_empty(url):
|
| 187 |
-
return ""
|
| 188 |
-
return f"[
|
| 189 |
|
| 190 |
|
| 191 |
def _submission_blob_md(url) -> str:
|
| 192 |
-
"""Render
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
if _is_empty(url):
|
| 194 |
return ""
|
| 195 |
return f"[zip]({url})"
|
| 196 |
|
| 197 |
|
| 198 |
-
def _report_url_md(submission_id, status) -> str:
|
| 199 |
-
"""Build the report URL
|
| 200 |
|
| 201 |
-
`reports/<id>.html`
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
report
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
"""
|
| 207 |
if status != "completed" or _is_empty(submission_id):
|
| 208 |
return ""
|
|
|
|
|
|
|
| 209 |
return (
|
| 210 |
f"[report](https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
|
| 211 |
f"/resolve/main/reports/{submission_id}.html)"
|
|
@@ -261,13 +296,25 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
|
|
| 261 |
if df.empty:
|
| 262 |
return pd.DataFrame(columns=columns)
|
| 263 |
df = df.copy()
|
| 264 |
-
#
|
| 265 |
-
#
|
| 266 |
-
# require a results.jsonl rewrite.
|
| 267 |
if "submission_id" in df.columns and "status" in df.columns:
|
| 268 |
-
df["
|
| 269 |
-
lambda r: _report_url_md(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
# Make sure every declared column exists (legacy rows can be
|
| 272 |
# missing optional fields). Detail-panel reads expect the
|
| 273 |
# column-set to be stable regardless of which source rows had
|
|
@@ -288,8 +335,8 @@ def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
|
|
| 288 |
out["aggregate_score"] = out.apply(
|
| 289 |
lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
|
| 290 |
)
|
| 291 |
-
if "
|
| 292 |
-
out["
|
| 293 |
-
if "
|
| 294 |
-
out["
|
| 295 |
return out
|
|
|
|
| 33 |
# by hidden-but-data-present columns the row-click detail panel pulls from.
|
| 34 |
# Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
|
| 35 |
# can read them out without a separate state-cache or re-fetch.
|
| 36 |
+
# Display column names (these are what the rendered table headers
|
| 37 |
+
# read). The schema fields `agent_url` and `submission_blob_url` get
|
| 38 |
+
# renamed at projection time to `agent` / `zip` so the rendered
|
| 39 |
+
# headers stay short and uniform; the `report` column is computed
|
| 40 |
+
# fresh from `submission_id`.
|
| 41 |
LEADERBOARD_COLS = [
|
| 42 |
"status",
|
| 43 |
"submission_name",
|
|
|
|
| 46 |
"validity_rate",
|
| 47 |
"submitted_at",
|
| 48 |
"cadgenbench_version",
|
| 49 |
+
"agent",
|
| 50 |
+
"zip",
|
| 51 |
+
"report",
|
| 52 |
# Detail-panel-only (hidden via `hide_columns` on the widget):
|
| 53 |
"submission_id",
|
| 54 |
"notes",
|
|
|
|
| 67 |
"validation_method",
|
| 68 |
"submitted_at",
|
| 69 |
"cadgenbench_version",
|
| 70 |
+
"agent",
|
| 71 |
+
"zip",
|
| 72 |
+
"report",
|
| 73 |
"submission_id",
|
| 74 |
"notes",
|
| 75 |
"failure_reason",
|
|
|
|
| 85 |
# cells get pending / failed status tags applied by _fmt_pct /
|
| 86 |
# _fmt_score so they're string-shaped by the time the widget sees
|
| 87 |
# them).
|
| 88 |
+
_LINK_COLUMNS = frozenset({"agent", "zip", "report"})
|
| 89 |
|
| 90 |
|
| 91 |
def _datatypes_for(columns: list[str]) -> list[str]:
|
|
|
|
| 186 |
return False
|
| 187 |
|
| 188 |
|
| 189 |
+
_AGENT_URL_MAX_LINK_TEXT = 40
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def _shorten_url_for_display(url: str) -> str:
|
| 193 |
+
"""Strip scheme + trailing slash; truncate to keep the table cell tidy."""
|
| 194 |
+
s = url.replace("https://", "").replace("http://", "").rstrip("/")
|
| 195 |
+
if len(s) > _AGENT_URL_MAX_LINK_TEXT:
|
| 196 |
+
s = s[: _AGENT_URL_MAX_LINK_TEXT - 1] + "…"
|
| 197 |
+
return s
|
| 198 |
+
|
| 199 |
+
|
| 200 |
def _agent_url_md(url) -> str:
    """Build the markdown for an `agent` cell.

    The link text is a shortened form of the URL itself: per the
    schema, `agent_url` is a free-form "URL pointing at the agent code
    or paper", so the URL is the only honest hint about what's behind
    the click. An empty value yields the italic placeholder `_None_`
    so the reader sees an optional field that wasn't filled rather
    than a blank cell.
    """
    if not _is_empty(url):
        label = _shorten_url_for_display(str(url))
        return f"[{label}]({url})"
    return "_None_"
|
| 213 |
|
| 214 |
|
| 215 |
def _submission_blob_md(url) -> str:
    """Build the markdown for a `zip` cell.

    The link text is always the fixed label "zip": the URL points at
    our own infrastructure and tells the reader nothing extra. An
    empty value yields an empty cell.
    """
    return "" if _is_empty(url) else f"[zip]({url})"
|
| 224 |
|
| 225 |
|
| 226 |
+
def _report_url_md(submission_id, status, submission_sha256) -> str:
|
| 227 |
+
"""Build the report URL, only when the report file is known to exist.
|
| 228 |
|
| 229 |
+
`reports/<id>.html` is uploaded by the post-eval worker in the
|
| 230 |
+
modern submit pipeline. Legacy rows that pre-date that pipeline
|
| 231 |
+
(the three baseline seed rows; identifiable by ``submission_sha256``
|
| 232 |
+
being null, per the schema's compatibility note) never had a
|
| 233 |
+
report uploaded, so the link would 404. Gate on
|
| 234 |
+
``submission_sha256`` to keep the rendered link honest.
|
| 235 |
+
|
| 236 |
+
``/resolve/main/`` (matching the convention used by the submit
|
| 237 |
+
handler for ``submission_blob_url``) serves the file with its
|
| 238 |
+
content type so the browser renders the HTML report directly.
|
| 239 |
"""
|
| 240 |
if status != "completed" or _is_empty(submission_id):
|
| 241 |
return ""
|
| 242 |
+
if _is_empty(submission_sha256):
|
| 243 |
+
return ""
|
| 244 |
return (
|
| 245 |
f"[report](https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
|
| 246 |
f"/resolve/main/reports/{submission_id}.html)"
|
|
|
|
| 296 |
if df.empty:
|
| 297 |
return pd.DataFrame(columns=columns)
|
| 298 |
df = df.copy()
|
| 299 |
+
# Compute `report` (not stored on the row) before projection drops
|
| 300 |
+
# the source columns it needs. Path is derived from `submission_id`
|
| 301 |
+
# so a layout change doesn't require a results.jsonl rewrite.
|
| 302 |
if "submission_id" in df.columns and "status" in df.columns:
|
| 303 |
+
df["report"] = df.apply(
|
| 304 |
+
lambda r: _report_url_md(
|
| 305 |
+
r["submission_id"],
|
| 306 |
+
r["status"],
|
| 307 |
+
r.get("submission_sha256"),
|
| 308 |
+
),
|
| 309 |
+
axis=1,
|
| 310 |
)
|
| 311 |
+
# Schema-field names -> display-column names. Keeps the rendered
|
| 312 |
+
# headers short and uniform without renaming anything in
|
| 313 |
+
# results.jsonl.
|
| 314 |
+
df = df.rename(columns={
|
| 315 |
+
"agent_url": "agent",
|
| 316 |
+
"submission_blob_url": "zip",
|
| 317 |
+
})
|
| 318 |
# Make sure every declared column exists (legacy rows can be
|
| 319 |
# missing optional fields). Detail-panel reads expect the
|
| 320 |
# column-set to be stable regardless of which source rows had
|
|
|
|
| 335 |
out["aggregate_score"] = out.apply(
|
| 336 |
lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
|
| 337 |
)
|
| 338 |
+
if "agent" in out.columns:
|
| 339 |
+
out["agent"] = out["agent"].apply(_agent_url_md)
|
| 340 |
+
if "zip" in out.columns:
|
| 341 |
+
out["zip"] = out["zip"].apply(_submission_blob_md)
|
| 342 |
return out
|
|
@@ -34,6 +34,9 @@ def _stub_rows():
|
|
| 34 |
"submission_blob_url": (
|
| 35 |
"https://huggingface.co/datasets/test/sub-a.zip"
|
| 36 |
),
|
|
|
|
|
|
|
|
|
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"submission_id": "sub-b",
|
|
@@ -51,9 +54,13 @@ def _stub_rows():
|
|
| 51 |
"submission_blob_url": (
|
| 52 |
"https://huggingface.co/datasets/test/sub-b.zip"
|
| 53 |
),
|
|
|
|
| 54 |
},
|
| 55 |
# Legacy row: pre-schema-bump shape. No `validation_status` key,
|
| 56 |
-
# no `status` key. Both
|
|
|
|
|
|
|
|
|
|
| 57 |
{
|
| 58 |
"submission_id": "sub-c-legacy",
|
| 59 |
"submitter_name": "team-gamma",
|
|
@@ -122,32 +129,55 @@ def test_empty_input_returns_two_empty_frames(monkeypatch):
|
|
| 122 |
|
| 123 |
|
| 124 |
def test_link_columns_render_as_markdown(monkeypatch):
|
| 125 |
-
"""
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
``
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
"""
|
| 133 |
monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
|
| 134 |
validated, unvalidated = leaderboard.load_leaderboard_split()
|
| 135 |
|
| 136 |
alpha = validated[validated["submission_name"] == "Alpha Agent v1"].iloc[0]
|
| 137 |
-
assert alpha["
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
"[zip](https://huggingface.co/datasets/test/sub-a.zip)"
|
| 140 |
)
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
assert alpha["report_url"].startswith("[report](")
|
| 144 |
-
assert "reports/sub-a.html" in alpha["report_url"]
|
| 145 |
|
| 146 |
-
# Null agent_url renders as empty cell, not a broken anchor.
|
| 147 |
beta = unvalidated[unvalidated["submission_name"] == "Beta Agent v2"].iloc[0]
|
| 148 |
-
assert beta["
|
| 149 |
-
assert beta["
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
def test_datatypes_align_with_columns():
|
|
@@ -169,7 +199,7 @@ def test_datatypes_align_with_columns():
|
|
| 169 |
for col, dt in zip(
|
| 170 |
leaderboard.LEADERBOARD_COLS, leaderboard.LEADERBOARD_DATATYPES
|
| 171 |
):
|
| 172 |
-
if col in ("
|
| 173 |
assert dt == "markdown"
|
| 174 |
else:
|
| 175 |
assert dt == "str"
|
|
|
|
| 34 |
"submission_blob_url": (
|
| 35 |
"https://huggingface.co/datasets/test/sub-a.zip"
|
| 36 |
),
|
| 37 |
+
# Modern submit pipeline: sha256 is populated, so report
|
| 38 |
+
# links should be emitted on completed rows.
|
| 39 |
+
"submission_sha256": "a" * 64,
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"submission_id": "sub-b",
|
|
|
|
| 54 |
"submission_blob_url": (
|
| 55 |
"https://huggingface.co/datasets/test/sub-b.zip"
|
| 56 |
),
|
| 57 |
+
"submission_sha256": "b" * 64,
|
| 58 |
},
|
| 59 |
# Legacy row: pre-schema-bump shape. No `validation_status` key,
|
| 60 |
+
# no `status` key, no `submission_sha256`. Both `status` and
|
| 61 |
+
# `validation_status` should be defaulted by the reader; the
|
| 62 |
+
# missing sha256 must suppress the report link (the
|
| 63 |
+
# corresponding reports/<id>.html doesn't exist on the dataset).
|
| 64 |
{
|
| 65 |
"submission_id": "sub-c-legacy",
|
| 66 |
"submitter_name": "team-gamma",
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
def test_link_columns_render_as_markdown(monkeypatch):
    """The `agent` / `zip` / `report` cells are markdown link strings.

    Each link cell is a ``[label](url)`` string so the Leaderboard
    widget, under ``datatype="markdown"``, renders a clickable anchor.
    ``agent`` uses the URL itself (scheme stripped) as its label so a
    reader can tell what's behind the click; ``zip`` and ``report``
    keep short fixed labels because they always point at our own
    infrastructure.

    A missing ``agent_url`` renders as the italic ``_None_``
    placeholder, not a blank cell, so the reader sees the field exists
    but wasn't filled.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
    validated, unvalidated = leaderboard.load_leaderboard_split()

    # Validated row with every link field populated.
    alpha_rows = validated[validated["submission_name"] == "Alpha Agent v1"]
    alpha = alpha_rows.iloc[0]
    expected_agent = (
        "[github.com/example/alpha-agent](https://github.com/example/alpha-agent)"
    )
    expected_zip = "[zip](https://huggingface.co/datasets/test/sub-a.zip)"
    assert alpha["agent"] == expected_agent
    assert alpha["zip"] == expected_zip
    assert alpha["report"].startswith("[report](")
    assert "reports/sub-a.html" in alpha["report"]

    # Unvalidated row with no agent_url.
    beta_rows = unvalidated[unvalidated["submission_name"] == "Beta Agent v2"]
    beta = beta_rows.iloc[0]
    assert beta["agent"] == "_None_"
    assert beta["zip"].startswith("[zip](")
    # Beta has submission_sha256, so the report link is emitted.
    assert beta["report"].startswith("[report](")
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def test_legacy_row_omits_report_link(monkeypatch):
    """Legacy seed rows (no ``submission_sha256``) get no report link.

    Only the modern submit pipeline uploads ``reports/<id>.html``.
    Legacy seed rows pre-date it and never had a report uploaded, so a
    /resolve/ URL for them would 404. The leaderboard gates on
    ``submission_sha256`` (the schema's "modern pipeline" sentinel) to
    avoid rendering that broken link.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
    unvalidated = leaderboard.load_leaderboard_split()[1]

    legacy_rows = unvalidated[unvalidated["submission_name"] == "Gamma baseline"]
    gamma = legacy_rows.iloc[0]
    assert gamma["report"] == ""
    # Sanity: agent + zip still render normally on the legacy row.
    assert gamma["agent"].startswith("[github.com/example/gamma-baseline](")
    assert gamma["zip"].startswith("[zip](")
|
| 181 |
|
| 182 |
|
| 183 |
def test_datatypes_align_with_columns():
|
|
|
|
| 199 |
for col, dt in zip(
|
| 200 |
leaderboard.LEADERBOARD_COLS, leaderboard.LEADERBOARD_DATATYPES
|
| 201 |
):
|
| 202 |
+
if col in ("agent", "zip", "report"):
|
| 203 |
assert dt == "markdown"
|
| 204 |
else:
|
| 205 |
assert dt == "str"
|