Michael Rabinovich committed on
Commit
5fb3ebc
·
1 Parent(s): 6ffd043

leaderboard: open reports in new tab

Browse files

Now that the Space is public, link completed submissions directly to their
proxied HTML reports instead of inlining large report payloads through the
row-click detail panel.

Files changed (4) hide show
  1. app.py +6 -107
  2. leaderboard.py +24 -17
  3. tests/test_leaderboard.py +20 -21
  4. tests/test_proxy.py +7 -123
app.py CHANGED
@@ -122,49 +122,12 @@ VALIDATION_GUIDELINES_MD = f"""Submissions appear on the **Unvalidated** table t
122
 
123
  Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
124
 
125
- DETAIL_PLACEHOLDER = "_Click a row above for details._"
126
  SUBMIT_STATUS_IDLE = (
127
  "_Log in, attach a zip, and click **Submit**. Progress and any "
128
  "errors appear here._"
129
  )
130
 
131
 
132
- def _has(value) -> bool:
133
- """True for values that should show up in the detail panel."""
134
- if value is None:
135
- return False
136
- if isinstance(value, float) and pd.isna(value):
137
- return False
138
- return str(value).strip() != ""
139
-
140
-
141
- def _build_report_iframe(html_bytes: bytes) -> str:
142
- """Wrap a fetched report's HTML bytes into a self-contained iframe.
143
-
144
- ``srcdoc`` puts the entire report HTML directly inside the iframe
145
- attribute. The iframe gets its own document context, so the
146
- report's CSS can't collide with Gradio's, and an explicit
147
- ``height: 90vh`` keeps the report from being clipped by Gradio's
148
- column-flex layout (which was the visible cut-off the user saw
149
- in earlier rendering attempts).
150
-
151
- The Space is private; the FastAPI ``/reports/<id>.html`` route
152
- works server-side under Bearer auth but breaks for a logged-in
153
- browser user (HF's edge gates same-origin pathname navigations
154
- on a JWT that the browser doesn't carry forward). srcdoc
155
- sidesteps that entirely by inlining the bytes; no second HTTP
156
- request leaves the browser.
157
- """
158
- escaped = html.escape(
159
- html_bytes.decode("utf-8", errors="replace"), quote=True,
160
- )
161
- return (
162
- f'<iframe srcdoc="{escaped}" '
163
- 'style="width:100%; height:90vh; border:0; display:block;" '
164
- 'title="Submission report"></iframe>'
165
- )
166
-
167
-
168
  def _data_error_banner_md(message: str | None) -> str:
169
  """Markdown for the top-of-tab data-unavailable banner.
170
 
@@ -523,58 +486,6 @@ def _admin_stop_delete(
523
  )
524
 
525
 
526
- def _format_detail_and_report(
527
- df: pd.DataFrame | None, evt: gr.SelectData,
528
- ) -> tuple[str, str]:
529
- """Return ``(detail_markdown, report_iframe_html)`` for the clicked row.
530
-
531
- The detail panel holds metadata (submitter, status, timestamp,
532
- notes, links to ZIP and external agent URL). The HTML viewer
533
- holds the rendered report when one exists (status == completed
534
- AND the row carries the modern-pipeline ``submission_sha256``
535
- sentinel) - the leaderboard reader computes ``report_url`` only
536
- for rows that satisfy that gate.
537
-
538
- Returns ``(DETAIL_PLACEHOLDER, "")`` on a null / out-of-range
539
- event so the panel falls back to its initial state.
540
- """
541
- if df is None or len(df) == 0 or evt is None or evt.index is None:
542
- return DETAIL_PLACEHOLDER, ""
543
- idx = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
544
- if idx < 0 or idx >= len(df):
545
- return DETAIL_PLACEHOLDER, ""
546
- row = df.iloc[idx]
547
-
548
- title = row.get("submission_name") or "(unnamed submission)"
549
- lines = [f"### {title}", ""]
550
- if _has(row.get("submitter_name")):
551
- lines.append(f"- **Submitter**: {row['submitter_name']}")
552
- if _has(row.get("status")):
553
- lines.append(f"- **Status**: {row['status']}")
554
- if _has(row.get("submitted_at")):
555
- lines.append(f"- **Submitted**: {_fmt_timestamp(row['submitted_at'])}")
556
- if _has(row.get("notes")):
557
- lines.append(f"- **Notes**: {row['notes']}")
558
- lines.append(
559
- f"- **Model details (optional)**: "
560
- f"{row.get('model details (optional)') or '_None_'}"
561
- )
562
- if _has(row.get("submission_blob_url")):
563
- lines.append(
564
- f"- **Submission ZIP**: [download]({row['submission_blob_url']})"
565
- )
566
- if row.get("status") == "failed" and _has(row.get("failure_reason")):
567
- lines.append(f"- **Failure reason**: {row['failure_reason']}")
568
- detail_md = "\n".join(lines)
569
-
570
- report_iframe = ""
571
- if _has(row.get("report_url")) and _has(row.get("submission_id")):
572
- content = _fetch_report_html(str(row["submission_id"]))
573
- if content:
574
- report_iframe = _build_report_iframe(content)
575
- return detail_md, report_iframe
576
-
577
-
578
  @lru_cache(maxsize=128)
579
  def _fetch_report_html(submission_id: str) -> bytes | None:
580
  """Pull ``reports/<id>.html`` off the submissions dataset.
@@ -850,24 +761,12 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
850
  )
851
  download_btn.click(fn=build_combined_csv, outputs=download_btn)
852
 
853
- # Row-click panel: one shared metadata markdown component +
854
- # one report viewer below it. The viewer holds an iframe
855
- # containing the full per-submission report (srcdoc-inlined
856
- # so no second HTTP request needs to leave the browser - the
857
- # Space is private and HF's edge would 404 same-origin
858
- # pathname navigations that aren't carrying the iframe's
859
- # short-lived `__sign` JWT). Both tables share both outputs.
860
- detail_panel = gr.Markdown(
861
- value=DETAIL_PLACEHOLDER,
862
- label="Selected submission",
863
- )
864
- report_viewer = gr.HTML(value="", label="Report")
865
- for view in (validated_view, unvalidated_view):
866
- view.select(
867
- fn=_format_detail_and_report,
868
- inputs=view,
869
- outputs=[detail_panel, report_viewer],
870
- )
871
 
872
  with gr.Tab("Submit"):
873
  gr.Markdown(
 
122
 
123
  Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
124
 
 
125
  SUBMIT_STATUS_IDLE = (
126
  "_Log in, attach a zip, and click **Submit**. Progress and any "
127
  "errors appear here._"
128
  )
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def _data_error_banner_md(message: str | None) -> str:
132
  """Markdown for the top-of-tab data-unavailable banner.
133
 
 
486
  )
487
 
488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  @lru_cache(maxsize=128)
490
  def _fetch_report_html(submission_id: str) -> bytes | None:
491
  """Pull ``reports/<id>.html`` off the submissions dataset.
 
761
  )
762
  download_btn.click(fn=build_combined_csv, outputs=download_btn)
763
 
764
+ # No inline row-click detail panel: the submission_name cell is a
765
+ # deep-link that opens the self-contained per-submission report in
766
+ # a new tab (see `_submission_name_md` in leaderboard.py). Now that
767
+ # the Space is public, HF's edge serves `/reports/<id>.html` to
768
+ # browser users, so we link to it directly instead of inlining the
769
+ # (tens-to-hundreds-of-MB) report through the Gradio event payload.
 
 
 
 
 
 
 
 
 
 
 
 
770
 
771
  with gr.Tab("Submit"):
772
  gr.Markdown(
leaderboard.py CHANGED
@@ -24,6 +24,7 @@ that the submit path also consumes.
24
  """
25
  from __future__ import annotations
26
 
 
27
  import json
28
  import logging
29
  import os
@@ -305,26 +306,32 @@ def _report_relative_url(submission_id, status, submission_sha256) -> str:
305
 
306
 
307
  def _submission_name_md(name, report_url) -> str:
308
- """Render `submission_name` as plain text.
309
-
310
- Earlier iterations wrapped this as a markdown link to the report
311
- proxy at ``/reports/<id>.html``. On a private Space the link
312
- works server-side (Bearer auth) but breaks for a logged-in user
313
- in the browser: the iframe's `__sign` token doesn't propagate to
314
- same-origin pathname navigations, and HF's edge returns 404
315
- before the request reaches the FastAPI route. Reports are now
316
- rendered inline via an iframe-srcdoc viewer below the detail
317
- panel (see ``_format_detail_and_report`` in :mod:`app`), so the
318
- submission_name cell stays plain text to avoid offering a link
319
- that doesn't work.
320
-
321
- `report_url` is accepted (and ignored) so the call sites don't
322
- have to change when the Space eventually goes public and the
323
- link can be restored.
 
 
324
  """
325
  if _is_empty(name):
326
  return "(unnamed submission)"
327
- return str(name)
 
 
 
 
328
 
329
 
330
  def load_leaderboard_split() -> tuple[pd.DataFrame, pd.DataFrame]:
 
24
  """
25
  from __future__ import annotations
26
 
27
+ import html
28
  import json
29
  import logging
30
  import os
 
306
 
307
 
308
  def _submission_name_md(name, report_url) -> str:
309
+ """Render `submission_name`, linking to the report in a new tab.
310
+
311
+ Now that the Space is public, HF's edge serves the FastAPI
312
+ ``/reports/<id>.html`` route to in-browser users (it 404'd
313
+ same-origin pathname navigations while the Space was private,
314
+ which is why an earlier iteration kept this cell plain text and
315
+ inlined the report via an iframe-srcdoc viewer instead). So the
316
+ name cell becomes a deep-link that opens the self-contained
317
+ per-submission report in a **new tab** — the typical HF
318
+ leaderboard pattern, and far lighter than shipping the
319
+ (tens-to-hundreds-of-MB) report through the page on every click.
320
+
321
+ ``report_url`` is the relative ``/reports/<id>.html`` route the
322
+ reader computes only for completed modern-pipeline rows; rows
323
+ without one (pending / failed / legacy) render as plain text. The
324
+ name column is a ``markdown`` datatype, which renders inline HTML,
325
+ so a raw anchor with ``target="_blank"`` works; the name is
326
+ HTML-escaped so an odd submission name can't break the cell.
327
  """
328
  if _is_empty(name):
329
  return "(unnamed submission)"
330
+ label = html.escape(str(name))
331
+ if _is_empty(report_url):
332
+ return label
333
+ href = html.escape(str(report_url), quote=True)
334
+ return f'<a href="{href}" target="_blank" rel="noopener">{label}</a>'
335
 
336
 
337
  def load_leaderboard_split() -> tuple[pd.DataFrame, pd.DataFrame]:
tests/test_leaderboard.py CHANGED
@@ -147,33 +147,32 @@ def test_hub_read_failure_raises_no_silent_fallback(monkeypatch):
147
  leaderboard.load_leaderboard_split()
148
 
149
 
150
- def test_submission_name_is_plain_text(monkeypatch):
151
- """`submission_name` cells render as plain text on both tables.
152
-
153
- Earlier iterations wrapped the name as a markdown link to the
154
- report proxy, but the proxy URL 404s for logged-in users on a
155
- private Space (no auth carryover on same-origin pathname
156
- navigations). Reports are rendered inline via an iframe viewer
157
- on row-click instead, so the name cell stays plain text.
158
  """
159
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
160
  validated, unvalidated = leaderboard.load_leaderboard_split()
161
- for name in [
162
- "Alpha Agent v1",
163
- "Beta Agent v2",
164
- "Gamma baseline",
165
- ]:
166
- present = (
167
- (validated["submission_name"] == name).any()
168
- or (unvalidated["submission_name"] == name).any()
169
- )
170
- assert present, f"{name!r} should be present as a plain-text cell"
171
- # The hidden `report_url` column is still computed (used by the
172
- # row-click handler to decide whether to embed the report iframe).
173
  alpha = validated.iloc[0]
174
  assert alpha["report_url"] == "/reports/sub-a.html"
175
- gamma = unvalidated[unvalidated["submission_name"] == "Gamma baseline"].iloc[0]
 
 
 
 
 
 
 
 
 
 
176
  assert gamma["report_url"] == ""
 
177
 
178
 
179
  def test_model_details_column_renders(monkeypatch):
 
147
  leaderboard.load_leaderboard_split()
148
 
149
 
150
+ def test_submission_name_links_to_report_in_new_tab(monkeypatch):
151
+ """`submission_name` deep-links to the report in a new tab when one exists.
152
+
153
+ Now that the Space is public, the name cell is an anchor with
154
+ ``target="_blank"`` pointing at the ``/reports/<id>.html`` route
155
+ (completed modern-pipeline rows only). Rows without a report
156
+ (legacy / pre-pipeline, no ``submission_sha256``) stay plain text.
 
157
  """
158
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
159
  validated, unvalidated = leaderboard.load_leaderboard_split()
160
+ # Modern completed rows -> new-tab anchor to their report route.
 
 
 
 
 
 
 
 
 
 
 
161
  alpha = validated.iloc[0]
162
  assert alpha["report_url"] == "/reports/sub-a.html"
163
+ assert alpha["submission_name"] == (
164
+ '<a href="/reports/sub-a.html" target="_blank" rel="noopener">'
165
+ "Alpha Agent v1</a>"
166
+ )
167
+ beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0]
168
+ assert beta["submission_name"] == (
169
+ '<a href="/reports/sub-b.html" target="_blank" rel="noopener">'
170
+ "Beta Agent v2</a>"
171
+ )
172
+ # Legacy row without a report -> plain text, no anchor.
173
+ gamma = unvalidated[unvalidated["submitter_name"] == "team-gamma"].iloc[0]
174
  assert gamma["report_url"] == ""
175
+ assert gamma["submission_name"] == "Gamma baseline"
176
 
177
 
178
  def test_model_details_column_renders(monkeypatch):
tests/test_proxy.py CHANGED
@@ -1,24 +1,17 @@
1
- """Unit tests for the report-proxy route and the inline iframe viewer.
2
 
3
- The Space exposes two paths for the per-submission HTML report:
4
-
5
- - ``/reports/{submission_id}.html`` (FastAPI route): re-serves the
6
- file with ``Content-Type: text/html``. Works under Bearer auth
7
- for programmatic clients; gets 404'd by HF's edge for logged-in
8
- browser users on a private Space (no auth carryover across
9
- same-origin pathname navigations). Kept as a backdoor / for the
10
- future public migration.
11
- - ``_format_detail_and_report`` (row-click handler): server-side
12
- fetches the report via ``hf_hub_download`` and inlines it into
13
- an ``<iframe srcdoc=...>``. No browser HTTP request → no edge
14
- auth gate → renders for any logged-in user.
15
 
16
  Tests stub the Hub fetch via monkeypatch so the suite has zero
17
  network I/O.
18
  """
19
  from __future__ import annotations
20
 
21
- import re
22
  import types
23
 
24
  import pandas as pd
@@ -75,93 +68,6 @@ def test_proxy_route_is_registered():
75
  assert "/reports/{submission_id}.html" in routes
76
 
77
 
78
- # --- Inline iframe viewer (_format_detail_and_report) ----------------
79
-
80
- def _stub_row(**overrides):
81
- base = {
82
- "submission_id": "sub-test-x",
83
- "submission_name": "Test Agent",
84
- "submitter_name": "team-test",
85
- "status": "completed",
86
- "submitted_at": "2026-05-26T12:02:31Z",
87
- "notes": None,
88
- "model details (optional)": "_None_",
89
- "submission_blob_url": "https://example.test/sub-test-x.zip",
90
- "report_url": "/reports/sub-test-x.html",
91
- "failure_reason": None,
92
- }
93
- base.update(overrides)
94
- return pd.DataFrame([base])
95
-
96
-
97
- def _fake_evt(idx=0):
98
- """Minimal stand-in for gr.SelectData with the .index attr we read."""
99
- return types.SimpleNamespace(index=[idx, 0])
100
-
101
-
102
- def test_iframe_viewer_inlines_report_for_modern_row(monkeypatch):
103
- """A completed modern row's HTML lands inside <iframe srcdoc>.
104
-
105
- Confirms the fetched bytes are HTML-escaped (so `<html>` is not
106
- re-parsed by the host page) and the iframe carries explicit
107
- sizing so Gradio's column flex can't clip it vertically.
108
- """
109
- monkeypatch.setattr(
110
- app, "_fetch_report_html",
111
- lambda sid: b"<!DOCTYPE html><body><h1>Report for " + sid.encode() + b"</h1></body>",
112
- )
113
- df = _stub_row()
114
- md, iframe = app._format_detail_and_report(df, _fake_evt())
115
- assert "### Test Agent" in md
116
- assert iframe.startswith('<iframe srcdoc="')
117
- # HTML-escaped content: literal "<!DOCTYPE html>" becomes
118
- # "&lt;!DOCTYPE html&gt;" inside the attribute.
119
- assert "&lt;!DOCTYPE html&gt;" in iframe
120
- assert "sub-test-x" in iframe
121
- assert 'style="width:100%; height:90vh; border:0; display:block;"' in iframe
122
-
123
-
124
- def test_iframe_viewer_empty_for_pending_or_failed_row(monkeypatch):
125
- """Rows without a report_url get an empty viewer (no iframe at all).
126
-
127
- Pending: still evaluating; no report exists yet. Failed: eval
128
- crashed; no report uploaded. Legacy: pre-modern-pipeline; the
129
- file genuinely doesn't exist on the dataset. All three are
130
- handled by the same `report_url == ""` gate.
131
- """
132
- monkeypatch.setattr(app, "_fetch_report_html", lambda sid: b"unused")
133
- for row_overrides in [
134
- {"status": "pending", "report_url": ""},
135
- {"status": "failed", "report_url": "", "failure_reason": "boom"},
136
- {"status": "completed", "report_url": ""}, # legacy
137
- ]:
138
- df = _stub_row(**row_overrides)
139
- md, iframe = app._format_detail_and_report(df, _fake_evt())
140
- assert iframe == "", f"expected empty viewer for {row_overrides}"
141
- # The metadata panel still renders.
142
- assert "### Test Agent" in md
143
-
144
-
145
- def test_iframe_viewer_falls_back_to_empty_when_fetch_fails(monkeypatch):
146
- """If _fetch_report_html returns None (Hub blip), no iframe is emitted.
147
-
148
- Avoids surfacing a broken iframe on a transient failure.
149
- """
150
- monkeypatch.setattr(app, "_fetch_report_html", lambda sid: None)
151
- df = _stub_row()
152
- _md, iframe = app._format_detail_and_report(df, _fake_evt())
153
- assert iframe == ""
154
-
155
-
156
- def test_iframe_viewer_returns_placeholder_on_null_event():
157
- """A null SelectData (no row clicked) returns placeholder + empty viewer."""
158
- df = _stub_row()
159
- fake = types.SimpleNamespace(index=None)
160
- md, iframe = app._format_detail_and_report(df, fake)
161
- assert md == app.DETAIL_PLACEHOLDER
162
- assert iframe == ""
163
-
164
-
165
  # --- Boot resilience: no silent fallback, but no crash either -------
166
  #
167
  # leaderboard.load_leaderboard_split / load_admin_table *raise*
@@ -304,25 +210,3 @@ def test_refresh_handler_shows_banner_and_warns_on_error(monkeypatch):
304
  assert len(validated) == 0 and len(unvalidated) == 0
305
  # The banner output is a gr.Markdown update flipped visible.
306
  assert getattr(banner, "visible", None) is True
307
-
308
-
309
- def test_iframe_escape_is_attribute_safe(monkeypatch):
310
- """Quotes / ampersands inside the report HTML are escaped properly.
311
-
312
- A `"` inside the report would otherwise terminate the srcdoc
313
- attribute prematurely and break parsing. Regression guard.
314
- """
315
- monkeypatch.setattr(
316
- app, "_fetch_report_html",
317
- lambda sid: b'<html><body>tag: <a href="https://x.test">x</a> & co.</body></html>',
318
- )
319
- df = _stub_row()
320
- _md, iframe = app._format_detail_and_report(df, _fake_evt())
321
- # Within the srcdoc value, double-quotes must be HTML-escaped.
322
- srcdoc = re.search(r'srcdoc="(.*)"\s+style=', iframe, re.DOTALL)
323
- assert srcdoc is not None
324
- inner = srcdoc.group(1)
325
- # No unescaped " inside the attribute value.
326
- assert '"' not in inner
327
- assert "&quot;" in inner
328
- assert "&amp;" in inner
 
1
+ """Unit tests for the report-proxy route.
2
 
3
+ The Space exposes the per-submission HTML report at
4
+ ``/reports/{submission_id}.html`` (FastAPI route): it re-serves the
5
+ file with ``Content-Type: text/html``. Now that the Space is public,
6
+ HF's edge serves this route to in-browser users, so the leaderboard's
7
+ submission_name cell links straight to it (opening in a new tab)
8
+ rather than inlining the report into the page.
 
 
 
 
 
 
9
 
10
  Tests stub the Hub fetch via monkeypatch so the suite has zero
11
  network I/O.
12
  """
13
  from __future__ import annotations
14
 
 
15
  import types
16
 
17
  import pandas as pd
 
68
  assert "/reports/{submission_id}.html" in routes
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # --- Boot resilience: no silent fallback, but no crash either -------
72
  #
73
  # leaderboard.load_leaderboard_split / load_admin_table *raise*
 
210
  assert len(validated) == 0 and len(unvalidated) == 0
211
  # The banner output is a gr.Markdown update flipped visible.
212
  assert getattr(banner, "visible", None) is True