Spaces:

HuggingAI4Engineering
/

cadgenbench-leaderboard

Running

Michael Rabinovich Cursor committed about 6 hours ago

Commit

461547b

1 Parent(s): 5140b0a

leaderboard: rename tabs, relabel fixtures as samples, inline gallery row stats

- Rename the visual "Gallery" tab to "Leaderboard" and the table tab to
"Detailed View" (order unchanged); update the smoke test accordingly.
- Replace user-facing "fixture" wording with "sample" across the gallery,
task browser, submit instructions, and submitter status/error messages.
Internal data keys, routes, CSS, and identifiers are unchanged.
- Drop the gallery's "more numbers" expander: surface Generation/Editing
inline under the aggregate score and move Download ZIP under the name.
- Link each gallery submission title to its hosted report (same
/reports/<id>.html the Detailed View table uses); drop the date line.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (5) hide show

app.py +6 -6
gallery.py +38 -48
submit.py +11 -11
tasks.py +2 -2
tests/test_smoke.py +6 -5

app.py CHANGED Viewed

@@ -84,7 +84,7 @@ logging.basicConfig(
 # Canonical policy doc lives in the code repo so contributors reading
 # the GitHub repo see it without needing to visit the Space. Linked
-# from both the Leaderboard tab's Validation Guidelines accordion and
 # the About tab.
 VALIDATION_DOC_URL = (
     "https://github.com/huggingface/cadgenbench/blob/main/docs/benchmark/validation.md"
@@ -774,7 +774,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
         "_Benchmarking AI-driven CAD generation._"
     )
-    with gr.Tab("Gallery"):
         # Visual-first leaderboard. The bespoke surface (sticky GT row,
         # fixture picker, turntable grid, compare modal) is a
         # self-contained HTML doc inlined into an iframe `srcdoc` so it
@@ -788,7 +788,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
             fn=_gallery_iframe_html, outputs=gallery_html,
         )
-    with gr.Tab("Leaderboard"):
         # Load both tiers once at boot. `_safe_load_split` keeps a Hub
         # read failure from crashing the Space: on failure the frames
         # come up empty and `initial_error` carries the message the
@@ -882,9 +882,9 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
             f"""
 **Submission format.** A single zip with:
-- one folder per fixture in `{HF_DATA_REPO}`; include `output.step` for
-  fixtures where your system produced a candidate. Missing `output.step`
-  scores zero for that fixture;
 - a top-level `meta.json`:
 ```json

 # Canonical policy doc lives in the code repo so contributors reading
 # the GitHub repo see it without needing to visit the Space. Linked
+# from both the Detailed View tab's Validation Guidelines accordion and
 # the About tab.
 VALIDATION_DOC_URL = (
     "https://github.com/huggingface/cadgenbench/blob/main/docs/benchmark/validation.md"
         "_Benchmarking AI-driven CAD generation._"
     )
+    with gr.Tab("Leaderboard"):
         # Visual-first leaderboard. The bespoke surface (sticky GT row,
         # fixture picker, turntable grid, compare modal) is a
         # self-contained HTML doc inlined into an iframe `srcdoc` so it
             fn=_gallery_iframe_html, outputs=gallery_html,
         )
+    with gr.Tab("Detailed View"):
         # Load both tiers once at boot. `_safe_load_split` keeps a Hub
         # read failure from crashing the Space: on failure the frames
         # come up empty and `initial_error` carries the message the
             f"""
 **Submission format.** A single zip with:
+- one folder per sample in `{HF_DATA_REPO}`; include `output.step` for
+  samples where your system produced a candidate. Missing `output.step`
+  scores zero for that sample;
 - a top-level `meta.json`:
 ```json

gallery.py CHANGED Viewed

@@ -46,6 +46,8 @@ from __future__ import annotations
 import json
 # Gallery shows the top-N verified submissions only (the visual shop
 # window). The numeric long tail lives on the Full results / Leaderboard
 # tab, not here.
@@ -134,6 +136,9 @@ def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
     return {
         "id": sid,
         "name": row.get("submission_name") or "(unnamed submission)",
         "who": row.get("submitter_name") or "",
         "score": row.get("aggregate_score"),
         "validity": row.get("validity_rate"),
@@ -297,7 +302,7 @@ body {
 .gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
 .grid-head, .grow {
   display: grid;
-  grid-template-columns: 52px minmax(220px, 1.4fr) 116px repeat(var(--ncol, 3), minmax(150px, 1fr));
   align-items: stretch;
 }
 .grid-head {
@@ -333,7 +338,8 @@ body {
 .rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }
 .ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
-.ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; }
 .ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: var(--mono); }
 .score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
@@ -364,28 +370,18 @@ body {
 .thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
 .thumb.failed .ftag { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }
-.sub-row.open { background: #fafbff; }
-.detail {
-  grid-column: 1 / -1; background: #f8f9fc; border-top: 1px dashed var(--line-strong);
-  padding: 0 14px; max-height: 0; overflow: hidden; transition: max-height .28s ease, padding .28s ease;
-}
-.detail.show { max-height: 260px; padding: 18px 14px 22px; }
-.metric-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; }
-.metric { background: var(--panel); border: 1px solid var(--line); border-radius: 10px; padding: 12px 14px; }
-.metric .m-label { font-size: 10px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
-.metric .m-val { font-size: 20px; font-weight: 800; margin-top: 4px; letter-spacing: -.01em; }
-.detail-foot { margin-top: 14px; font-size: 12.5px; color: var(--ink-soft); display: flex; gap: 18px; flex-wrap: wrap; align-items: center; }
-.detail-foot a { color: var(--accent); text-decoration: none; font-weight: 600; }
-.detail-foot a:hover { text-decoration: underline; }
-.row-toggle {
-  grid-column: 1 / -1; display: flex; align-items: center; justify-content: center; gap: 6px;
-  padding: 7px; cursor: pointer; font-size: 11px; font-weight: 700; color: var(--ink-faint);
-  text-transform: uppercase; letter-spacing: .05em; border-top: 1px solid var(--line);
-  background: #fcfcfe; user-select: none;
-}
-.row-toggle:hover { color: var(--accent); background: var(--accent-soft); }
-.row-toggle .chev { transition: transform .2s ease; }
-.sub-row.open .row-toggle .chev { transform: rotate(180deg); }
 .empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }
@@ -416,13 +412,13 @@ body {
 _BODY = """
 <div class="wrap">
   <div class="controls">
-    <div class="label">Fixtures shown <span class="picker-help">- pick up to 3 to compare across all models (changes columns globally)</span></div>
     <div class="picker-row">
       <div class="pills" id="pills"></div>
       <div class="picker-anchor">
-        <button class="add-fixture" id="addFixtureBtn">+ Add fixture</button>
         <div class="popover" id="popover" hidden>
-          <input type="text" class="popover-search" id="popoverSearch" placeholder="Search fixtures..." autocomplete="off">
           <div class="popover-list" id="popoverList"></div>
         </div>
       </div>
@@ -557,14 +553,14 @@ function renderPopoverList() {
         + tag + '<span class="iname">' + esc(f.name) + '</span></div>';
     }).join('');
   });
-  if (!html) html = '<div class="popover-empty">No fixtures match \\u201c' + esc(popoverQuery) + '\\u201d.</div>';
   list.innerHTML = html;
   // At the cap, show a note rather than silently dropping a pick.
   const existingCap = document.getElementById('popoverCap');
   if (existingCap) existingCap.remove();
   if (selected.length >= MAX_FIXTURES) {
     list.insertAdjacentHTML('afterend',
-      '<div class="popover-cap" id="popoverCap">Max ' + MAX_FIXTURES + ' fixtures - remove one to add another.</div>');
   }
   list.querySelectorAll('.popover-item').forEach(it => {
     it.onclick = () => {
@@ -678,23 +674,24 @@ function buildGallery() {
     row.className = 'grow sub-row';
     const medal = i < 3 ? 'medal-' + (i + 1) : '';
     const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
     let cells = '<div class="rank ' + medal + '">' + (i + 1) + '</div>'
-      + '<div class="ident"><span class="sub-name">' + esc(s.name) + '</span><span class="submitter">' + esc(s.who) + '</span></div>'
       + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
       + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
     selected.forEach(id => {
       cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
     });
-    cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">&#9662;</span></div>';
-    cells += '<div class="detail" id="detail-' + esc(s.id) + '">'
-      + '<div class="metric-grid">'
-      + '<div class="metric"><div class="m-label">Generation</div><div class="m-val">' + fmt(s.gen, 3) + '</div></div>'
-      + '<div class="metric"><div class="m-label">Editing</div><div class="m-val">' + fmt(s.edit, 3) + '</div></div>'
-      + '<div class="metric"><div class="m-label">Validity</div><div class="m-val">' + pct(s.validity) + '</div></div>'
-      + '</div>'
-      + '<div class="detail-foot"><span>Submitted ' + esc(s.date) + (s.version ? ' - cadgenbench v' + esc(s.version) : '') + '</span>'
-      + (s.blobUrl ? '<a href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener">Download ZIP</a>' : '')
-      + '</div></div>';
     row.innerHTML = cells;
     g.appendChild(row);
   });
@@ -704,13 +701,6 @@ function buildGallery() {
 }
 function wireGallery() {
-  document.querySelectorAll('[data-toggle]').forEach(t => {
-    t.onclick = () => {
-      const id = t.dataset.toggle;
-      document.getElementById('detail-' + id).classList.toggle('show');
-      t.closest('.sub-row').classList.toggle('open');
-    };
-  });
   document.querySelectorAll('.thumb[data-sub]').forEach(th => {
     th.onclick = () => {
       const sub = DATA.subs.find(x => x.id === th.dataset.sub);
@@ -737,7 +727,7 @@ function openModal(fxId, sub) {
   }
   const cad = (cell.cad === null || cell.cad === undefined) ? '-' : Number(cell.cad).toFixed(3);
   document.getElementById('modalNote').innerHTML =
-    'CAD score for this fixture: <b>' + cad + '</b>. The full per-fixture report '
     + '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
   document.getElementById('modalBack').classList.add('show');
 }

 import json
+from leaderboard import _report_relative_url
 # Gallery shows the top-N verified submissions only (the visual shop
 # window). The numeric long tail lives on the Detailed View tab,
 # not here.
     return {
         "id": sid,
         "name": row.get("submission_name") or "(unnamed submission)",
+        "reportUrl": _report_relative_url(
+            sid, row.get("status"), row.get("submission_sha256"),
+        ),
         "who": row.get("submitter_name") or "",
         "score": row.get("aggregate_score"),
         "validity": row.get("validity_rate"),
 .gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
 .grid-head, .grow {
   display: grid;
+  grid-template-columns: 52px minmax(220px, 1.4fr) 170px repeat(var(--ncol, 3), minmax(150px, 1fr));
   align-items: stretch;
 }
 .grid-head {
 .rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }
 .ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
+.ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; color: var(--ink); text-decoration: none; }
+a.sub-name:hover { color: var(--accent); text-decoration: underline; }
 .ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: var(--mono); }
 .score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
 .thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
 .thumb.failed .ftag { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }
+/* Inline Gen/Edit breakdown beneath the aggregate score (replaces the old
+   "more numbers" expander). Validity stays as its own line below. */
+.score-cell .score-breakdown { display: flex; gap: 14px; margin: 1px 0; }
+.score-cell .sb { display: flex; flex-direction: column; line-height: 1.15; }
+.score-cell .sb-l { font-size: 9px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
+.score-cell .sb-v { font-size: 13px; font-weight: 700; font-family: var(--mono); color: var(--ink-soft); }
+/* Download ZIP link, tucked under the submitter name. */
+.ident .ident-foot { display: flex; align-items: center; gap: 10px; margin-top: 5px; flex-wrap: wrap; }
+.ident .dl { font-size: 11.5px; font-weight: 600; color: var(--accent); text-decoration: none; display: inline-flex; align-items: center; gap: 4px; }
+.ident .dl .dl-ic { font-size: 13px; line-height: 1; }
+.ident .dl:hover { text-decoration: underline; }
 .empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }
 _BODY = """
 <div class="wrap">
   <div class="controls">
+    <div class="label">Samples shown <span class="picker-help">- pick up to 3 to compare across all models (changes columns globally)</span></div>
     <div class="picker-row">
       <div class="pills" id="pills"></div>
       <div class="picker-anchor">
+        <button class="add-fixture" id="addFixtureBtn">+ Add sample</button>
         <div class="popover" id="popover" hidden>
+          <input type="text" class="popover-search" id="popoverSearch" placeholder="Search samples..." autocomplete="off">
           <div class="popover-list" id="popoverList"></div>
         </div>
       </div>
         + tag + '<span class="iname">' + esc(f.name) + '</span></div>';
     }).join('');
   });
+  if (!html) html = '<div class="popover-empty">No samples match \\u201c' + esc(popoverQuery) + '\\u201d.</div>';
   list.innerHTML = html;
   // At the cap, show a note rather than silently dropping a pick.
   const existingCap = document.getElementById('popoverCap');
   if (existingCap) existingCap.remove();
   if (selected.length >= MAX_FIXTURES) {
     list.insertAdjacentHTML('afterend',
+      '<div class="popover-cap" id="popoverCap">Max ' + MAX_FIXTURES + ' samples - remove one to add another.</div>');
   }
   list.querySelectorAll('.popover-item').forEach(it => {
     it.onclick = () => {
     row.className = 'grow sub-row';
     const medal = i < 3 ? 'medal-' + (i + 1) : '';
     const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
+    const nameHTML = s.reportUrl
+      ? '<a class="sub-name" href="' + esc(s.reportUrl) + '" target="_blank" rel="noopener">' + esc(s.name) + '</a>'
+      : '<span class="sub-name">' + esc(s.name) + '</span>';
     let cells = '<div class="rank ' + medal + '">' + (i + 1) + '</div>'
+      + '<div class="ident">' + nameHTML
+      + '<span class="submitter">' + esc(s.who) + '</span>'
+      + '<div class="ident-foot">'
+      + (s.blobUrl ? '<a class="dl" href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener"><span class="dl-ic">&#8675;</span>Download ZIP</a>' : '')
+      + '</div></div>'
       + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
+      + '<div class="score-breakdown">'
+      + '<span class="sb"><span class="sb-l">Gen</span><span class="sb-v">' + fmt(s.gen, 3) + '</span></span>'
+      + '<span class="sb"><span class="sb-l">Edit</span><span class="sb-v">' + fmt(s.edit, 3) + '</span></span>'
+      + '</div>'
       + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
     selected.forEach(id => {
       cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
     });
     row.innerHTML = cells;
     g.appendChild(row);
   });
 }
 function wireGallery() {
   document.querySelectorAll('.thumb[data-sub]').forEach(th => {
     th.onclick = () => {
       const sub = DATA.subs.find(x => x.id === th.dataset.sub);
   }
   const cad = (cell.cad === null || cell.cad === undefined) ? '-' : Number(cell.cad).toFixed(3);
   document.getElementById('modalNote').innerHTML =
+    'CAD score for this sample: <b>' + cad + '</b>. The full per-sample report '
     + '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
   document.getElementById('modalBack').classList.add('show');
 }

submit.py CHANGED Viewed

@@ -437,7 +437,7 @@ def handle_submit(
     yield _submit_status(
         "working",
-        "Validating submission (unpacking the zip, checking the fixture set "
         "and STEP files)…",
     )
@@ -481,7 +481,7 @@ def handle_submit(
         )
         yield _submit_status(
             "working",
-            f"Uploading `{submission_id}` ({len(fixture_names)} fixtures) and "
             f"queuing the evaluation… (this can take a moment, and retries "
             f"automatically if the Hub is busy).",
         )
@@ -502,14 +502,14 @@ def handle_submit(
         progress.publish(
             submission_id,
             progress.QUEUED,
-            f"Queued ({len(fixture_names)} fixtures) — waiting for the "
             f"evaluation to start…",
         )
         _spawn_worker(submission_id, blob_url, sorted(fixture_names))
         yield _submit_status(
             "queued",
             f"Submission `{submission_id}` queued ({len(fixture_names)} "
-            f"fixtures). The eval runs on an HF Jobs GPU; your row appears on "
             f"the **Unvalidated** leaderboard and flips to completed when the "
             f"job finishes (typically 1–3 minutes). Live progress below.",
         )
@@ -603,7 +603,7 @@ def _load_and_validate_meta(unpacked: Path) -> dict[str, Any]:
     if not meta_path.is_file():
         raise _ValidationError(
             "Zip is missing top-level `meta.json` (expected at the root of "
-            "the zip, alongside the per-fixture folders)."
         )
     try:
         meta = json.loads(meta_path.read_text())
@@ -667,7 +667,7 @@ def _validate_fixture_set(unpacked: Path) -> set[str]:
         inputs_root = data_inputs_dir()
     except Exception as e:  # noqa: BLE001 - paths.py raises a few types
         raise _ValidationError(
-            f"Server-side error resolving the fixture set "
             f"({type(e).__name__}: {e})."
         ) from e
     expected = {p.name for p in inputs_root.iterdir() if p.is_dir()}
@@ -677,11 +677,11 @@ def _validate_fixture_set(unpacked: Path) -> set[str]:
     if missing or extras:
         parts: list[str] = []
         if missing:
-            parts.append(f"missing fixture(s): {', '.join(sorted(missing))}")
         if extras:
             parts.append(f"unexpected folder(s): {', '.join(sorted(extras))}")
         raise _ValidationError(
-            "Fixture set does not match the dataset. " + "; ".join(parts) + "."
         )
     return expected
@@ -694,7 +694,7 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
     # 1-5s, so fanning out a 5+ fixture set across cpu-upgrade vCPUs
     # cuts wall time roughly linearly. ex.map raises the first child
     # exception when its iterator is consumed, so wrapping in list()
-    # preserves the same `Fixture <name>` rejection text as the
     # sequential loop did.
     def _check_one_step(name: str) -> None:
         step = _candidate_step_path(unpacked / name)
@@ -704,13 +704,13 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
             return
         if step.stat().st_size == 0:
             raise _ValidationError(
-                f"Fixture `{name}` has an empty `{step.name}`."
             )
         try:
             parse_step(step)
         except RuntimeError as e:
             raise _ValidationError(
-                f"Fixture `{name}` has an `{step.name}` that is not loadable "
                 f"as STEP geometry: {e}"
             ) from e

     yield _submit_status(
         "working",
+        "Validating submission (unpacking the zip, checking the sample set "
         "and STEP files)…",
     )
         )
         yield _submit_status(
             "working",
+            f"Uploading `{submission_id}` ({len(fixture_names)} samples) and "
             f"queuing the evaluation… (this can take a moment, and retries "
             f"automatically if the Hub is busy).",
         )
         progress.publish(
             submission_id,
             progress.QUEUED,
+            f"Queued ({len(fixture_names)} samples) — waiting for the "
             f"evaluation to start…",
         )
         _spawn_worker(submission_id, blob_url, sorted(fixture_names))
         yield _submit_status(
             "queued",
             f"Submission `{submission_id}` queued ({len(fixture_names)} "
+            f"samples). The eval runs on an HF Jobs GPU; your row appears on "
             f"the **Unvalidated** leaderboard and flips to completed when the "
             f"job finishes (typically 1–3 minutes). Live progress below.",
         )
     if not meta_path.is_file():
         raise _ValidationError(
             "Zip is missing top-level `meta.json` (expected at the root of "
+            "the zip, alongside the per-sample folders)."
         )
     try:
         meta = json.loads(meta_path.read_text())
         inputs_root = data_inputs_dir()
     except Exception as e:  # noqa: BLE001 - paths.py raises a few types
         raise _ValidationError(
+            f"Server-side error resolving the sample set "
             f"({type(e).__name__}: {e})."
         ) from e
     expected = {p.name for p in inputs_root.iterdir() if p.is_dir()}
     if missing or extras:
         parts: list[str] = []
         if missing:
+            parts.append(f"missing sample(s): {', '.join(sorted(missing))}")
         if extras:
             parts.append(f"unexpected folder(s): {', '.join(sorted(extras))}")
         raise _ValidationError(
+            "Sample set does not match the dataset. " + "; ".join(parts) + "."
         )
     return expected
     # 1-5s, so fanning out a 5+ fixture set across cpu-upgrade vCPUs
     # cuts wall time roughly linearly. ex.map raises the first child
     # exception when its iterator is consumed, so wrapping in list()
+    # preserves the same `Sample <name>` rejection text as the
     # sequential loop did.
     def _check_one_step(name: str) -> None:
         step = _candidate_step_path(unpacked / name)
             return
         if step.stat().st_size == 0:
             raise _ValidationError(
+                f"Sample `{name}` has an empty `{step.name}`."
             )
         try:
             parse_step(step)
         except RuntimeError as e:
             raise _ValidationError(
+                f"Sample `{name}` has an `{step.name}` that is not loadable "
                 f"as STEP geometry: {e}"
             ) from e

tasks.py CHANGED Viewed

@@ -164,7 +164,7 @@ def _render_task_card(task: dict, idx: int, asset_url) -> str:
 def _render_summary_table(tasks: list[dict]) -> str:
     rows = [
         '<table class="summary-table" id="summary-table">',
-        "<thead><tr><th>Fixture</th><th>Type</th></tr></thead><tbody>",
     ]
     for i, t in enumerate(tasks):
         rows.append(
@@ -225,7 +225,7 @@ def render_tasks_page(tasks: list[dict], asset_url) -> str:
         p.append(_render_summary_table(tasks))
     else:
         p.append(
-            '<p class="note">No tasks found in the fixture inputs dataset.</p>'
         )
     p.append("</div>")

 def _render_summary_table(tasks: list[dict]) -> str:
     rows = [
         '<table class="summary-table" id="summary-table">',
+        "<thead><tr><th>Sample</th><th>Type</th></tr></thead><tbody>",
     ]
     for i, t in enumerate(tasks):
         rows.append(
         p.append(_render_summary_table(tasks))
     else:
         p.append(
+            '<p class="note">No tasks found in the sample inputs dataset.</p>'
         )
     p.append("</div>")

tests/test_smoke.py CHANGED Viewed

@@ -4,7 +4,7 @@ Boots ``app.py`` in a subprocess (via the ``app_url`` fixture in
 :mod:`conftest`) and asserts the Phase D minimum:
 - All three Gradio tabs render.
-- The Leaderboard tab carries two stacked ``Leaderboard`` widgets
   (Validated + Unvalidated, per the two-tier viewer landed in C3).
 If the Space won't load these tabs or the leaderboards don't render,
@@ -34,10 +34,11 @@ def test_three_tabs_render(app_url):
 def test_two_leaderboard_widgets_render(app_url):
-    """Both Validated and Unvalidated leaderboards render on the Leaderboard tab.
-    Gallery is the default (first) tab, so the Leaderboard tab's content
-    is mounted-but-hidden until selected; click the tab first, then
     assert. The two widgets are identified by their labels (set in
     app.py). The labels are case-sensitive substrings that don't overlap
     ("Validated Leaderboard" is not a substring of "Unvalidated
@@ -54,7 +55,7 @@ def test_two_leaderboard_widgets_render(app_url):
         try:
             page = browser.new_page()
             page.goto(app_url)
-            page.get_by_role("tab", name="Leaderboard").click()
             expect(
                 page.get_by_text("Validated Leaderboard", exact=True)
                 .and_(page.locator("p"))

 :mod:`conftest`) and asserts the Phase D minimum:
 - All three Gradio tabs render.
+- The Detailed View tab carries two stacked ``Leaderboard`` widgets
   (Validated + Unvalidated, per the two-tier viewer landed in C3).
 If the Space won't load these tabs or the leaderboards don't render,
 def test_two_leaderboard_widgets_render(app_url):
+    """Both Validated and Unvalidated leaderboards render on the Detailed View tab.
+    The Leaderboard (visual) tab is the default (first) tab, so the
+    Detailed View tab's content is mounted-but-hidden until selected;
+    click the tab first, then
     assert. The two widgets are identified by their labels (set in
     app.py). The labels are case-sensitive substrings that don't overlap
     ("Validated Leaderboard" is not a substring of "Unvalidated
         try:
             page = browser.new_page()
             page.goto(app_url)
+            page.get_by_role("tab", name="Detailed View").click()
             expect(
                 page.get_by_text("Validated Leaderboard", exact=True)
                 .and_(page.locator("p"))