Michael Rabinovich Cursor committed on
Commit
01d67e9
·
1 Parent(s): 3909559

add visual Gallery tab (top-10 verified, sticky GT, fixture picker)

Browse files

Visual-first leaderboard as a new default tab. Self-contained HTML
(gallery.py) inlined into an iframe srcdoc with base64 thumbnails, so
it works on the private Space where HF's edge 404s in-browser
custom-route fetches (same constraint the report viewer's srcdoc path
already handles). Top-10 verified only, teal sticky ground-truth row,
3-fixture picker (drop-oldest, global re-render), dashed cell for
invalid/missing fixtures, per-row generation/editing/validity expander,
and a GT-vs-output compare modal per thumbnail. Existing Leaderboard /
Submit / About / Admin tabs unchanged.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (3) hide show
  1. app.py +122 -0
  2. gallery.py +562 -0
  3. leaderboard.py +4 -0
app.py CHANGED
@@ -24,6 +24,7 @@ rather than render).
24
  """
25
  from __future__ import annotations
26
 
 
27
  import html
28
  import logging
29
  import os
@@ -41,6 +42,7 @@ from huggingface_hub import hf_hub_download
41
  from leaderboard import (
42
  ADMIN_COLUMNS,
43
  ADMIN_SELECT_COL,
 
44
  HF_DATA_REPO,
45
  HF_SUBMISSIONS_REPO,
46
  LEADERBOARD_COLS,
@@ -50,10 +52,12 @@ from leaderboard import (
50
  VALIDATED_LEADERBOARD_DATATYPES,
51
  LeaderboardDataError,
52
  _fmt_timestamp,
 
53
  build_combined_csv,
54
  load_admin_table,
55
  load_leaderboard_split,
56
  )
 
57
  from admin import (
58
  VALID_METHODS,
59
  delete_rows,
@@ -513,12 +517,130 @@ def serve_report(submission_id: str) -> Response:
513
  return Response(content=content, media_type="text/html; charset=utf-8")
514
 
515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
517
  gr.Markdown(
518
  "# CADGenBench Leaderboard\n"
519
  "_Benchmarking AI-driven CAD generation._"
520
  )
521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  with gr.Tab("Leaderboard"):
523
  # Load both tiers once at boot. `_safe_load_split` keeps a Hub
524
  # read failure from crashing the Space: on failure the frames
 
24
  """
25
  from __future__ import annotations
26
 
27
+ import base64
28
  import html
29
  import logging
30
  import os
 
42
  from leaderboard import (
43
  ADMIN_COLUMNS,
44
  ADMIN_SELECT_COL,
45
+ HF_DATA_GT_REPO,
46
  HF_DATA_REPO,
47
  HF_SUBMISSIONS_REPO,
48
  LEADERBOARD_COLS,
 
52
  VALIDATED_LEADERBOARD_DATATYPES,
53
  LeaderboardDataError,
54
  _fmt_timestamp,
55
+ _load_rows_from_hub,
56
  build_combined_csv,
57
  load_admin_table,
58
  load_leaderboard_split,
59
  )
60
+ from gallery import render_gallery_page
61
  from admin import (
62
  VALID_METHODS,
63
  delete_rows,
 
517
  return Response(content=content, media_type="text/html; charset=utf-8")
518
 
519
 
520
+ # Single canonical view served as the gallery thumbnail. Matches the
521
+ # view uploaded by the eval job (eval_job.py GALLERY_THUMB_VIEW) and the
522
+ # GT render the gallery pairs it with, so columns stay comparable.
523
+ GALLERY_THUMB_VIEW = "iso"
524
+
525
+
526
@lru_cache(maxsize=512)
def _fetch_render_bytes(submission_id: str, fixture: str) -> bytes:
    """Download and read ``renders/<id>/<fixture>.png``; raise on any failure.

    Split out of :func:`_fetch_render` so that only *successful* downloads
    are memoised: ``lru_cache`` stores return values, never raised
    exceptions, so a failed fetch is retried on the next call.
    """
    local_path = hf_hub_download(
        repo_id=HF_SUBMISSIONS_REPO,
        filename=f"renders/{submission_id}/{fixture}.png",
        repo_type="dataset",
    )
    return Path(local_path).read_bytes()


def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
    """Pull a submission's gallery thumbnail (``renders/<id>/<fixture>.png``).

    Successful fetches are cached in-process so rebuilding the gallery
    doesn't re-hit the Hub for thumbnails it already has. Failures are
    deliberately *not* cached: a miss is expected to be transient (the
    gallery only asks for fixtures whose status is ``valid``), so the next
    rebuild gets another chance instead of being stuck with ``None``.

    Args:
        submission_id: Submission whose render is wanted.
        fixture: Fixture id; used as the PNG's base name.

    Returns:
        The PNG bytes, or ``None`` if the download or read failed for any
        reason (the failure is logged as a warning).
    """
    try:
        return _fetch_render_bytes(submission_id, fixture)
    except Exception as e:  # noqa: BLE001 - any Hub failure -> None
        logger.warning(
            "Failed to fetch render %s/%s (%s: %s)",
            submission_id, fixture, type(e).__name__, e,
        )
        return None


# Preserve the cache-management hooks the former ``@lru_cache``-decorated
# function exposed, in case other code relies on them.
_fetch_render.cache_clear = _fetch_render_bytes.cache_clear  # type: ignore[attr-defined]
_fetch_render.cache_info = _fetch_render_bytes.cache_info  # type: ignore[attr-defined]
548
+
549
+
550
@lru_cache(maxsize=256)
def _fetch_gt_render_bytes(fixture: str) -> bytes:
    """Download and read a fixture's GT thumbnail; raise on any failure.

    Split out of :func:`_fetch_gt_render` so that only *successful*
    downloads are memoised: ``lru_cache`` stores return values, never
    raised exceptions, so a failed fetch is retried on the next call.
    """
    local_path = hf_hub_download(
        repo_id=HF_DATA_GT_REPO,
        filename=f"{fixture}/renders/{GALLERY_THUMB_VIEW}.png",
        repo_type="dataset",
    )
    return Path(local_path).read_bytes()


def _fetch_gt_render(fixture: str) -> bytes | None:
    """Pull a fixture's ground-truth thumbnail from the private GT dataset.

    Path inside the GT repo is ``<fixture>/renders/<view>.png`` (see
    ``cadgenbench.common.paths.data_gt_dir``). GT renders are a property
    of the data revision, not of any submission, so they're served
    straight from the GT repo rather than duplicated per submission.
    Needs the Space ``HF_TOKEN``'s read scope on the private repo.

    Successful fetches are cached in-process; failures are *not* cached,
    so a transient Hub error does not pin a fixture's GT image to
    ``None`` until the process restarts.

    Args:
        fixture: Fixture id (the top-level directory in the GT repo).

    Returns:
        The PNG bytes, or ``None`` if the download or read failed for any
        reason (the failure is logged as a warning).
    """
    try:
        return _fetch_gt_render_bytes(fixture)
    except Exception as e:  # noqa: BLE001 - any Hub failure -> None
        logger.warning(
            "Failed to fetch GT render for %s (%s: %s)",
            fixture, type(e).__name__, e,
        )
        return None


# Preserve the cache-management hooks the former ``@lru_cache``-decorated
# function exposed, in case other code relies on them.
_fetch_gt_render.cache_clear = _fetch_gt_render_bytes.cache_clear  # type: ignore[attr-defined]
_fetch_gt_render.cache_info = _fetch_gt_render_bytes.cache_info  # type: ignore[attr-defined]
573
+
574
+
575
+ def _data_uri(png_bytes: bytes | None) -> str | None:
576
+ """Base64 ``data:`` URI for PNG bytes, or ``None``.
577
+
578
+ The gallery inlines thumbnails as data URIs rather than referencing
579
+ a proxy route, because while the Space is **private** HF's edge
580
+ 404s in-browser requests to custom routes (same constraint that
581
+ makes the report viewer use ``srcdoc`` + base64; see
582
+ ``space-setup/post-gt-swap.md`` item 12). Inlining means the browser
583
+ makes no second request. Switches to lazy proxy URLs once the Space
584
+ is public.
585
+ """
586
+ if png_bytes is None:
587
+ return None
588
+ return "data:image/png;base64," + base64.b64encode(png_bytes).decode("ascii")
589
+
590
+
591
def _render_data_uri(submission_id: str, fixture: str) -> str | None:
    """Return a submission's per-fixture thumbnail as a data URI, or ``None``.

    Thin adapter: fetches the PNG with ``_fetch_render`` and encodes it
    with ``_data_uri``.
    """
    png_bytes = _fetch_render(submission_id, fixture)
    return _data_uri(png_bytes)
594
+
595
+
596
def _gt_data_uri(fixture: str) -> str | None:
    """Return a fixture's ground-truth thumbnail as a data URI, or ``None``.

    Thin adapter: fetches the PNG with ``_fetch_gt_render`` and encodes it
    with ``_data_uri``.
    """
    png_bytes = _fetch_gt_render(fixture)
    return _data_uri(png_bytes)
599
+
600
+
601
def _gallery_iframe_html() -> str:
    """Return the gallery wrapped in a self-contained ``srcdoc`` iframe.

    Loads the live rows, renders the gallery page with the two data-URI
    resolvers, HTML-escapes the resulting document and places it in the
    iframe's ``srcdoc`` attribute. The iframe gives the page its own
    style context (no Gradio CSS collision) and needs no second HTTP
    request, which is what lets it work on the private Space. If the row
    load raises ``LeaderboardDataError`` the error is logged and an empty
    gallery is rendered instead of crashing the tab.
    """
    rows = []
    try:
        rows = _load_rows_from_hub()
    except LeaderboardDataError:
        logger.exception("Gallery row load failed; rendering empty gallery")

    page = render_gallery_page(rows, _render_data_uri, _gt_data_uri)
    # quote=True also escapes quotes so the document can sit inside the
    # double-quoted srcdoc attribute.
    srcdoc = html.escape(page, quote=True)
    pieces = [
        f'<iframe srcdoc="{srcdoc}" ',
        'style="width:100%; height:90vh; border:0; display:block;" ',
        'title="CADGenBench gallery"></iframe>',
    ]
    return "".join(pieces)
622
+
623
+
624
  with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
625
  gr.Markdown(
626
  "# CADGenBench Leaderboard\n"
627
  "_Benchmarking AI-driven CAD generation._"
628
  )
629
 
630
+ with gr.Tab("Gallery"):
631
+ # Visual-first leaderboard. The bespoke surface (sticky GT row,
632
+ # fixture picker, thumbnail grid, compare modal) is a
633
+ # self-contained HTML doc inlined into an iframe `srcdoc` with
634
+ # base64 thumbnails, so it keeps its own style context and makes
635
+ # no second HTTP request (works on the private Space, where
636
+ # HF's edge 404s in-browser custom-route fetches). Built at boot;
637
+ # the Refresh button rebuilds it after a promotion/new result.
638
+ gallery_html = gr.HTML(value=_gallery_iframe_html())
639
+ gallery_refresh_btn = gr.Button("Refresh gallery", size="sm")
640
+ gallery_refresh_btn.click(
641
+ fn=_gallery_iframe_html, outputs=gallery_html,
642
+ )
643
+
644
  with gr.Tab("Leaderboard"):
645
  # Load both tiers once at boot. `_safe_load_split` keeps a Hub
646
  # read failure from crashing the Space: on failure the frames
gallery.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2026 Hugging Face
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
"""Visual gallery leaderboard page.

Builds a self-contained HTML document (its own CSS + JS) from the live
submission rows. The Space inlines that document into an iframe
``srcdoc`` inside the Gradio "Gallery" tab, so the bespoke visual surface
(sticky ground-truth row, fixture picker, thumbnail grid, compare
modal) lives in plain HTML/JS isolated from Gradio's styles rather
than being forced into Gradio components.

The page is data-driven: :func:`build_gallery_payload` shapes the
top-10 verified rows + the fixture universe into a small JSON blob,
which the page's JS renders. Render lookups are isolated behind the
``renderFor`` / ``gtRenderFor`` JS hooks (mirroring the design brief).
The image sources themselves are injected into the payload by the
caller through two resolver callables (base64 ``data:`` URIs while the
Space is private; proxy URLs once it is public), so the hooks only read
the payload:

- ``renderFor(sub, fixtureId)`` -> the ``img`` of that submission's cell
  (or ``null`` when the per-fixture status is invalid/missing, which
  draws the dashed "invalid generation" cell).
- ``gtRenderFor(fixtureId)`` -> the fixture's entry in ``gtImg``.

Thumbnails open a compare modal that shows the ground-truth render next
to the submission's output for the same fixture.
"""
39
+ from __future__ import annotations
40
+
41
+ import json
42
+
43
+ # Gallery shows the top-N verified submissions only (the visual shop
44
+ # window). The numeric long tail lives on the Full results / Leaderboard
45
+ # tab, not here.
46
+ GALLERY_TOP_N = 10
47
+
48
+ # Default number of fixture columns the picker opens with, capped at the
49
+ # size of the available fixture universe.
50
+ DEFAULT_FIXTURE_COLUMNS = 3
51
+
52
+
53
+ def _verified_rows(rows: list[dict]) -> list[dict]:
54
+ """Completed + validated rows, score-sorted desc, capped at the top N.
55
+
56
+ Mirrors the leaderboard's notion of "verified": ``validation_status
57
+ == 'validated'`` and a terminal ``status == 'completed'`` with a
58
+ real aggregate score. Pending / failed / unvalidated rows never
59
+ reach the visual gallery.
60
+ """
61
+ verified = [
62
+ r
63
+ for r in rows
64
+ if r.get("validation_status") == "validated"
65
+ and r.get("status") == "completed"
66
+ and isinstance(r.get("aggregate_score"), (int, float))
67
+ ]
68
+ verified.sort(key=lambda r: r.get("aggregate_score") or 0.0, reverse=True)
69
+ return verified[:GALLERY_TOP_N]
70
+
71
+
72
+ def _fixture_universe(rows: list[dict]) -> list[dict]:
73
+ """Ordered fixture list discovered from the rows' ``per_fixture_scores``.
74
+
75
+ The fixture set is never hardcoded (it shifts as parts get added /
76
+ removed): it is the union of every ``per_fixture_scores`` key across
77
+ the verified rows, sorted for a stable column order. ``task_type``
78
+ is carried along (first non-null wins) as the small chip tag, since
79
+ difficulty tags are not available in the data.
80
+ """
81
+ task_by_fixture: dict[str, str] = {}
82
+ for r in rows:
83
+ pfs = r.get("per_fixture_scores") or {}
84
+ for fixture_id, fx in pfs.items():
85
+ if fixture_id not in task_by_fixture:
86
+ task_by_fixture[fixture_id] = (fx or {}).get("task_type") or ""
87
+ return [
88
+ {"id": fid, "name": fid, "task": task_by_fixture[fid]}
89
+ for fid in sorted(task_by_fixture)
90
+ ]
91
+
92
+
93
+ def _sub_payload(row: dict, render_resolver) -> dict:
94
+ """Project one verified row into the compact shape the page JS needs.
95
+
96
+ ``render_resolver(submission_id, fixture_id)`` returns the image
97
+ source (a base64 data URI on the private Space, a proxy URL once
98
+ public) for a *valid* fixture, or ``None``. Invalid/missing
99
+ fixtures carry ``img: null`` so the page draws the dashed cell.
100
+ """
101
+ by_task = row.get("score_by_task_type") or {}
102
+ pfs = row.get("per_fixture_scores") or {}
103
+ sid = row.get("submission_id") or ""
104
+ cells: dict[str, dict] = {}
105
+ for fid, fx in pfs.items():
106
+ fx = fx or {}
107
+ status = fx.get("status") or "missing"
108
+ cells[fid] = {
109
+ "status": status,
110
+ "cad": fx.get("cad_score"),
111
+ "img": render_resolver(sid, fid) if status == "valid" else None,
112
+ }
113
+ return {
114
+ "id": sid,
115
+ "name": row.get("submission_name") or "(unnamed submission)",
116
+ "who": row.get("submitter_name") or "",
117
+ "score": row.get("aggregate_score"),
118
+ "validity": row.get("validity_rate"),
119
+ "gen": by_task.get("generation"),
120
+ "edit": by_task.get("editing"),
121
+ "date": row.get("submitted_at") or "",
122
+ "version": row.get("cadgenbench_version") or "",
123
+ "blobUrl": row.get("submission_blob_url") or "",
124
+ "cells": cells,
125
+ }
126
+
127
+
128
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dict:
    """Turn live rows into the JSON-ready dict the gallery page renders from.

    The two resolvers keep this module independent of how images are
    served (base64-inlined for the private Space, proxy URLs once
    public):

    - ``render_resolver(submission_id, fixture_id) -> str | None``
    - ``gt_resolver(fixture_id) -> str | None``

    Returns ``{"fixtures", "subs", "selected", "gtImg"}``: ``fixtures`` is
    the fixture universe of the verified rows, ``subs`` their per-row
    payloads, ``selected`` the first ``DEFAULT_FIXTURE_COLUMNS`` fixture
    ids (the default columns), and ``gtImg`` maps every fixture id to its
    ground-truth image source.
    """
    top_rows = _verified_rows(rows)
    columns = _fixture_universe(top_rows)
    fixture_ids = [column["id"] for column in columns]

    # Ground-truth sources are resolved before the per-submission cells.
    gt_sources = {}
    for fixture_id in fixture_ids:
        gt_sources[fixture_id] = gt_resolver(fixture_id)

    submissions = [_sub_payload(entry, render_resolver) for entry in top_rows]
    return {
        "fixtures": columns,
        "subs": submissions,
        "selected": fixture_ids[:DEFAULT_FIXTURE_COLUMNS],
        "gtImg": gt_sources,
    }
152
+
153
+
154
def render_gallery_page(rows: list[dict], render_resolver, gt_resolver) -> str:
    """Build the full standalone gallery HTML document from live rows.

    ``render_resolver`` / ``gt_resolver`` supply image sources (see
    :func:`build_gallery_payload`). The caller (the Space) inlines
    base64 data URIs while private; a local harness can do the same so
    the page is self-contained with no second requests.

    The payload is embedded as a JavaScript literal inside a ``<script>``
    element. It contains submitter-controlled strings (submission and
    submitter names), so the JSON is escaped for that context before it
    is inlined.

    Args:
        rows: Live submission rows.
        render_resolver: ``(submission_id, fixture_id) -> str | None``.
        gt_resolver: ``(fixture_id) -> str | None``.

    Returns:
        The complete HTML document as a string.
    """
    payload = build_gallery_payload(rows, render_resolver, gt_resolver)
    data_json = json.dumps(payload, ensure_ascii=False)
    # json.dumps alone is not safe inside <script>: a string containing
    # "</script>" or "<!--" would end or corrupt the element and let the
    # rest be parsed as markup. These characters can only occur inside
    # JSON strings, so swapping them for \uXXXX escapes keeps the parsed
    # value identical. U+2028/U+2029 are escaped because older JS engines
    # treat them as line terminators inside string literals.
    for raw, escaped in (
        ("<", "\\u003c"),
        (">", "\\u003e"),
        ("&", "\\u0026"),
        ("\u2028", "\\u2028"),
        ("\u2029", "\\u2029"),
    ):
        data_json = data_json.replace(raw, escaped)
    return (
        "<!DOCTYPE html><html lang='en'><head>"
        "<meta charset='UTF-8'>"
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
        "<title>CADGenBench Gallery</title>"
        f"<style>{_CSS}</style>"
        "</head><body>"
        f"{_BODY}"
        f"<script>window.GALLERY_DATA = {data_json};</script>"
        f"<script>{_JS}</script>"
        "</body></html>"
    )
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # CSS (ported from the reference prototype, trimmed to the gallery surface)
180
+ # ---------------------------------------------------------------------------
181
+
182
+ _CSS = """
183
+ @import url('https://fonts.googleapis.com/css2?family=Archivo:wght@400;500;600;700;800&family=Space+Mono:wght@400;700&display=swap');
184
+ :root {
185
+ --bg: #f4f5f7; --panel: #ffffff; --ink: #14161c; --ink-soft: #5b6170;
186
+ --ink-faint: #9aa0ad; --line: #e3e5ea; --line-strong: #d2d5dd;
187
+ --accent: #4338ca; --accent-soft: #eef0ff; --good: #15803d;
188
+ --good-soft: #e9f7ee; --bad: #b42318; --bad-soft: #fdeceb;
189
+ --gt: #0f766e; --gt-soft: #e6f4f2; --thumb-bg: #eceef2;
190
+ --shadow: 0 1px 2px rgba(20,22,28,.04), 0 8px 24px rgba(20,22,28,.06);
191
+ --radius: 14px;
192
+ }
193
+ * { box-sizing: border-box; }
194
+ body {
195
+ margin: 0; background: var(--bg); color: var(--ink);
196
+ font-family: 'Archivo', sans-serif; -webkit-font-smoothing: antialiased;
197
+ padding: 18px 0 60px;
198
+ }
199
+ .wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }
200
+
201
+ .controls {
202
+ background: var(--panel); border: 1px solid var(--line);
203
+ border-radius: var(--radius); padding: 18px 20px; box-shadow: var(--shadow);
204
+ }
205
+ .controls .label {
206
+ font-size: 12px; font-weight: 700; text-transform: uppercase;
207
+ letter-spacing: .06em; color: var(--ink-faint); margin-bottom: 12px;
208
+ }
209
+ .picker-help { font-weight: 500; text-transform: none; letter-spacing: 0; color: var(--ink-faint); font-size: 12px; }
210
+ .chips { display: flex; gap: 10px; flex-wrap: wrap; }
211
+ .chip {
212
+ font-family: inherit; font-size: 13.5px; cursor: pointer;
213
+ border: 1px solid var(--line-strong); background: #fafbfc; color: var(--ink-soft);
214
+ padding: 9px 14px; border-radius: 10px; display: flex; align-items: center;
215
+ gap: 8px; transition: all .14s ease; font-weight: 500;
216
+ }
217
+ .chip:hover { border-color: var(--accent); color: var(--ink); }
218
+ .chip.on { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); font-weight: 600; }
219
+ .chip .tag { font-family: 'Space Mono', monospace; font-size: 10px; padding: 2px 6px; border-radius: 5px; background: rgba(0,0,0,.05); text-transform: uppercase; letter-spacing: .03em; }
220
+ .chip.on .tag { background: rgba(67,56,202,.12); }
221
+
222
+ .section-label {
223
+ display: flex; align-items: center; gap: 10px; margin: 28px 0 14px;
224
+ font-size: 14px; font-weight: 700; color: var(--accent);
225
+ text-transform: uppercase; letter-spacing: .05em;
226
+ }
227
+ .section-label .verified-pill {
228
+ font-family: 'Space Mono', monospace; font-size: 10px; color: var(--good);
229
+ background: var(--good-soft); padding: 3px 8px; border-radius: 999px;
230
+ letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
231
+ }
232
+ .dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
233
+
234
+ .gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
235
+ .grid-head, .grow {
236
+ display: grid;
237
+ grid-template-columns: 52px minmax(220px, 1.4fr) 116px repeat(var(--ncol, 3), minmax(150px, 1fr));
238
+ align-items: stretch;
239
+ }
240
+ .grid-head {
241
+ background: #fbfbfd; border-bottom: 1px solid var(--line); font-size: 11px;
242
+ text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint);
243
+ font-weight: 700; position: sticky; top: 0; z-index: 20;
244
+ border-radius: var(--radius) var(--radius) 0 0;
245
+ }
246
+ .grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
247
+ .grid-head .fix-h { flex-direction: column; align-items: flex-start; gap: 2px; }
248
+ .grid-head .fix-h .fname { color: var(--ink-soft); text-transform: none; letter-spacing: 0; font-family: 'Space Mono', monospace; font-size: 11px; font-weight: 700; }
249
+ .grid-head .fix-h .ftask { font-size: 9.5px; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; }
250
+
251
+ .grow.gt-row {
252
+ background: var(--gt-soft); border-bottom: 2px solid var(--gt);
253
+ position: sticky; top: var(--head-h, 46px); z-index: 15;
254
+ box-shadow: 0 6px 14px -8px rgba(15,118,110,.45);
255
+ }
256
+ .grow.gt-row .rank, .grow.gt-row .ident { display: flex; align-items: center; }
257
+ .grow.gt-row .ident { font-weight: 700; color: var(--gt); flex-direction: column; align-items: flex-start; justify-content: center; gap: 2px; }
258
+ .grow.gt-row .ident .gt-sub { font-weight: 500; font-size: 11.5px; color: var(--gt); opacity: .8; }
259
+ .grow.gt-row .score-cell { color: var(--gt); }
260
+
261
+ .grow.sub-row { border-bottom: 1px solid var(--line); transition: background .12s ease; }
262
+ .grow.sub-row:last-child { border-bottom: none; }
263
+ .grow.sub-row:hover { background: #fafbff; }
264
+
265
+ .rank {
266
+ padding: 16px 14px; font-family: 'Space Mono', monospace; font-weight: 700;
267
+ font-size: 15px; color: var(--ink-faint); display: flex; align-items: center;
268
+ justify-content: center;
269
+ }
270
+ .rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }
271
+
272
+ .ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
273
+ .ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; }
274
+ .ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: 'Space Mono', monospace; }
275
+
276
+ .score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
277
+ .score-cell .agg { font-size: 22px; font-weight: 800; letter-spacing: -.01em; }
278
+ .score-cell .validity { font-size: 11.5px; font-family: 'Space Mono', monospace; color: var(--good); font-weight: 700; display: flex; align-items: baseline; gap: 5px; }
279
+ .score-cell .validity .vlabel { font-weight: 400; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; font-size: 10px; }
280
+ .score-cell .validity.imperfect { color: #b45309; }
281
+ .score-cell .validity.imperfect .vlabel { color: #c98a3a; }
282
+
283
+ .thumb-cell { padding: 8px; display: flex; align-items: center; justify-content: center; position: relative; }
284
+ .thumb {
285
+ width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg);
286
+ border: 1px solid var(--line); overflow: hidden; cursor: pointer; position: relative;
287
+ transition: transform .14s ease, box-shadow .14s ease, border-color .14s ease;
288
+ }
289
+ .thumb:hover { transform: translateY(-2px); box-shadow: 0 6px 18px rgba(20,22,28,.14); border-color: var(--accent); }
290
+ .thumb img { width: 100%; height: 100%; display: block; object-fit: contain; }
291
+ .thumb .open-hint {
292
+ position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;
293
+ opacity: 0; background: rgba(67,56,202,.08); transition: opacity .14s ease;
294
+ font-size: 11px; font-weight: 700; color: var(--accent); text-transform: uppercase; letter-spacing: .04em;
295
+ }
296
+ .thumb:hover .open-hint { opacity: 1; }
297
+
298
+ .thumb.failed { cursor: default; background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
299
+ .thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
300
+ .thumb.failed .ftag { font-family: 'Space Mono', monospace; font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }
301
+
302
+ .sub-row.open { background: #fafbff; }
303
+ .detail {
304
+ grid-column: 1 / -1; background: #f8f9fc; border-top: 1px dashed var(--line-strong);
305
+ padding: 0 14px; max-height: 0; overflow: hidden; transition: max-height .28s ease, padding .28s ease;
306
+ }
307
+ .detail.show { max-height: 260px; padding: 18px 14px 22px; }
308
+ .metric-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; }
309
+ .metric { background: var(--panel); border: 1px solid var(--line); border-radius: 10px; padding: 12px 14px; }
310
+ .metric .m-label { font-size: 10px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
311
+ .metric .m-val { font-size: 20px; font-weight: 800; margin-top: 4px; letter-spacing: -.01em; }
312
+ .detail-foot { margin-top: 14px; font-size: 12.5px; color: var(--ink-soft); display: flex; gap: 18px; flex-wrap: wrap; align-items: center; }
313
+ .detail-foot a { color: var(--accent); text-decoration: none; font-weight: 600; }
314
+ .detail-foot a:hover { text-decoration: underline; }
315
+ .row-toggle {
316
+ grid-column: 1 / -1; display: flex; align-items: center; justify-content: center; gap: 6px;
317
+ padding: 7px; cursor: pointer; font-size: 11px; font-weight: 700; color: var(--ink-faint);
318
+ text-transform: uppercase; letter-spacing: .05em; border-top: 1px solid var(--line);
319
+ background: #fcfcfe; user-select: none;
320
+ }
321
+ .row-toggle:hover { color: var(--accent); background: var(--accent-soft); }
322
+ .row-toggle .chev { transition: transform .2s ease; }
323
+ .sub-row.open .row-toggle .chev { transform: rotate(180deg); }
324
+
325
+ .empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }
326
+
327
+ /* compare modal (GT vs output) */
328
+ .modal-back { position: fixed; inset: 0; background: rgba(20,22,28,.5); backdrop-filter: blur(3px); display: none; align-items: center; justify-content: center; z-index: 50; padding: 24px; }
329
+ .modal-back.show { display: flex; }
330
+ .modal { background: var(--panel); border-radius: 16px; width: 100%; max-width: 620px; padding: 26px; box-shadow: 0 24px 60px rgba(0,0,0,.3); }
331
+ .modal h4 { margin: 0 0 4px; font-size: 18px; }
332
+ .modal .msub { color: var(--ink-faint); font-size: 13px; font-family: 'Space Mono', monospace; margin-bottom: 18px; }
333
+ .modal-compare { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; }
334
+ .modal-compare figure { margin: 0; }
335
+ .modal-compare figcaption { font-size: 11px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; margin-bottom: 6px; }
336
+ .modal-compare .mthumb { width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg); border: 1px solid var(--line); overflow: hidden; }
337
+ .modal-compare .mthumb img { width: 100%; height: 100%; object-fit: contain; display: block; }
338
+ .modal-compare .mthumb.failed { background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
339
+ .modal-compare .mthumb.failed span { font-family: 'Space Mono', monospace; font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; }
340
+ .modal-note { margin-top: 18px; font-size: 12.5px; color: var(--ink-soft); background: var(--accent-soft); padding: 12px 14px; border-radius: 10px; }
341
+ .modal-close { margin-top: 20px; width: 100%; padding: 11px; border: 1px solid var(--line-strong); background: #fafbfc; border-radius: 10px; font-family: inherit; font-weight: 600; cursor: pointer; font-size: 14px; }
342
+ .modal-close:hover { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); }
343
+ """
344
+
345
+
346
+ # ---------------------------------------------------------------------------
347
+ # Body
348
+ # ---------------------------------------------------------------------------
349
+
350
+ _BODY = """
351
+ <div class="wrap">
352
+ <div class="controls">
353
+ <div class="label">Fixtures shown <span class="picker-help">- pick 3 to compare across all models (changes columns globally)</span></div>
354
+ <div class="chips" id="chips"></div>
355
+ </div>
356
+ <div class="section-label">
357
+ Validated leaderboard - Top 10
358
+ <span class="verified-pill"><span class="dot"></span>verified only</span>
359
+ </div>
360
+ <div class="gallery" id="gallery">
361
+ <div class="grid-head" id="gridHead"></div>
362
+ </div>
363
+ </div>
364
+ <div class="modal-back" id="modalBack">
365
+ <div class="modal">
366
+ <h4 id="modalTitle"></h4>
367
+ <div class="msub" id="modalSub"></div>
368
+ <div class="modal-compare">
369
+ <figure><figcaption>Ground truth</figcaption><div class="mthumb" id="modalGt"></div></figure>
370
+ <figure><figcaption id="modalOutCap">Output (aligned)</figcaption><div class="mthumb" id="modalOut"></div></figure>
371
+ </div>
372
+ <div class="modal-note" id="modalNote"></div>
373
+ <button class="modal-close" id="modalClose">Close</button>
374
+ </div>
375
+ </div>
376
+ """
377
+
378
+
379
+ # ---------------------------------------------------------------------------
380
+ # JS (data-driven render of the gallery; render lookups isolated behind
381
+ # renderFor / gtRenderFor as in the design brief)
382
+ # ---------------------------------------------------------------------------
383
+
384
+ _JS = """
385
+ const DATA = window.GALLERY_DATA || {fixtures: [], subs: [], selected: []};
386
+ const FIXTURES = DATA.fixtures;
387
+ let selected = (DATA.selected || []).slice();
388
+
389
+ // --- Render hooks. ---------------------------------------------------------
390
+ // The image sources are injected by the server (base64 data URIs while the
391
+ // Space is private; proxy URLs once public), so these just read the payload.
392
+ // renderFor returns null for an invalid/missing fixture -> dashed cell.
393
+ function renderFor(sub, fxId) {
394
+ const c = sub.cells[fxId];
395
+ return c ? c.img : null;
396
+ }
397
+ function gtRenderFor(fxId) {
398
+ return (DATA.gtImg || {})[fxId] || null;
399
+ }
400
+ function cellOf(sub, fxId) { return sub.cells[fxId] || {}; }
401
+
402
+ function fmt(x, d) { return (x === null || x === undefined) ? '-' : Number(x).toFixed(d); }
403
+ function pct(x) { return (x === null || x === undefined) ? '-' : Math.round(Number(x) * 100) + '%'; }
404
+ function esc(s) { return String(s == null ? '' : s).replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c])); }
405
+ function fixtureMeta(id) { return FIXTURES.find(f => f.id === id); }
406
+
407
/**
 * Draw the fixture picker inside #chips and bind its click behaviour.
 * Between one and three fixtures stay selected: deselecting the last one is
 * ignored, and adding a fourth evicts the oldest pick. Every accepted click
 * redraws both the chips and the gallery.
 */
function buildChips() {
  const wrap = document.getElementById('chips');

  const chipHTML = (f) => {
    const stateClass = selected.includes(f.id) ? 'on' : '';
    let tag = '';
    if (f.task) tag = '<span class="tag">' + esc(f.task) + '</span>';
    return '<button class="chip ' + stateClass + '" data-id="' + esc(f.id) + '">' + tag + esc(f.name) + '</button>';
  };
  wrap.innerHTML = FIXTURES.map(chipHTML).join('');

  wrap.querySelectorAll('.chip').forEach((chip) => {
    chip.onclick = () => {
      const id = chip.dataset.id;
      const alreadyPicked = selected.includes(id);
      if (alreadyPicked && selected.length <= 1) return; // keep at least 1
      if (alreadyPicked) {
        selected = selected.filter((x) => x !== id);
      } else {
        if (selected.length >= 3) selected.shift(); // cap at 3, drop oldest
        selected.push(id);
      }
      buildChips();
      buildGallery();
    };
  });
}
428
+
429
/**
 * Fill #gridHead with the three fixed column titles followed by one title
 * per selected fixture (its name, or the raw id when the fixture is unknown,
 * plus its task label when it has one).
 */
function buildHead() {
  const head = document.getElementById('gridHead');
  const fixtureColumns = selected.map((id) => {
    const meta = fixtureMeta(id);
    const label = meta ? meta.name : id;
    const task = (meta && meta.task) ? '<span class="ftask">' + esc(meta.task) + '</span>' : '';
    return '<div class="fix-h"><span class="fname">' + esc(label) + '</span>' + task + '</div>';
  });
  head.innerHTML = '<div>#</div><div>Submission</div><div>Score</div>' + fixtureColumns.join('');
}
439
+
440
/**
 * Markup for one thumbnail cell.
 *
 * url       image source; a falsy value yields the "invalid generation"
 *           placeholder instead of an <img>.
 * attrs     extra attributes for the wrapper, inserted verbatim: the caller
 *           must pass already-escaped markup.
 * clickable when truthy, adds the "open" hint overlay.
 */
function thumbHTML(url, attrs, clickable) {
  if (!url) {
    return '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
  }
  const hint = clickable ? '<span class="open-hint">open</span>' : '';
  // Escape the source: it sits inside a quoted attribute, and a URL carrying
  // '&' or a quote would otherwise corrupt (or break out of) the markup.
  return '<div class="thumb" ' + attrs + '><img loading="lazy" src="' + esc(url) + '" alt="">' + hint + '</div>';
}
447
+
448
/**
 * Rebuild the whole grid inside #gallery for the current selection.
 * Sets the --ncol custom property, redraws the header, drops every existing
 * .grow row, then appends the ground-truth row followed by one row per
 * submission in payload order. With no submissions it shows a single
 * explanatory note and stops there.
 */
function buildGallery() {
  const g = document.getElementById('gallery');
  g.style.setProperty('--ncol', selected.length);
  buildHead();
  g.querySelectorAll('.grow').forEach((stale) => stale.remove());

  if (!DATA.subs.length) {
    // Add the note once; later re-renders find it and leave it in place.
    if (!g.querySelector('.empty-note')) {
      const note = document.createElement('div');
      note.className = 'empty-note';
      note.textContent = 'No verified submissions yet. Once a submission is promoted to the validated tier it appears here.';
      g.appendChild(note);
    }
    return;
  }

  const makeRow = (className, html) => {
    const row = document.createElement('div');
    row.className = className;
    row.innerHTML = html;
    return row;
  };

  // Ground-truth pinned row.
  const gtThumbs = selected.map((id) =>
    '<div class="thumb-cell">' + thumbHTML(gtRenderFor(id), 'data-gt="' + esc(id) + '"', false) + '</div>');
  const gtHTML = [
    '<div class="rank">&#9733;</div>',
    '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>',
    '<div class="score-cell"><span class="agg">1.000</span></div>',
  ].concat(gtThumbs).join('');
  g.appendChild(makeRow('grow gt-row', gtHTML));

  const metric = (label, value) =>
    '<div class="metric"><div class="m-label">' + label + '</div><div class="m-val">' + value + '</div></div>';

  DATA.subs.forEach((s, i) => {
    const place = i + 1;
    const medal = place <= 3 ? 'medal-' + place : '';
    const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
    const subId = esc(s.id);
    const versionNote = s.version ? ' - cadgenbench v' + esc(s.version) : '';
    const zipLink = s.blobUrl
      ? '<a href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener">Download ZIP</a>'
      : '';
    const thumbs = selected.map((id) =>
      '<div class="thumb-cell">'
      + thumbHTML(renderFor(s, id), 'data-sub="' + subId + '" data-fix="' + esc(id) + '"', true)
      + '</div>');

    const html = [
      '<div class="rank ' + medal + '">' + place + '</div>',
      '<div class="ident"><span class="sub-name">' + esc(s.name) + '</span><span class="submitter">' + esc(s.who) + '</span></div>',
      '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>',
      '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>',
    ].concat(thumbs, [
      '<div class="row-toggle" data-toggle="' + subId + '">more numbers <span class="chev">&#9662;</span></div>',
      '<div class="detail" id="detail-' + subId + '">',
      '<div class="metric-grid">',
      metric('Generation', fmt(s.gen, 3)),
      metric('Editing', fmt(s.edit, 3)),
      metric('Validity', pct(s.validity)),
      '</div>',
      '<div class="detail-foot"><span>Submitted ' + esc(s.date) + versionNote + '</span>',
      zipLink,
      '</div></div>',
    ]).join('');
    g.appendChild(makeRow('grow sub-row', html));
  });

  wireGallery();
  syncHeadHeight();
}
506
+
507
/**
 * Bind click handlers on the freshly rendered rows:
 *  - every [data-toggle] element shows/hides its row's detail panel;
 *  - every thumbnail carrying data-sub opens the compare modal.
 * A handler whose target cannot be resolved does nothing instead of throwing.
 */
function wireGallery() {
  document.querySelectorAll('[data-toggle]').forEach((toggle) => {
    toggle.onclick = () => {
      const detail = document.getElementById('detail-' + toggle.dataset.toggle);
      const row = toggle.closest('.sub-row');
      if (!detail || !row) return;
      detail.classList.toggle('show');
      row.classList.toggle('open');
    };
  });
  document.querySelectorAll('.thumb[data-sub]').forEach((thumb) => {
    thumb.onclick = () => {
      // dataset values are always strings, so compare ids as strings; a
      // numeric id in the payload would never match under strict equality.
      const sub = DATA.subs.find((x) => String(x.id) === thumb.dataset.sub);
      if (sub) openModal(thumb.dataset.fix, sub);
    };
  });
}
522
+
523
/**
 * Fill and show the compare modal for one fixture of one submission:
 * ground-truth render next to the submission's output, plus the fixture's
 * CAD score. Does nothing when no submission is supplied.
 *
 * fxId  fixture id; shown as the modal title.
 * sub   submission record from the payload.
 */
function openModal(fxId, sub) {
  if (!sub) return;
  document.getElementById('modalTitle').textContent = fxId;
  document.getElementById('modalSub').textContent = sub.name;

  // Image sources are placed in quoted attributes via innerHTML, so escape them.
  const gt = gtRenderFor(fxId);
  document.getElementById('modalGt').innerHTML = gt
    ? '<img src="' + esc(gt) + '" alt="ground truth">' : '<span>no GT render</span>';

  const out = renderFor(sub, fxId);
  const outEl = document.getElementById('modalOut');
  if (out) {
    outEl.className = 'mthumb';
    outEl.innerHTML = '<img src="' + esc(out) + '" alt="output">';
  } else {
    outEl.className = 'mthumb failed';
    outEl.innerHTML = '<span>invalid<br>generation</span>';
  }

  // Same formatting rule as the table cells: '-' when the score is missing.
  const cad = fmt(cellOf(sub, fxId).cad, 3);
  document.getElementById('modalNote').innerHTML =
    'CAD score for this fixture: <b>' + cad + '</b>. The full per-fixture report '
    + '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
  document.getElementById('modalBack').classList.add('show');
}
545
/** Hide the compare modal by removing the 'show' class from #modalBack. */
function closeModal() {
  const backdrop = document.getElementById('modalBack');
  backdrop.classList.remove('show');
}
548
// Three ways to dismiss the modal: the close button, a click that lands on
// the backdrop itself (not on content inside it), or the Escape key.
document.getElementById('modalClose').onclick = closeModal;
document.getElementById('modalBack').onclick = (evt) => {
  if (evt.target.id !== 'modalBack') return;
  closeModal();
};
document.addEventListener('keydown', (evt) => {
  if (evt.key !== 'Escape') return;
  closeModal();
});
551
+
552
+ // Pin the GT row exactly beneath the sticky column header.
553
/**
 * Publish the current pixel height of #gridHead as the --head-h custom
 * property on the root element. No-op when the header element is absent.
 */
function syncHeadHeight() {
  const head = document.getElementById('gridHead');
  if (!head) return;
  const heightPx = head.offsetHeight + 'px';
  document.documentElement.style.setProperty('--head-h', heightPx);
}
557
+
558
// First paint: picker first, then the grid.
buildChips();
buildGallery();
// The header height can change on resize and once web fonts finish loading,
// so re-measure on both.
window.addEventListener('resize', syncHeadHeight);
const fontsReady = document.fonts && document.fonts.ready;
if (fontsReady) fontsReady.then(syncHeadHeight);
562
+ """
leaderboard.py CHANGED
@@ -44,6 +44,10 @@ HF_SUBMISSIONS_REPO = os.getenv(
44
  "HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
45
  )
46
  HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
 
 
 
 
47
 
48
  RESULTS_FILENAME = "results.jsonl"
49
  HUB_FETCH_TIMEOUT_SECONDS = 30
 
44
  "HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
45
  )
46
  HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
47
+ # Private ground-truth dataset. The gallery's GT render proxy reads
48
+ # `<fixture>/renders/iso.png` from here (needs the Space HF_TOKEN's
49
+ # read scope, same token the eval already uses for GT).
50
+ HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
51
 
52
  RESULTS_FILENAME = "results.jsonl"
53
  HUB_FETCH_TIMEOUT_SECONDS = 30