Michael Rabinovich Cursor committed on
Commit ·
08eae45
1
Parent(s): 96dcaf6
leaderboard: fixed 4-sample gallery, auto-size iframe, center modal
Browse files
Replace the gallery's interactive fixture picker with a fixed
comparison sheet: two generation + two editing samples, one Medium and
one Hard per task. Difficulty is chosen once from the Claude Opus 4.8
baseline's per-sample CAD scores (median = Medium, 20th percentile =
Hard) and frozen into FIXED_FIXTURES; the GUI no longer recomputes it
as the leaderboard evolves, trading a moving ground truth for a simple,
stable interface. Adds tools/pick_gallery_fixtures.py to regenerate the
constant.
Size the gallery iframe to its content (no more 90vh fixed box / nested
scrollbar) and pin the compare modal to the visible part of the iframe
so it opens centered in view instead of far below the fold.
Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +5 -1
- gallery.py +173 -276
- tools/pick_gallery_fixtures.py +125 -0
app.py
CHANGED
|
@@ -793,9 +793,13 @@ def _gallery_iframe_html() -> str:
|
|
| 793 |
rows, _render_proxy_url, _gt_proxy_url, _render_diff_proxy_url,
|
| 794 |
)
|
| 795 |
escaped = html.escape(doc, quote=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
return (
|
| 797 |
f'<iframe srcdoc="{escaped}" '
|
| 798 |
-
'style="width:100%; height:
|
| 799 |
'title="CADGenBench gallery"></iframe>'
|
| 800 |
)
|
| 801 |
|
|
|
|
| 793 |
rows, _render_proxy_url, _gt_proxy_url, _render_diff_proxy_url,
|
| 794 |
)
|
| 795 |
escaped = html.escape(doc, quote=True)
|
| 796 |
+
# The gallery JS (`fitIframe`) resizes this iframe to its own content so the
|
| 797 |
+
# page scrolls naturally in the parent (no oversized fixed box, no nested
|
| 798 |
+
# scrollbar). The inline height is only the pre-script fallback for the case
|
| 799 |
+
# where same-origin `frameElement` access is blocked.
|
| 800 |
return (
|
| 801 |
f'<iframe srcdoc="{escaped}" '
|
| 802 |
+
'style="width:100%; height:700px; border:0; display:block;" '
|
| 803 |
'title="CADGenBench gallery"></iframe>'
|
| 804 |
)
|
| 805 |
|
gallery.py
CHANGED
|
@@ -17,27 +17,29 @@
|
|
| 17 |
Builds a self-contained HTML document (its own CSS + JS) from the live
|
| 18 |
submission rows. The Space serves it at ``/gallery`` and embeds it in
|
| 19 |
the Gradio "Gallery" tab via an iframe, so the bespoke visual surface
|
| 20 |
-
(sticky ground-truth row,
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
The page is
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
- ``renderFor(sub, fixtureId)`` -> ``/render/<id>/<fixture>.webp`` (or
|
| 32 |
``null`` when the per-fixture status is invalid/missing, which draws
|
| 33 |
the dashed "invalid generation" cell).
|
| 34 |
- ``gtRenderFor(fixtureId)`` -> ``/gt-render/<fixture>.webp``.
|
| 35 |
|
| 36 |
-
GIFs are lazy-loaded by the browser, so only the
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
while private).
|
| 41 |
|
| 42 |
Turntable clicks open a GT-vs-output compare modal that points at the
|
| 43 |
existing per-submission detail/report view.
|
|
@@ -53,10 +55,24 @@ from leaderboard import _report_relative_url
|
|
| 53 |
# tab, not here.
|
| 54 |
GALLERY_TOP_N = 10
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
#
|
| 58 |
-
#
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def _verified_rows(rows: list[dict]) -> list[dict]:
|
|
@@ -78,30 +94,13 @@ def _verified_rows(rows: list[dict]) -> list[dict]:
|
|
| 78 |
return verified[:GALLERY_TOP_N]
|
| 79 |
|
| 80 |
|
| 81 |
-
def
|
| 82 |
-
"""Ordered fixture list discovered from the rows' ``per_fixture_scores``.
|
| 83 |
-
|
| 84 |
-
The fixture set is never hardcoded (it shifts as parts get added /
|
| 85 |
-
removed): it is the union of every ``per_fixture_scores`` key across
|
| 86 |
-
the verified rows, sorted for a stable column order. ``task_type``
|
| 87 |
-
is carried along (first non-null wins) as the small chip tag, since
|
| 88 |
-
difficulty tags are not available in the data.
|
| 89 |
-
"""
|
| 90 |
-
task_by_fixture: dict[str, str] = {}
|
| 91 |
-
for r in rows:
|
| 92 |
-
pfs = r.get("per_fixture_scores") or {}
|
| 93 |
-
for fixture_id, fx in pfs.items():
|
| 94 |
-
if fixture_id not in task_by_fixture:
|
| 95 |
-
task_by_fixture[fixture_id] = (fx or {}).get("task_type") or ""
|
| 96 |
-
return [
|
| 97 |
-
{"id": fid, "name": fid, "task": task_by_fixture[fid]}
|
| 98 |
-
for fid in sorted(task_by_fixture)
|
| 99 |
-
]
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
|
| 103 |
"""Project one verified row into the compact shape the page JS needs.
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
``render_resolver(submission_id, fixture_id)`` returns the cached
|
| 106 |
proxy URL for a *valid* fixture, or ``None``. Invalid/missing
|
| 107 |
fixtures carry ``img: null`` so the page draws the dashed cell;
|
|
@@ -119,8 +118,8 @@ def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
|
|
| 119 |
pfs = row.get("per_fixture_scores") or {}
|
| 120 |
sid = row.get("submission_id") or ""
|
| 121 |
cells: dict[str, dict] = {}
|
| 122 |
-
for fid
|
| 123 |
-
fx =
|
| 124 |
status = fx.get("status") or "missing"
|
| 125 |
valid = status == "valid"
|
| 126 |
is_editing = (fx.get("task_type") or "") == "editing"
|
|
@@ -154,8 +153,10 @@ def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
|
|
| 154 |
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> dict:
|
| 155 |
"""Shape live rows into the JSON the gallery page renders from.
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
- ``render_resolver(submission_id, fixture_id) -> str | None`` (plain
|
| 161 |
candidate turntable; backs the modal and non-editing grid tiles)
|
|
@@ -163,18 +164,23 @@ def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver, diff_r
|
|
| 163 |
turntable; backs the grid tile for editing fixtures)
|
| 164 |
- ``gt_resolver(fixture_id) -> str | None``
|
| 165 |
|
| 166 |
-
Returns ``{"fixtures", "subs", "
|
| 167 |
-
|
| 168 |
-
|
| 169 |
"""
|
| 170 |
verified = _verified_rows(rows)
|
| 171 |
-
fixtures =
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
| 174 |
return {
|
| 175 |
"fixtures": fixtures,
|
| 176 |
-
"subs": [
|
| 177 |
-
|
|
|
|
|
|
|
| 178 |
"gtImg": gt_img,
|
| 179 |
}
|
| 180 |
|
|
@@ -230,65 +236,8 @@ body {
|
|
| 230 |
}
|
| 231 |
.wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }
|
| 232 |
|
| 233 |
-
.controls {
|
| 234 |
-
background: var(--panel); border: 1px solid var(--line);
|
| 235 |
-
border-radius: var(--radius); padding: 18px 20px; box-shadow: var(--shadow);
|
| 236 |
-
}
|
| 237 |
-
.controls .label {
|
| 238 |
-
font-size: 12px; font-weight: 700; text-transform: uppercase;
|
| 239 |
-
letter-spacing: .06em; color: var(--ink-faint); margin-bottom: 12px;
|
| 240 |
-
}
|
| 241 |
-
.picker-help { font-weight: 500; text-transform: none; letter-spacing: 0; color: var(--ink-faint); font-size: 12px; }
|
| 242 |
-
|
| 243 |
-
/* Compact picker: current picks as removable pills + Add + Reset */
|
| 244 |
-
.picker-row { display: flex; align-items: center; gap: 10px; flex-wrap: wrap; }
|
| 245 |
-
.pills { display: flex; gap: 8px; flex-wrap: wrap; }
|
| 246 |
-
.pill {
|
| 247 |
-
display: inline-flex; align-items: center; gap: 8px; font-size: 13.5px; font-weight: 600;
|
| 248 |
-
background: var(--accent-soft); border: 1px solid var(--accent); color: var(--accent);
|
| 249 |
-
padding: 8px 8px 8px 12px; border-radius: 10px; font-family: inherit;
|
| 250 |
-
}
|
| 251 |
-
.pill .pgroup { font-family: var(--mono); font-size: 10px; color: var(--accent); opacity: .7; }
|
| 252 |
-
.pill .pname { font-family: var(--mono); font-weight: 700; }
|
| 253 |
-
.pill .premove { cursor: pointer; border: none; background: rgba(67,56,202,.12); color: var(--accent); width: 18px; height: 18px; border-radius: 5px; font-size: 13px; line-height: 1; display: grid; place-items: center; font-family: inherit; }
|
| 254 |
-
.pill .premove:hover { background: var(--accent); color: #fff; }
|
| 255 |
-
|
| 256 |
-
.picker-anchor { position: relative; }
|
| 257 |
-
.add-fixture {
|
| 258 |
-
font-family: inherit; font-size: 13.5px; font-weight: 600; cursor: pointer; color: var(--ink-soft);
|
| 259 |
-
background: #fafbfc; border: 1px dashed var(--line-strong); padding: 9px 14px; border-radius: 10px;
|
| 260 |
-
transition: all .14s ease;
|
| 261 |
-
}
|
| 262 |
-
.add-fixture:hover { border-color: var(--accent); color: var(--accent); border-style: solid; }
|
| 263 |
-
.add-fixture:disabled { opacity: .45; cursor: not-allowed; }
|
| 264 |
-
.reset-fixtures {
|
| 265 |
-
font-family: inherit; font-size: 12.5px; font-weight: 500; cursor: pointer; color: var(--ink-faint);
|
| 266 |
-
background: none; border: none; padding: 9px 6px; text-decoration: underline; text-underline-offset: 2px;
|
| 267 |
-
}
|
| 268 |
-
.reset-fixtures:hover { color: var(--accent); }
|
| 269 |
-
|
| 270 |
-
/* Searchable grouped dropdown over all fixtures */
|
| 271 |
-
.popover {
|
| 272 |
-
position: absolute; top: calc(100% + 8px); left: 0; z-index: 40; width: 340px;
|
| 273 |
-
background: var(--panel); border: 1px solid var(--line-strong); border-radius: 12px;
|
| 274 |
-
box-shadow: 0 16px 40px rgba(20,22,28,.18); overflow: hidden;
|
| 275 |
-
}
|
| 276 |
-
.popover[hidden] { display: none; }
|
| 277 |
-
.popover-search { width: 100%; border: none; border-bottom: 1px solid var(--line); padding: 13px 15px; font-family: inherit; font-size: 14px; outline: none; }
|
| 278 |
-
.popover-search:focus { background: #fbfbff; }
|
| 279 |
-
.popover-list { max-height: 300px; overflow-y: auto; padding: 6px; }
|
| 280 |
-
.popover-group { font-size: 10px; font-weight: 700; text-transform: uppercase; letter-spacing: .06em; color: var(--ink-faint); padding: 10px 10px 5px; position: sticky; top: 0; background: var(--panel); }
|
| 281 |
-
.popover-item { display: flex; align-items: center; gap: 9px; padding: 9px 10px; border-radius: 8px; cursor: pointer; font-size: 13.5px; }
|
| 282 |
-
.popover-item:hover { background: var(--accent-soft); }
|
| 283 |
-
.popover-item.is-selected { color: var(--accent); font-weight: 600; }
|
| 284 |
-
.popover-item.is-selected::after { content: '\\2713'; margin-left: auto; color: var(--accent); }
|
| 285 |
-
.popover-item .itask { font-family: var(--mono); font-size: 9px; padding: 2px 6px; border-radius: 5px; background: rgba(0,0,0,.05); text-transform: uppercase; }
|
| 286 |
-
.popover-item .iname { font-family: var(--mono); font-weight: 700; }
|
| 287 |
-
.popover-empty { padding: 18px; text-align: center; color: var(--ink-faint); font-size: 13px; }
|
| 288 |
-
.popover-cap { padding: 10px 12px; font-size: 11.5px; color: #b45309; background: #fdf3e7; border-top: 1px solid var(--line); }
|
| 289 |
-
|
| 290 |
.section-label {
|
| 291 |
-
display: flex; align-items: center; gap: 10px; margin:
|
| 292 |
font-size: 14px; font-weight: 700; color: var(--accent);
|
| 293 |
text-transform: uppercase; letter-spacing: .05em;
|
| 294 |
}
|
|
@@ -298,11 +247,13 @@ body {
|
|
| 298 |
letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
|
| 299 |
}
|
| 300 |
.dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
|
|
|
|
|
|
|
| 301 |
|
| 302 |
.gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
|
| 303 |
.grid-head, .grow {
|
| 304 |
display: grid;
|
| 305 |
-
grid-template-columns: 52px minmax(
|
| 306 |
align-items: stretch;
|
| 307 |
}
|
| 308 |
.grid-head {
|
|
@@ -312,9 +263,16 @@ body {
|
|
| 312 |
border-radius: var(--radius) var(--radius) 0 0;
|
| 313 |
}
|
| 314 |
.grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
|
| 315 |
-
.grid-head .fix-h { flex-direction: column; align-items: flex-start; gap:
|
| 316 |
-
.grid-head .fix-h .
|
| 317 |
-
.grid-head .fix-h .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
.grow.gt-row {
|
| 320 |
background: var(--gt-soft); border-bottom: 2px solid var(--gt);
|
|
@@ -411,24 +369,15 @@ a.sub-name:hover { color: var(--accent); text-decoration: underline; }
|
|
| 411 |
|
| 412 |
_BODY = """
|
| 413 |
<div class="wrap">
|
| 414 |
-
<div class="controls">
|
| 415 |
-
<div class="label">Samples shown <span class="picker-help">- pick up to 3 to compare across all models (changes columns globally)</span></div>
|
| 416 |
-
<div class="picker-row">
|
| 417 |
-
<div class="pills" id="pills"></div>
|
| 418 |
-
<div class="picker-anchor">
|
| 419 |
-
<button class="add-fixture" id="addFixtureBtn">+ Add sample</button>
|
| 420 |
-
<div class="popover" id="popover" hidden>
|
| 421 |
-
<input type="text" class="popover-search" id="popoverSearch" placeholder="Search samples..." autocomplete="off">
|
| 422 |
-
<div class="popover-list" id="popoverList"></div>
|
| 423 |
-
</div>
|
| 424 |
-
</div>
|
| 425 |
-
<button class="reset-fixtures" id="resetFixtures" title="Reset to the default comparison set">Reset</button>
|
| 426 |
-
</div>
|
| 427 |
-
</div>
|
| 428 |
<div class="section-label">
|
| 429 |
Validated leaderboard - Top 10
|
| 430 |
<span class="verified-pill"><span class="dot"></span>verified only</span>
|
| 431 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
<div class="gallery" id="gallery">
|
| 433 |
<div class="grid-head" id="gridHead"></div>
|
| 434 |
</div>
|
|
@@ -454,11 +403,10 @@ _BODY = """
|
|
| 454 |
# ---------------------------------------------------------------------------
|
| 455 |
|
| 456 |
_JS = """
|
| 457 |
-
const DATA = window.GALLERY_DATA || {fixtures: [], subs: [],
|
|
|
|
|
|
|
| 458 |
const FIXTURES = DATA.fixtures || [];
|
| 459 |
-
const MAX_FIXTURES = 3;
|
| 460 |
-
const DEFAULT_FIXTURES = (DATA.selected || []).slice();
|
| 461 |
-
let selected = DEFAULT_FIXTURES.slice();
|
| 462 |
|
| 463 |
// --- Render hooks. ---------------------------------------------------------
|
| 464 |
// The image sources are cached render-proxy URLs injected by the server, so
|
|
@@ -485,140 +433,17 @@ function pct(x) { return (x === null || x === undefined) ? '-' : Math.round(Numb
|
|
| 485 |
// Escape the five HTML-significant characters (& < > " ') so arbitrary text
// can be interpolated into markup and quoted attribute values. null/undefined
// become the empty string; any other value is stringified first.
function esc(s) { return String(s == null ? '' : s).replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c])); }
|
| 486 |
// Look up the fixture descriptor whose `id` strictly equals the given id.
// Yields undefined when no entry in FIXTURES matches.
function fixtureMeta(id) {
  for (const fixture of FIXTURES) {
    if (fixture.id === id) return fixture;
  }
  return undefined;
}
|
| 487 |
// Turn a task name into a display label by upper-casing its first character;
// a falsy task (empty string, null, undefined) is labelled 'Other'.
function groupLabel(task) {
  if (!task) return 'Other';
  const head = task.charAt(0).toUpperCase();
  const tail = task.slice(1);
  return head + tail;
}
|
| 488 |
-
function groupOf(f) { return groupLabel(f ? f.task : ''); }
|
| 489 |
-
|
| 490 |
-
// Distinct group labels in fixture order (e.g. Generation, Editing).
|
| 491 |
-
const GROUPS = (() => {
|
| 492 |
-
const seen = [];
|
| 493 |
-
FIXTURES.forEach(f => { const g = groupOf(f); if (!seen.includes(g)) seen.push(g); });
|
| 494 |
-
return seen;
|
| 495 |
-
})();
|
| 496 |
-
|
| 497 |
-
// --- URL persistence: ?fixtures=a,b,c -------------------------------------
|
| 498 |
-
// Wrapped in try/catch: history.replaceState and the URL read both throw in
|
| 499 |
-
// sandboxed iframe contexts (this caused an "Uncaught Error: Script error.").
|
| 500 |
-
function loadSelectedFromURL() {
|
| 501 |
-
try {
|
| 502 |
-
const p = new URLSearchParams(location.search).get('fixtures');
|
| 503 |
-
if (!p) return;
|
| 504 |
-
const ids = p.split(',').map(s => s.trim())
|
| 505 |
-
.filter(id => FIXTURES.some(f => f.id === id)).slice(0, MAX_FIXTURES);
|
| 506 |
-
if (ids.length) selected = ids;
|
| 507 |
-
} catch (e) { /* sandboxed context -> keep defaults */ }
|
| 508 |
-
}
|
| 509 |
-
function syncURL() {
|
| 510 |
-
try {
|
| 511 |
-
const u = new URL(location.href);
|
| 512 |
-
u.searchParams.set('fixtures', selected.join(','));
|
| 513 |
-
history.replaceState(null, '', u);
|
| 514 |
-
} catch (e) { /* sandboxed/cross-origin context -> URL persistence no-ops */ }
|
| 515 |
-
}
|
| 516 |
-
|
| 517 |
-
// --- Fixture picker: pills + searchable grouped dropdown -------------------
|
| 518 |
-
function renderPills() {
|
| 519 |
-
const wrap = document.getElementById('pills');
|
| 520 |
-
if (!selected.length) { wrap.innerHTML = ''; return; }
|
| 521 |
-
wrap.innerHTML = selected.map(id => {
|
| 522 |
-
const f = fixtureMeta(id);
|
| 523 |
-
const grp = (f && f.task) ? '<span class="pgroup">' + esc(groupOf(f)) + '</span>' : '';
|
| 524 |
-
return '<span class="pill">' + grp
|
| 525 |
-
+ '<span class="pname">' + esc(f ? f.name : id) + '</span>'
|
| 526 |
-
+ '<button class="premove" data-remove="' + esc(id) + '" title="Remove" aria-label="Remove ' + esc(id) + '">\\u00d7</button>'
|
| 527 |
-
+ '</span>';
|
| 528 |
-
}).join('');
|
| 529 |
-
wrap.querySelectorAll('.premove').forEach(b => {
|
| 530 |
-
b.onclick = () => {
|
| 531 |
-
if (selected.length <= 1) return; // keep at least 1 column
|
| 532 |
-
selected = selected.filter(x => x !== b.dataset.remove);
|
| 533 |
-
refreshPicker(); buildGallery(); syncURL();
|
| 534 |
-
};
|
| 535 |
-
});
|
| 536 |
-
}
|
| 537 |
-
|
| 538 |
-
let popoverQuery = '';
|
| 539 |
-
function renderPopoverList() {
|
| 540 |
-
const list = document.getElementById('popoverList');
|
| 541 |
-
const q = popoverQuery.trim().toLowerCase();
|
| 542 |
-
const match = f => !q || f.name.toLowerCase().includes(q)
|
| 543 |
-
|| groupOf(f).toLowerCase().includes(q) || (f.task || '').toLowerCase().includes(q);
|
| 544 |
-
let html = '';
|
| 545 |
-
GROUPS.forEach(g => {
|
| 546 |
-
const items = FIXTURES.filter(f => groupOf(f) === g && match(f));
|
| 547 |
-
if (!items.length) return;
|
| 548 |
-
html += '<div class="popover-group">' + esc(g) + '</div>';
|
| 549 |
-
html += items.map(f => {
|
| 550 |
-
const sel = selected.includes(f.id);
|
| 551 |
-
const tag = f.task ? '<span class="itask">' + esc(f.task) + '</span>' : '';
|
| 552 |
-
return '<div class="popover-item ' + (sel ? 'is-selected' : '') + '" data-pick="' + esc(f.id) + '">'
|
| 553 |
-
+ tag + '<span class="iname">' + esc(f.name) + '</span></div>';
|
| 554 |
-
}).join('');
|
| 555 |
-
});
|
| 556 |
-
if (!html) html = '<div class="popover-empty">No samples match \\u201c' + esc(popoverQuery) + '\\u201d.</div>';
|
| 557 |
-
list.innerHTML = html;
|
| 558 |
-
// At the cap, show a note rather than silently dropping a pick.
|
| 559 |
-
const existingCap = document.getElementById('popoverCap');
|
| 560 |
-
if (existingCap) existingCap.remove();
|
| 561 |
-
if (selected.length >= MAX_FIXTURES) {
|
| 562 |
-
list.insertAdjacentHTML('afterend',
|
| 563 |
-
'<div class="popover-cap" id="popoverCap">Max ' + MAX_FIXTURES + ' samples - remove one to add another.</div>');
|
| 564 |
-
}
|
| 565 |
-
list.querySelectorAll('.popover-item').forEach(it => {
|
| 566 |
-
it.onclick = () => {
|
| 567 |
-
const id = it.dataset.pick;
|
| 568 |
-
if (selected.includes(id)) {
|
| 569 |
-
if (selected.length <= 1) return; // keep at least 1
|
| 570 |
-
selected = selected.filter(x => x !== id);
|
| 571 |
-
} else {
|
| 572 |
-
if (selected.length >= MAX_FIXTURES) return; // hard cap; user removes to add
|
| 573 |
-
selected.push(id);
|
| 574 |
-
}
|
| 575 |
-
refreshPicker(); buildGallery(); syncURL();
|
| 576 |
-
};
|
| 577 |
-
});
|
| 578 |
-
}
|
| 579 |
-
|
| 580 |
-
function refreshPicker() {
|
| 581 |
-
renderPills();
|
| 582 |
-
renderPopoverList();
|
| 583 |
-
const add = document.getElementById('addFixtureBtn');
|
| 584 |
-
if (add) add.disabled = !FIXTURES.length;
|
| 585 |
-
}
|
| 586 |
-
|
| 587 |
-
function openPopover() {
|
| 588 |
-
const pop = document.getElementById('popover');
|
| 589 |
-
pop.hidden = false;
|
| 590 |
-
popoverQuery = '';
|
| 591 |
-
document.getElementById('popoverSearch').value = '';
|
| 592 |
-
renderPopoverList();
|
| 593 |
-
document.getElementById('popoverSearch').focus();
|
| 594 |
-
}
|
| 595 |
-
function closePopover() { document.getElementById('popover').hidden = true; }
|
| 596 |
-
|
| 597 |
-
function wirePicker() {
|
| 598 |
-
document.getElementById('addFixtureBtn').onclick = (e) => {
|
| 599 |
-
e.stopPropagation();
|
| 600 |
-
const pop = document.getElementById('popover');
|
| 601 |
-
pop.hidden ? openPopover() : closePopover();
|
| 602 |
-
};
|
| 603 |
-
document.getElementById('popoverSearch').oninput = (e) => { popoverQuery = e.target.value; renderPopoverList(); };
|
| 604 |
-
document.getElementById('resetFixtures').onclick = () => {
|
| 605 |
-
selected = DEFAULT_FIXTURES.slice(); refreshPicker(); buildGallery(); syncURL(); closePopover();
|
| 606 |
-
};
|
| 607 |
-
// click-outside closes the popover
|
| 608 |
-
document.addEventListener('click', (e) => {
|
| 609 |
-
const anchor = document.querySelector('.picker-anchor');
|
| 610 |
-
if (anchor && !anchor.contains(e.target)) closePopover();
|
| 611 |
-
});
|
| 612 |
-
}
|
| 613 |
|
| 614 |
// --- Gallery render -------------------------------------------------------
|
| 615 |
function buildHead() {
|
| 616 |
const head = document.getElementById('gridHead');
|
| 617 |
let h = '<div>#</div><div>Submission</div><div>Score</div>';
|
| 618 |
-
|
| 619 |
-
const
|
| 620 |
-
|
| 621 |
-
|
|
|
|
|
|
|
| 622 |
});
|
| 623 |
head.innerHTML = h;
|
| 624 |
}
|
|
@@ -628,6 +453,7 @@ function buildHead() {
|
|
| 628 |
function imgFail(img) {
|
| 629 |
const cell = img.closest('.thumb-cell');
|
| 630 |
if (cell) cell.innerHTML = '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
|
|
|
|
| 631 |
}
|
| 632 |
|
| 633 |
function thumbHTML(url, attrs, clickable) {
|
|
@@ -636,13 +462,13 @@ function thumbHTML(url, attrs, clickable) {
|
|
| 636 |
}
|
| 637 |
const hint = clickable ? '<span class="open-hint">open</span>' : '';
|
| 638 |
return '<div class="thumb" ' + attrs + '>'
|
| 639 |
-
+ '<img loading="lazy" decoding="async" src="' + url + '" alt="" onerror="imgFail(this)">'
|
| 640 |
+ hint + '</div>';
|
| 641 |
}
|
| 642 |
|
| 643 |
function buildGallery() {
|
| 644 |
const g = document.getElementById('gallery');
|
| 645 |
-
g.style.setProperty('--ncol', Math.max(
|
| 646 |
buildHead();
|
| 647 |
g.querySelectorAll('.grow').forEach(n => n.remove());
|
| 648 |
|
|
@@ -663,8 +489,8 @@ function buildGallery() {
|
|
| 663 |
let gtCells = '<div class="rank">★</div>'
|
| 664 |
+ '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>'
|
| 665 |
+ '<div class="score-cell"><span class="agg">1.000</span></div>';
|
| 666 |
-
|
| 667 |
-
gtCells += '<div class="thumb-cell">' + thumbHTML(gtRenderFor(id), 'data-gt="' + esc(id) + '"', false) + '</div>';
|
| 668 |
});
|
| 669 |
gt.innerHTML = gtCells;
|
| 670 |
g.appendChild(gt);
|
|
@@ -689,8 +515,8 @@ function buildGallery() {
|
|
| 689 |
+ '<span class="sb"><span class="sb-l">Edit</span><span class="sb-v">' + fmt(s.edit, 3) + '</span></span>'
|
| 690 |
+ '</div>'
|
| 691 |
+ '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
|
| 692 |
-
|
| 693 |
-
cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
|
| 694 |
});
|
| 695 |
row.innerHTML = cells;
|
| 696 |
g.appendChild(row);
|
|
@@ -710,7 +536,11 @@ function wireGallery() {
|
|
| 710 |
}
|
| 711 |
|
| 712 |
function openModal(fxId, sub) {
|
| 713 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
document.getElementById('modalSub').textContent = sub.name;
|
| 715 |
const gt = gtRenderFor(fxId);
|
| 716 |
const out = renderFor(sub, fxId);
|
|
@@ -729,26 +559,93 @@ function openModal(fxId, sub) {
|
|
| 729 |
document.getElementById('modalNote').innerHTML =
|
| 730 |
'CAD score for this sample: <b>' + cad + '</b>. The full per-sample report '
|
| 731 |
+ '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
|
| 732 |
-
document.getElementById('modalBack')
|
|
|
|
|
|
|
|
|
|
| 733 |
}
|
| 734 |
function closeModal() {
|
| 735 |
document.getElementById('modalBack').classList.remove('show');
|
|
|
|
| 736 |
}
|
| 737 |
document.getElementById('modalClose').onclick = closeModal;
|
| 738 |
document.getElementById('modalBack').onclick = (e) => { if (e.target.id === 'modalBack') closeModal(); };
|
| 739 |
document.addEventListener('keydown', (e) => { if (e.key === 'Escape') closeModal(); });
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
// Pin the GT row exactly beneath the sticky column header.
|
| 742 |
function syncHeadHeight() {
|
| 743 |
const head = document.getElementById('gridHead');
|
| 744 |
if (head) document.documentElement.style.setProperty('--head-h', head.offsetHeight + 'px');
|
| 745 |
}
|
| 746 |
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
buildGallery();
|
| 751 |
-
|
| 752 |
-
window.addEventListener('resize', syncHeadHeight);
|
| 753 |
-
if (
|
|
|
|
| 754 |
"""
|
|
|
|
| 17 |
Builds a self-contained HTML document (its own CSS + JS) from the live
|
| 18 |
submission rows. The Space serves it at ``/gallery`` and embeds it in
|
| 19 |
the Gradio "Gallery" tab via an iframe, so the bespoke visual surface
|
| 20 |
+
(sticky ground-truth row, turntable grid, report modal) lives in plain
|
| 21 |
+
HTML/JS isolated from Gradio's styles rather than being forced into
|
| 22 |
+
Gradio components.
|
| 23 |
+
|
| 24 |
+
The page is intentionally simple: it shows a **fixed** set of four
|
| 25 |
+
sample columns (see :data:`FIXED_FIXTURES`) for the top-10 verified
|
| 26 |
+
rows. There is no fixture picker -- the columns are the same on every
|
| 27 |
+
visit so the page reads like a printed comparison sheet rather than an
|
| 28 |
+
interactive matrix. :func:`build_gallery_payload` shapes the rows +
|
| 29 |
+
the fixed fixtures into a small JSON blob the page's JS renders. Render
|
| 30 |
+
lookups are isolated behind the ``renderFor`` / ``gtRenderFor`` JS
|
| 31 |
+
hooks (mirroring the design brief), pointed at the cached render-proxy
|
| 32 |
+
URLs the caller injects via the two resolvers:
|
| 33 |
|
| 34 |
- ``renderFor(sub, fixtureId)`` -> ``/render/<id>/<fixture>.webp`` (or
|
| 35 |
``null`` when the per-fixture status is invalid/missing, which draws
|
| 36 |
the dashed "invalid generation" cell).
|
| 37 |
- ``gtRenderFor(fixtureId)`` -> ``/gt-render/<fixture>.webp``.
|
| 38 |
|
| 39 |
+
GIFs are lazy-loaded by the browser, so only the on-screen tiles are
|
| 40 |
+
fetched and CDN/browser caching makes repeat visits essentially free.
|
| 41 |
+
This requires the Space to be **public** (HF's edge 404s in-browser
|
| 42 |
+
fetches to our custom routes while private).
|
|
|
|
| 43 |
|
| 44 |
Turntable clicks open a GT-vs-output compare modal that points at the
|
| 45 |
existing per-submission detail/report view.
|
|
|
|
| 55 |
# tab, not here.
|
| 56 |
GALLERY_TOP_N = 10
|
| 57 |
|
| 58 |
+
# Fixed gallery columns: two generation + two editing samples, one
|
| 59 |
+
# "Medium" and one "Hard" per task. Difficulty is deliberately **not**
|
| 60 |
+
# dynamic. It was chosen once from the Claude Opus 4.8 baseline
|
| 61 |
+
# (submission ``huggingface_claude-opus-4-8-hf-baseline-with-build12...``)
|
| 62 |
+
# by taking, within each task type's *valid* per-fixture CAD scores
|
| 63 |
+
# sorted ascending, the 50th-percentile fixture as "Medium" and the
|
| 64 |
+
# 20th-percentile fixture (i.e. 80% of fixtures score higher, so it is
|
| 65 |
+
# harder) as "Hard". See ``tools/pick_gallery_fixtures.py`` to recompute.
|
| 66 |
+
#
|
| 67 |
+
# The GUI does NOT refresh this selection as the leaderboard evolves:
|
| 68 |
+
# what counts as medium/hard may drift with new models, but a stable,
|
| 69 |
+
# simple comparison sheet is worth more here than a moving ground truth.
|
| 70 |
+
FIXED_FIXTURES = [
|
| 71 |
+
{"id": "108", "task": "generation", "difficulty": "Medium"},
|
| 72 |
+
{"id": "111", "task": "generation", "difficulty": "Hard"},
|
| 73 |
+
{"id": "246", "task": "editing", "difficulty": "Medium"},
|
| 74 |
+
{"id": "211", "task": "editing", "difficulty": "Hard"},
|
| 75 |
+
]
|
| 76 |
|
| 77 |
|
| 78 |
def _verified_rows(rows: list[dict]) -> list[dict]:
|
|
|
|
| 94 |
return verified[:GALLERY_TOP_N]
|
| 95 |
|
| 96 |
|
| 97 |
+
def _sub_payload(row: dict, fixture_ids: list[str], render_resolver, diff_resolver) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
"""Project one verified row into the compact shape the page JS needs.
|
| 99 |
|
| 100 |
+
Only the fixed gallery columns (``fixture_ids``) are projected; a
|
| 101 |
+
fixture the row never scored shows up as a ``missing`` cell (dashed
|
| 102 |
+
tile) rather than being dropped.
|
| 103 |
+
|
| 104 |
``render_resolver(submission_id, fixture_id)`` returns the cached
|
| 105 |
proxy URL for a *valid* fixture, or ``None``. Invalid/missing
|
| 106 |
fixtures carry ``img: null`` so the page draws the dashed cell;
|
|
|
|
| 118 |
pfs = row.get("per_fixture_scores") or {}
|
| 119 |
sid = row.get("submission_id") or ""
|
| 120 |
cells: dict[str, dict] = {}
|
| 121 |
+
for fid in fixture_ids:
|
| 122 |
+
fx = pfs.get(fid) or {}
|
| 123 |
status = fx.get("status") or "missing"
|
| 124 |
valid = status == "valid"
|
| 125 |
is_editing = (fx.get("task_type") or "") == "editing"
|
|
|
|
| 153 |
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> dict:
|
| 154 |
"""Shape live rows into the JSON the gallery page renders from.
|
| 155 |
|
| 156 |
+
The fixture columns are the fixed :data:`FIXED_FIXTURES` set (no
|
| 157 |
+
picker), so the page is the same every visit. Image sources are
|
| 158 |
+
injected via resolvers so this module stays agnostic to how the
|
| 159 |
+
cached render URLs are constructed:
|
| 160 |
|
| 161 |
- ``render_resolver(submission_id, fixture_id) -> str | None`` (plain
|
| 162 |
candidate turntable; backs the modal and non-editing grid tiles)
|
|
|
|
| 164 |
turntable; backs the grid tile for editing fixtures)
|
| 165 |
- ``gt_resolver(fixture_id) -> str | None``
|
| 166 |
|
| 167 |
+
Returns ``{"fixtures", "subs", "gtImg"}`` where ``fixtures`` carries
|
| 168 |
+
the fixed columns (id + task + difficulty) and ``gtImg`` maps each
|
| 169 |
+
fixture to its ground-truth image source.
|
| 170 |
"""
|
| 171 |
verified = _verified_rows(rows)
|
| 172 |
+
fixtures = [
|
| 173 |
+
{"id": f["id"], "name": f["id"], "task": f["task"], "difficulty": f["difficulty"]}
|
| 174 |
+
for f in FIXED_FIXTURES
|
| 175 |
+
]
|
| 176 |
+
fixture_ids = [f["id"] for f in fixtures]
|
| 177 |
+
gt_img = {fid: gt_resolver(fid) for fid in fixture_ids}
|
| 178 |
return {
|
| 179 |
"fixtures": fixtures,
|
| 180 |
+
"subs": [
|
| 181 |
+
_sub_payload(r, fixture_ids, render_resolver, diff_resolver)
|
| 182 |
+
for r in verified
|
| 183 |
+
],
|
| 184 |
"gtImg": gt_img,
|
| 185 |
}
|
| 186 |
|
|
|
|
| 236 |
}
|
| 237 |
.wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
.section-label {
|
| 240 |
+
display: flex; align-items: center; gap: 10px; margin: 4px 0 6px;
|
| 241 |
font-size: 14px; font-weight: 700; color: var(--accent);
|
| 242 |
text-transform: uppercase; letter-spacing: .05em;
|
| 243 |
}
|
|
|
|
| 247 |
letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
|
| 248 |
}
|
| 249 |
.dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
|
| 250 |
+
.section-caption { margin: 0 0 16px; font-size: 12.5px; color: var(--ink-soft); line-height: 1.5; }
|
| 251 |
+
.section-caption b { color: var(--ink); font-weight: 600; }
|
| 252 |
|
| 253 |
.gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
|
| 254 |
.grid-head, .grow {
|
| 255 |
display: grid;
|
| 256 |
+
grid-template-columns: 52px minmax(200px, 1.3fr) 160px repeat(var(--ncol, 4), minmax(140px, 1fr));
|
| 257 |
align-items: stretch;
|
| 258 |
}
|
| 259 |
.grid-head {
|
|
|
|
| 263 |
border-radius: var(--radius) var(--radius) 0 0;
|
| 264 |
}
|
| 265 |
.grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
|
| 266 |
+
.grid-head .fix-h { flex-direction: column; align-items: flex-start; gap: 3px; }
|
| 267 |
+
.grid-head .fix-h .ftask { font-size: 11px; color: var(--ink-soft); text-transform: none; letter-spacing: 0; font-weight: 700; }
|
| 268 |
+
.grid-head .fix-h .ftop { display: flex; align-items: center; gap: 6px; }
|
| 269 |
+
.grid-head .fix-h .fname { font-size: 9.5px; color: var(--ink-faint); text-transform: none; letter-spacing: 0; font-family: var(--mono); font-weight: 600; }
|
| 270 |
+
.grid-head .fix-h .fdiff {
|
| 271 |
+
font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: .05em;
|
| 272 |
+
padding: 2px 7px; border-radius: 999px;
|
| 273 |
+
}
|
| 274 |
+
.fdiff.diff-medium { color: #b45309; background: #fdf3e7; }
|
| 275 |
+
.fdiff.diff-hard { color: var(--bad); background: var(--bad-soft); }
|
| 276 |
|
| 277 |
.grow.gt-row {
|
| 278 |
background: var(--gt-soft); border-bottom: 2px solid var(--gt);
|
|
|
|
| 369 |
|
| 370 |
_BODY = """
|
| 371 |
<div class="wrap">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
<div class="section-label">
|
| 373 |
Validated leaderboard - Top 10
|
| 374 |
<span class="verified-pill"><span class="dot"></span>verified only</span>
|
| 375 |
</div>
|
| 376 |
+
<p class="section-caption">
|
| 377 |
+
A fixed comparison sheet: <b>two generation</b> and <b>two editing</b> samples,
|
| 378 |
+
one <b>Medium</b> and one <b>Hard</b> per task. Difficulty is fixed (picked from the
|
| 379 |
+
Claude Opus 4.8 baseline's per-sample scores), so every visit shows the same columns.
|
| 380 |
+
</p>
|
| 381 |
<div class="gallery" id="gallery">
|
| 382 |
<div class="grid-head" id="gridHead"></div>
|
| 383 |
</div>
|
|
|
|
| 403 |
# ---------------------------------------------------------------------------
|
| 404 |
|
| 405 |
_JS = """
|
| 406 |
+
const DATA = window.GALLERY_DATA || {fixtures: [], subs: [], gtImg: {}};
|
| 407 |
+
// Fixed columns: the server hands us exactly the gallery's sample set, in
|
| 408 |
+
// order, so there is no picker and no client-side selection state.
|
| 409 |
const FIXTURES = DATA.fixtures || [];
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
// --- Render hooks. ---------------------------------------------------------
|
| 412 |
// The image sources are cached render-proxy URLs injected by the server, so
|
|
|
|
| 433 |
// Escape the five HTML-significant characters so arbitrary text can be
// interpolated into markup and into quoted attribute values. null/undefined
// become the empty string; any other value is stringified first.
function esc(s) {
  const ENTITIES = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
  return String(s == null ? '' : s).replace(/[&<>"']/g, c => ENTITIES[c]);
}
|
| 434 |
// Look up a fixture's metadata record in FIXTURES by id; undefined when no
// fixture has that id.
function fixtureMeta(id) {
  return FIXTURES.find(function (fx) { return fx.id === id; });
}
|
| 435 |
// Display label for a task type: the task name with its first character
// upper-cased, or 'Other' when the task is missing or empty.
function groupLabel(task) {
  if (!task) return 'Other';
  const initial = task.charAt(0).toUpperCase();
  return initial + task.slice(1);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
|
| 437 |
// --- Gallery render -------------------------------------------------------
|
| 438 |
// Render the column header row: three fixed columns (rank, submission, score)
// followed by one cell per fixture showing its task label, an optional
// difficulty pill and the fixture name.
function buildHead() {
  const fixedCols = '<div>#</div><div>Submission</div><div>Score</div>';
  const fixtureCols = FIXTURES.map(fx => {
    let pill = '';
    if (fx.difficulty) {
      // The lower-cased difficulty doubles as the CSS modifier class.
      const modifier = esc((fx.difficulty || '').toLowerCase());
      pill = '<span class="fdiff diff-' + modifier + '">' + esc(fx.difficulty) + '</span>';
    }
    return '<div class="fix-h"><div class="ftop"><span class="ftask">' + esc(groupLabel(fx.task)) + '</span>' + pill + '</div>'
      + '<span class="fname">#' + esc(fx.name) + '</span></div>';
  });
  document.getElementById('gridHead').innerHTML = fixedCols + fixtureCols.join('');
}
|
|
|
|
| 453 |
// onerror handler for gallery thumbnails: when the image sits inside a
// .thumb-cell, swap that cell's content for the "invalid generation"
// placeholder; then re-fit the iframe to the page content.
function imgFail(img) {
  const holder = img.closest('.thumb-cell');
  if (holder) {
    holder.innerHTML = '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
  }
  fitIframe();
}
|
| 458 |
|
| 459 |
function thumbHTML(url, attrs, clickable) {
|
|
|
|
| 462 |
}
|
| 463 |
const hint = clickable ? '<span class="open-hint">open</span>' : '';
|
| 464 |
return '<div class="thumb" ' + attrs + '>'
|
| 465 |
+
+ '<img loading="lazy" decoding="async" src="' + url + '" alt="" onload="fitIframe()" onerror="imgFail(this)">'
|
| 466 |
+ hint + '</div>';
|
| 467 |
}
|
| 468 |
|
| 469 |
function buildGallery() {
|
| 470 |
const g = document.getElementById('gallery');
|
| 471 |
+
g.style.setProperty('--ncol', Math.max(FIXTURES.length, 1));
|
| 472 |
buildHead();
|
| 473 |
g.querySelectorAll('.grow').forEach(n => n.remove());
|
| 474 |
|
|
|
|
| 489 |
let gtCells = '<div class="rank">★</div>'
|
| 490 |
+ '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>'
|
| 491 |
+ '<div class="score-cell"><span class="agg">1.000</span></div>';
|
| 492 |
+
FIXTURES.forEach(f => {
|
| 493 |
+
gtCells += '<div class="thumb-cell">' + thumbHTML(gtRenderFor(f.id), 'data-gt="' + esc(f.id) + '"', false) + '</div>';
|
| 494 |
});
|
| 495 |
gt.innerHTML = gtCells;
|
| 496 |
g.appendChild(gt);
|
|
|
|
| 515 |
+ '<span class="sb"><span class="sb-l">Edit</span><span class="sb-v">' + fmt(s.edit, 3) + '</span></span>'
|
| 516 |
+ '</div>'
|
| 517 |
+ '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
|
| 518 |
+
FIXTURES.forEach(f => {
|
| 519 |
+
cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, f.id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(f.id) + '"', true) + '</div>';
|
| 520 |
});
|
| 521 |
row.innerHTML = cells;
|
| 522 |
g.appendChild(row);
|
|
|
|
| 536 |
}
|
| 537 |
|
| 538 |
function openModal(fxId, sub) {
|
| 539 |
+
const f = fixtureMeta(fxId);
|
| 540 |
+
const title = f
|
| 541 |
+
? groupLabel(f.task) + (f.difficulty ? ' \\u00b7 ' + f.difficulty : '') + ' (#' + fxId + ')'
|
| 542 |
+
: fxId;
|
| 543 |
+
document.getElementById('modalTitle').textContent = title;
|
| 544 |
document.getElementById('modalSub').textContent = sub.name;
|
| 545 |
const gt = gtRenderFor(fxId);
|
| 546 |
const out = renderFor(sub, fxId);
|
|
|
|
| 559 |
document.getElementById('modalNote').innerHTML =
|
| 560 |
'CAD score for this sample: <b>' + cad + '</b>. The full per-sample report '
|
| 561 |
+ '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
|
| 562 |
+
const back = document.getElementById('modalBack');
|
| 563 |
+
back.classList.add('show');
|
| 564 |
+
positionModalToView();
|
| 565 |
+
attachModalViewSync();
|
| 566 |
}
|
| 567 |
// Hide the compare modal and stop following parent scroll/resize events.
function closeModal() {
  const overlay = document.getElementById('modalBack');
  overlay.classList.remove('show');
  detachModalViewSync();
}
|
| 571 |
document.getElementById('modalClose').onclick = closeModal;
|
| 572 |
document.getElementById('modalBack').onclick = (e) => { if (e.target.id === 'modalBack') closeModal(); };
|
| 573 |
document.addEventListener('keydown', (e) => { if (e.key === 'Escape') closeModal(); });
|
| 574 |
|
| 575 |
+
// --- Modal positioning ----------------------------------------------------
|
| 576 |
+
// The page lives in a srcdoc iframe sized to its full content (see fitIframe),
|
| 577 |
+
// so a plain `position: fixed` overlay would anchor to the iframe's full
|
| 578 |
+
// height and land far below the fold. Instead we pin the overlay to the part
|
| 579 |
+
// of the iframe currently visible inside the parent viewport. srcdoc iframes
|
| 580 |
+
// are same-origin with the embedding document, so frameElement / parent are
|
| 581 |
+
// readable; everything is wrapped in try/catch and falls back to a fixed,
|
| 582 |
+
// viewport-centred overlay if that access is ever blocked.
|
| 583 |
+
// Lay the modal overlay over the slice of this iframe that is currently
// visible in the parent viewport. Falls back to a fixed, full-viewport
// overlay when the embedding frame cannot be inspected or no part of the
// iframe is in view.
function positionModalToView() {
  const overlay = document.getElementById('modalBack');
  let pinned = false;
  try {
    const frame = window.frameElement;
    const host = window.parent;
    if (frame && host) {
      // Top edge of the iframe box, in parent-viewport coordinates.
      const frameTop = frame.getBoundingClientRect().top;
      const contentHeight = document.documentElement.scrollHeight;
      // Visible band expressed as y offsets inside the iframe document.
      const bandTop = Math.max(0, -frameTop);
      const bandBottom = Math.min(contentHeight, -frameTop + host.innerHeight);
      if (bandBottom > bandTop) {
        const css = overlay.style;
        css.position = 'absolute';
        css.left = '0';
        css.right = '0';
        css.bottom = 'auto';
        css.top = bandTop + 'px';
        css.height = (bandBottom - bandTop) + 'px';
        pinned = true;
      }
    }
  } catch (e) { /* cross-origin / sandboxed -> fixed fallback below */ }
  if (pinned) return;

  // Fallback: cover the iframe's own viewport and clear any explicit height
  // left over from an earlier pinned placement.
  const fallback = overlay.style;
  fallback.position = 'fixed';
  fallback.top = '0';
  fallback.left = '0';
  fallback.right = '0';
  fallback.bottom = '0';
  fallback.height = '';
}
|
| 609 |
+
|
| 610 |
+
let _modalSync = null;
|
| 611 |
+
// Keep the open modal aligned with the visible part of the iframe while the
// parent page scrolls or resizes. Safe to call repeatedly: a handler left
// over from an earlier call is removed first, so at most one scroll/resize
// listener pair exists on the parent at any time. If the parent window is not
// accessible, _modalSync is reset to null and the modal keeps the position
// computed when it was opened.
function attachModalViewSync() {
  try {
    const pv = window.parent;
    if (_modalSync) {
      // Re-attach without a close in between: overwriting _modalSync would
      // orphan the previous handler on the parent, where detach could never
      // reach it again.
      pv.removeEventListener('scroll', _modalSync);
      pv.removeEventListener('resize', _modalSync);
    }
    _modalSync = () => positionModalToView();
    pv.addEventListener('scroll', _modalSync, { passive: true });
    pv.addEventListener('resize', _modalSync);
  } catch (e) { _modalSync = null; }
}
|
| 619 |
+
// Undo attachModalViewSync: remove the scroll/resize handler from the parent
// window (best effort, errors are ignored) and forget it.
function detachModalViewSync() {
  const handler = _modalSync;
  _modalSync = null;
  if (!handler) return;
  try {
    const host = window.parent;
    host.removeEventListener('scroll', handler);
    host.removeEventListener('resize', handler);
  } catch (e) { /* ignore */ }
}
|
| 629 |
+
|
| 630 |
// Pin the GT row exactly beneath the sticky column header.
|
| 631 |
function syncHeadHeight() {
  // Publish the header's rendered height as the --head-h custom property on
  // the root element; does nothing when the header element is absent.
  const head = document.getElementById('gridHead');
  if (!head) return;
  const px = head.offsetHeight + 'px';
  document.documentElement.style.setProperty('--head-h', px);
}
|
| 635 |
|
| 636 |
+
// Size the iframe to its content so the page scrolls naturally in the parent
|
| 637 |
+
// (no oversized fixed-height box, no nested scrollbar). No-ops if frameElement
|
| 638 |
+
// is unreadable (the wrapper then keeps its CSS fallback height).
|
| 639 |
+
function fitIframe() {
  // Set the embedding iframe's height to the body's scroll height (rounded
  // up). Without a readable frameElement the current height is left alone.
  try {
    const frame = window.frameElement;
    if (!frame) return;
    const contentPx = Math.ceil(document.body.scrollHeight);
    frame.style.height = contentPx + 'px';
  } catch (e) { /* sandboxed -> keep fallback height */ }
}
|
| 645 |
+
|
| 646 |
buildGallery();
|
| 647 |
+
fitIframe();
|
| 648 |
+
window.addEventListener('resize', () => { syncHeadHeight(); fitIframe(); });
|
| 649 |
+
if (window.ResizeObserver) new ResizeObserver(fitIframe).observe(document.body);
|
| 650 |
+
if (document.fonts && document.fonts.ready) document.fonts.ready.then(() => { syncHeadHeight(); fitIframe(); });
|
| 651 |
"""
|
tools/pick_gallery_fixtures.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Recompute the gallery's fixed Medium/Hard sample columns.
|
| 2 |
+
|
| 3 |
+
The gallery shows a **fixed** four-column comparison sheet (two
|
| 4 |
+
generation + two editing samples, one Medium and one Hard per task);
|
| 5 |
+
see ``gallery.FIXED_FIXTURES``. The difficulty split is picked once from
|
| 6 |
+
a single reference submission's per-fixture CAD scores and then frozen
|
| 7 |
+
into ``FIXED_FIXTURES`` -- the live page never recomputes it.
|
| 8 |
+
|
| 9 |
+
This script reproduces that pick so the constant can be regenerated when
|
| 10 |
+
the reference model changes. Within each task type, over the reference
|
| 11 |
+
submission's *valid* fixtures sorted by score ascending, it takes the
|
| 12 |
+
50th-percentile fixture as "Medium" and the 20th-percentile fixture
|
| 13 |
+
(80% of fixtures score higher, so it is harder) as "Hard".
|
| 14 |
+
|
| 15 |
+
Usage::
|
| 16 |
+
|
| 17 |
+
# From a local results.jsonl:
|
| 18 |
+
python cadgenbench-leaderboard/tools/pick_gallery_fixtures.py path/to/results.jsonl
|
| 19 |
+
|
| 20 |
+
# Or pull the live file straight from the Hub (needs a read token):
|
| 21 |
+
python cadgenbench-leaderboard/tools/pick_gallery_fixtures.py \\
|
| 22 |
+
--repo HuggingAI4Engineering/cadgenbench-submissions
|
| 23 |
+
|
| 24 |
+
By default it selects the Claude Opus 4.8 baseline; override with
|
| 25 |
+
``--submission-id`` or ``--name-contains``.
|
| 26 |
+
"""
|
| 27 |
+
from __future__ import annotations
|
| 28 |
+
|
| 29 |
+
import argparse
|
| 30 |
+
import json
|
| 31 |
+
import sys
|
| 32 |
+
import urllib.request
|
| 33 |
+
from collections import defaultdict
|
| 34 |
+
|
| 35 |
+
DEFAULT_NAME_CONTAINS = "Claude Opus 4.8"
|
| 36 |
+
# Score-distribution percentiles: Medium = median, Hard = low tail.
|
| 37 |
+
MEDIUM_PCT = 0.50
|
| 38 |
+
HARD_PCT = 0.20
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _load_rows(source: str, *, is_repo: bool, token: str | None) -> list[dict]:
    """Load every record of a ``results.jsonl`` file.

    Args:
        source: A Hub dataset repo id when ``is_repo`` is true, otherwise
            the path of a local file.
        is_repo: Fetch ``results.jsonl`` from the ``main`` revision of the
            Hub dataset ``source`` instead of reading a local file.
        token: Optional bearer token sent with the Hub request; unused for
            local files.

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: A non-blank line is not valid JSON.
    """
    if is_repo:
        url = f"https://huggingface.co/datasets/{source}/resolve/main/results.jsonl"
        req = urllib.request.Request(url, headers={"Cache-Control": "no-cache"})
        if token:
            req.add_header("Authorization", f"Bearer {token}")
        with urllib.request.urlopen(req, timeout=30) as resp:  # noqa: S310
            text = resp.read().decode("utf-8")
    else:
        with open(source, encoding="utf-8") as fh:
            text = fh.read()
    # JSONL records are separated by "\n" only. ``str.splitlines`` would also
    # break on U+2028 / U+2029 / U+0085, which may appear unescaped inside a
    # JSON string and would cut such a record in half. A trailing "\r" from
    # CRLF input is plain whitespace to ``json.loads``.
    return [json.loads(line) for line in text.split("\n") if line.strip()]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _pick_reference(rows: list[dict], args: argparse.Namespace) -> dict:
    """Select the reference submission row described by the CLI arguments.

    An exact ``args.submission_id`` takes precedence. Otherwise rows are
    matched by a case-insensitive substring of ``submission_name``
    (``args.name_contains``, falling back to ``DEFAULT_NAME_CONTAINS``), and
    exactly one row has to match.

    Exits the process through ``sys.exit`` with an explanatory message when
    the id is unknown, no name matches, or several names match.
    """
    wanted_id = args.submission_id
    if wanted_id:
        hit = next((row for row in rows if row.get("submission_id") == wanted_id), None)
        if hit is None:
            sys.exit(f"No submission with id {wanted_id!r}")
        return hit

    needle = (args.name_contains or DEFAULT_NAME_CONTAINS).lower()
    candidates = []
    for row in rows:
        # A missing or null name is treated as the empty string.
        name = row.get("submission_name") or ""
        if needle in name.lower():
            candidates.append(row)

    if len(candidates) == 1:
        return candidates[0]
    if not candidates:
        sys.exit(f"No submission name contains {needle!r}")
    listing = ", ".join(repr(row.get("submission_name")) for row in candidates)
    sys.exit(f"Ambiguous --name-contains {needle!r}: {listing}")
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _pick_at(sorted_scores: list[tuple[float, str]], pct: float) -> tuple[float, str]:
    """Nearest-rank pick at ``pct`` of an ascending score list.

    The rank ``pct * (len(sorted_scores) - 1)`` is rounded to the nearest
    index, with exact halves rounded up.

    Args:
        sorted_scores: ``(score, fixture_id)`` pairs in ascending order.
        pct: Position in the list as a fraction within ``[0, 1]``.

    Returns:
        The pair at the selected index.

    Raises:
        IndexError: ``sorted_scores`` is empty.
        ValueError: ``pct`` is outside ``[0, 1]`` (a negative value would
            otherwise silently index from the end of the list).
    """
    if not sorted_scores:
        raise IndexError("cannot pick a percentile from an empty score list")
    if not 0.0 <= pct <= 1.0:
        raise ValueError(f"pct must be within [0, 1], got {pct!r}")
    # int(x + 0.5) rounds halves up. The built-in round() rounds halves to
    # the nearest even integer, so the median of an even-length list would
    # land below the midpoint for some lengths and above it for others (and,
    # with two items, on the same element as the 20th percentile).
    idx = int(pct * (len(sorted_scores) - 1) + 0.5)
    return sorted_scores[idx]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def main() -> int:
    """Print the Medium/Hard fixture pick per task and a ``FIXED_FIXTURES`` snippet.

    Parses the command line, loads ``results.jsonl`` (local path or Hub
    repo), selects the reference submission and, for each of the
    ``generation`` and ``editing`` tasks, picks the Medium and Hard fixtures
    from the reference's valid per-fixture CAD scores. A task without valid
    fixtures is reported and left out of the snippet.

    Returns:
        ``0``. Argument errors and reference-lookup failures exit the
        process instead of returning.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("source", nargs="?", help="Path to a local results.jsonl")
    ap.add_argument("--repo", help="Hub dataset repo id to pull results.jsonl from")
    ap.add_argument("--token", help="HF read token (for a private --repo)")
    ap.add_argument("--submission-id", help="Reference submission id (exact)")
    ap.add_argument(
        "--name-contains",
        help=f"Reference by name substring (default: {DEFAULT_NAME_CONTAINS!r})",
    )
    args = ap.parse_args()

    # Exactly one input source: both given and neither given are errors.
    if bool(args.source) == bool(args.repo):
        ap.error("Pass exactly one of a local results.jsonl path or --repo.")
    rows = _load_rows(
        args.repo or args.source, is_repo=bool(args.repo), token=args.token,
    )
    ref = _pick_reference(rows, args)
    print(f"Reference: {ref.get('submission_name')!r} [{ref.get('submission_id')}]\n")

    # Group the reference's scored, valid fixtures by task type.
    by_task: dict[str, list[tuple[float, str]]] = defaultdict(list)
    for fid, fx in (ref.get("per_fixture_scores") or {}).items():
        fx = fx or {}
        if fx.get("status") == "valid" and fx.get("cad_score") is not None:
            by_task[fx.get("task_type") or "?"].append((float(fx["cad_score"]), fid))

    # round(), not int(): a fraction such as 0.29 is stored slightly below
    # its decimal value and would otherwise be labelled one percent too low.
    medium_label = round(MEDIUM_PCT * 100)
    hard_label = round(HARD_PCT * 100)

    snippet = []
    for task in ("generation", "editing"):
        items = sorted(by_task.get(task, []))
        if not items:
            print(f"{task}: no valid fixtures")
            continue
        med = _pick_at(items, MEDIUM_PCT)
        hard = _pick_at(items, HARD_PCT)
        print(f"{task}: {len(items)} valid fixtures")
        print(f" Medium (p{medium_label}): #{med[1]} score={med[0]:.4f}")
        print(f" Hard (p{hard_label}): #{hard[1]} score={hard[0]:.4f}")
        snippet.append((task, "Medium", med[1]))
        snippet.append((task, "Hard", hard[1]))

    print("\nFIXED_FIXTURES = [")
    for task, diff, fid in snippet:
        # Fixture ids come from the results file; json.dumps quotes and
        # escapes them so the printed literal stays valid Python even when an
        # id contains a quote or a backslash.
        print(f' {{"id": {json.dumps(fid)}, "task": "{task}", "difficulty": "{diff}"}},')
    print("]")
    return 0
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
raise SystemExit(main())
|