# CADGenBench / gallery.py
# Michael Rabinovich
# Update gallery default fixture
# 85bb21f
# Copyright 2026 Hugging Face
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Visual gallery leaderboard page.
Builds a self-contained HTML document (its own CSS + JS) from the live
submission rows. The Space serves it at ``/gallery`` and embeds it in
the Gradio "Gallery" tab via an iframe, so the bespoke visual surface
(sticky ground-truth row, turntable grid, report modal) lives in plain
HTML/JS isolated from Gradio's styles rather than being forced into
Gradio components.
The page is intentionally simple: it shows a **fixed** set of four
sample columns (see :data:`FIXED_FIXTURES`) for the top-10 verified
rows. There is no fixture picker -- the columns are the same on every
visit so the page reads like a printed comparison sheet rather than an
interactive matrix. :func:`build_gallery_payload` shapes the rows +
the fixed fixtures into a small JSON blob the page's JS renders. Render
lookups are isolated behind the ``renderFor`` / ``gtRenderFor`` JS
hooks (mirroring the design brief), pointed at the cached render-proxy
URLs the caller injects via the two resolvers:
- ``renderFor(sub, fixtureId)`` -> ``/render/<id>/<fixture>.webp`` (or
``null`` when the per-fixture status is invalid/missing, which draws
the dashed "invalid generation" cell).
- ``gtRenderFor(fixtureId)`` -> ``/gt-render/<fixture>.webp``.
Turntable images (served as ``.webp``) are lazy-loaded by the browser, so
only the on-screen tiles are fetched and CDN/browser caching makes repeat
visits essentially free.
This requires the Space to be **public** (HF's edge 404s in-browser
fetches to our custom routes while private).
Turntable clicks open a GT-vs-output compare modal that points at the
existing per-submission detail/report view.
"""
from __future__ import annotations
import json
from leaderboard import _report_relative_url
# Gallery shows the top-N verified submissions only (the visual shop
# window). The numeric long tail lives on the Full results / Leaderboard
# tab, not here.
#
# ``_verified_rows`` slices its score-sorted result to this many rows.
# NOTE: the page heading in ``_BODY`` hard-codes the text "Top 10" (it is a
# plain string, not derived from this constant), so update both together.
GALLERY_TOP_N = 10
# Fixed gallery columns: two generation + two editing samples, one
# "Medium" and one "Hard" per task. Difficulty is deliberately **not**
# dynamic. It was chosen once from the Claude Opus 4.8 baseline
# (submission ``huggingface_claude-opus-4-8-hf-baseline-with-build12...``)
# by taking, within each task type's *valid* per-fixture CAD scores
# sorted ascending, the 50th-percentile fixture as "Medium" and the
# 20th-percentile fixture (i.e. 80% of fixtures score higher, so it is
# harder) as "Hard". See ``tools/pick_gallery_fixtures.py`` to recompute.
#
# The GUI does NOT refresh this selection as the leaderboard evolves:
# what counts as medium/hard may drift with new models, but a stable,
# simple comparison sheet is worth more here than a moving ground truth.
# One entry per gallery column, left to right: (fixture id, task, difficulty).
# Expanded into the ``{"id", "task", "difficulty"}`` dicts the payload builder
# reads; column order on the page follows the order given here.
FIXED_FIXTURES = [
    {"id": fixture_id, "task": task, "difficulty": difficulty}
    for fixture_id, task, difficulty in (
        ("101", "generation", "Medium"),
        ("111", "generation", "Hard"),
        ("229", "editing", "Medium"),
        ("207", "editing", "Hard"),
    )
]
def _is_real_score(value: object) -> bool:
    """Return True only for a finite ``int``/``float`` aggregate score.

    ``bool`` is rejected explicitly (it is an ``int`` subclass, so a stray
    ``True`` would otherwise rank as a score of 1). NaN and +/-inf are
    rejected by the range comparison: NaN compares false against everything,
    and NaN keys would make the sort order arbitrary.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    return float("-inf") < value < float("inf")


def _verified_rows(rows: list[dict]) -> list[dict]:
    """Completed + validated rows, score-sorted desc, capped at the top N.

    Mirrors the leaderboard's notion of "verified": ``validation_status
    == 'validated'`` and a terminal ``status == 'completed'`` with a
    real (finite, numeric, non-bool) aggregate score. Pending / failed /
    unvalidated rows never reach the visual gallery.

    Args:
        rows: Submission row dicts; only ``validation_status``, ``status``
            and ``aggregate_score`` are read here.

    Returns:
        A new list holding at most ``GALLERY_TOP_N`` of the input row dicts
        (the same objects, not copies), highest ``aggregate_score`` first.
        Rows with equal scores keep their relative input order (the sort is
        stable).
    """
    verified = [
        r
        for r in rows
        if r.get("validation_status") == "validated"
        and r.get("status") == "completed"
        and _is_real_score(r.get("aggregate_score"))
    ]
    # The filter above guarantees a finite number, so the key needs no fallback.
    verified.sort(key=lambda r: r["aggregate_score"], reverse=True)
    return verified[:GALLERY_TOP_N]
def _sub_payload(row: dict, fixture_ids: list[str], render_resolver, diff_resolver) -> dict:
    """Project one verified row into the compact shape the page JS needs.

    Only the fixed gallery columns (``fixture_ids``) are projected; a
    fixture the row never scored shows up as a ``missing`` cell (dashed
    tile) rather than being dropped.

    ``render_resolver(submission_id, fixture_id)`` returns the cached
    proxy URL for a *valid* fixture, or ``None``. Invalid/missing
    fixtures carry ``img: null`` so the page draws the dashed cell;
    note validity is driven by the per-fixture ``status`` in the data,
    not by whether an image fetch happened to succeed. Neither resolver
    is called for a non-valid fixture.

    Each cell also carries ``gridImg``, the source the gallery grid tile
    uses: for ``editing`` fixtures this is the ghost edit-diff turntable
    (``diff_resolver``) so the grid shows what actually changed; for every
    other task it is the same plain candidate turntable as ``img`` (resolved
    once and reused). In the page script, the compare modal shows ``img``
    for non-editing fixtures and ``gridImg`` (falling back to ``img``) for
    editing fixtures.

    Args:
        row: One submission row dict.
        fixture_ids: Fixture ids to project, in column order.
        render_resolver: ``(submission_id, fixture_id) -> str | None``.
        diff_resolver: ``(submission_id, fixture_id) -> str | None``.

    Returns:
        A JSON-serialisable dict with the submission's identity, scores and
        a ``cells`` mapping of fixture id -> ``{status, cad, img, gridImg}``.
    """
    by_task = row.get("score_by_task_type") or {}
    per_fixture = row.get("per_sample_scores") or {}
    sid = row.get("submission_id") or ""

    cells: dict[str, dict] = {}
    for fid in fixture_ids:
        fx = per_fixture.get(fid) or {}
        status = fx.get("status") or "missing"

        img = None
        grid_img = None
        if status == "valid":
            img = render_resolver(sid, fid)
            # NOTE(review): "editing" is read from the per-fixture score data
            # here, while the ground-truth row and the page script key off the
            # fixture's own ``task``; presumably the two always agree -- confirm.
            if (fx.get("task_type") or "") == "editing":
                grid_img = diff_resolver(sid, fid)
            else:
                # Same URL as ``img``: reuse it instead of resolving twice.
                grid_img = img

        cells[fid] = {
            "status": status,
            "cad": fx.get("cad_score"),
            "img": img,
            "gridImg": grid_img,
        }

    return {
        "id": sid,
        "name": row.get("submission_name") or "(unnamed submission)",
        "reportUrl": _report_relative_url(
            sid, row.get("status"), row.get("submission_sha256"),
        ),
        "who": row.get("submitter_name") or "",
        "score": row.get("aggregate_score"),
        "validity": row.get("validity_rate"),
        "gen": by_task.get("generation"),
        "edit": by_task.get("editing"),
        "date": row.get("submitted_at") or "",
        "version": row.get("cadgenbench_version") or "",
        "blobUrl": row.get("submission_blob_url") or "",
        "cells": cells,
    }
def build_gallery_payload(
    rows: list[dict], render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
) -> dict:
    """Assemble the JSON-ready payload the gallery page script renders.

    Columns always come from :data:`FIXED_FIXTURES` (there is no picker),
    so every visit shows the same sheet. URL construction is delegated to
    the injected resolvers, keeping this module unaware of how cached
    render URLs are built:

    - ``render_resolver(submission_id, fixture_id) -> str | None``: plain
      candidate turntable (non-editing grid tiles, and the modal output
      for non-editing fixtures).
    - ``diff_resolver(submission_id, fixture_id) -> str | None``: edit-diff
      turntable (grid tile for editing fixtures; the page script also shows
      it as the modal output for editing fixtures).
    - ``gt_resolver(fixture_id) -> str | None``: plain ground-truth
      turntable.
    - ``gt_diff_resolver(fixture_id) -> str | None``: ground-truth "answer
      key" edit-diff turntable, used for the ground-truth row on
      **editing** fixtures.

    Returns:
        ``{"fixtures", "subs", "gtImg"}``. ``fixtures`` lists the fixed
        columns (``id``, ``name`` -- equal to the id --, ``task``,
        ``difficulty``); ``subs`` holds one projected entry per verified
        row; ``gtImg`` maps each fixture id to its ground-truth image
        source (answer-key diff for editing fixtures, plain turntable
        otherwise).
    """
    top_rows = _verified_rows(rows)

    columns: list[dict] = []
    ground_truth: dict = {}
    for spec in FIXED_FIXTURES:
        fid = spec["id"]
        task = spec["task"]
        columns.append(
            {"id": fid, "name": fid, "task": task, "difficulty": spec["difficulty"]}
        )
        # Editing columns get the answer-key diff; everything else the plain GT.
        resolve_gt = gt_diff_resolver if task == "editing" else gt_resolver
        ground_truth[fid] = resolve_gt(fid)

    column_ids = [col["id"] for col in columns]
    submissions = []
    for entry in top_rows:
        submissions.append(
            _sub_payload(entry, column_ids, render_resolver, diff_resolver)
        )

    return {"fixtures": columns, "subs": submissions, "gtImg": ground_truth}
def render_gallery_page(
    rows: list[dict], render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
) -> str:
    """Build the full standalone gallery HTML document from live rows.

    ``render_resolver`` / ``gt_resolver`` / ``diff_resolver`` /
    ``gt_diff_resolver`` supply the cached render-proxy URLs (see
    :func:`build_gallery_payload`); the browser lazy-loads only the
    on-screen turntables.

    The document is self-contained and uses **system font stacks only**
    (no external font CDN fetch) so it never errors inside a sandboxed
    iframe.

    The payload is embedded as a JS object literal inside an inline
    ``<script>``. ``json.dumps`` leaves ``<``, ``>`` and ``&`` untouched, so
    a free-text row field (e.g. ``submission_name``) containing
    ``</script>`` would close the element early and inject markup. Those
    characters -- plus U+2028/U+2029, which older JS engines reject inside
    string literals -- are therefore emitted as ``\\uXXXX`` escapes. In JSON
    output they can only occur inside strings, where the escape decodes to
    the identical character, so the data the page script sees is unchanged.

    Returns:
        The complete HTML document as a single string.
    """
    payload = build_gallery_payload(
        rows, render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
    )
    script_safe = str.maketrans({
        "<": "\\u003c",
        ">": "\\u003e",
        "&": "\\u0026",
        "\u2028": "\\u2028",
        "\u2029": "\\u2029",
    })
    data_json = json.dumps(payload, ensure_ascii=False).translate(script_safe)
    return (
        "<!DOCTYPE html><html lang='en'><head>"
        "<meta charset='UTF-8'>"
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
        "<title>CADGenBench Gallery</title>"
        f"<style>{_CSS}</style>"
        "</head><body>"
        f"{_BODY}"
        f"<script>window.GALLERY_DATA = {data_json};</script>"
        f"<script>{_JS}</script>"
        "</body></html>"
    )
# ---------------------------------------------------------------------------
# CSS (ported from the reference prototype, trimmed to the gallery surface).
# Self-contained: system font stacks only, no external font CDN fetch.
#
# Plain (non-f) string: ``render_gallery_page`` drops it verbatim into the
# document's <style> element, so the literal braces need no escaping.
# Runtime-tunable custom properties, each with a var() fallback below:
#   --ncol        set on #gallery by the page script (number of fixture columns)
#   --head-h      set on the root element by the page script (sticky header height)
#   --gallery-max per the in-string comment, set by the page script from
#                 ``screen.availHeight`` (the setter is not visible here)
# ---------------------------------------------------------------------------
_CSS = """
:root {
--bg: #f4f5f7; --panel: #ffffff; --ink: #14161c; --ink-soft: #5b6170;
--ink-faint: #9aa0ad; --line: #e3e5ea; --line-strong: #d2d5dd;
--accent: #4338ca; --accent-soft: #eef0ff; --good: #15803d;
--good-soft: #e9f7ee; --bad: #b42318; --bad-soft: #fdeceb;
--gt: #0f766e; --gt-soft: #e6f4f2; --thumb-bg: #eceef2;
--shadow: 0 1px 2px rgba(20,22,28,.04), 0 8px 24px rgba(20,22,28,.06);
--radius: 14px;
--mono: ui-monospace, 'SF Mono', Menlo, Consolas, monospace;
}
* { box-sizing: border-box; }
body {
margin: 0; background: var(--bg); color: var(--ink);
font-family: system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif;
-webkit-font-smoothing: antialiased; padding: 8px 0 10px;
}
.wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }
.section-label {
display: flex; align-items: center; gap: 10px; margin: 4px 0 6px;
font-size: 14px; font-weight: 700; color: var(--accent);
text-transform: uppercase; letter-spacing: .05em;
}
.section-label .verified-pill {
font-family: var(--mono); font-size: 10px; color: var(--good);
background: var(--good-soft); padding: 3px 8px; border-radius: 999px;
letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
}
.dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
.section-caption { margin: 0 0 16px; font-size: 12.5px; color: var(--ink-soft); line-height: 1.5; }
.section-caption b { color: var(--ink); font-weight: 600; }
/* The gallery is its own scroll container so the column header + ground-truth
row (both `position: sticky`) stay locked at the top while the submission
rows scroll inside it. This is the ONLY scroller for the rows -- it must not
leak out to the host page. Height is set in JS from `screen.availHeight`
(the one viewport-ish measure that survives HF's nested iframes, which
inflate `innerHeight`/`vh` to the full content height); the px value here is
the pre-script fallback. */
.gallery {
background: var(--panel); border: 1px solid var(--line);
border-radius: var(--radius); box-shadow: var(--shadow); position: relative;
max-height: var(--gallery-max, 560px); overflow-y: auto; overflow-x: hidden;
scrollbar-width: thin; scrollbar-color: var(--line-strong) transparent;
}
/* Keep the scrollbar visible (macOS overlay scrollbars otherwise hide it, so
it isn't obvious the rows scroll). */
.gallery::-webkit-scrollbar { width: 11px; }
.gallery::-webkit-scrollbar-track { background: transparent; }
.gallery::-webkit-scrollbar-thumb {
background: var(--line-strong); border-radius: 8px; border: 2px solid var(--panel);
}
.gallery::-webkit-scrollbar-thumb:hover { background: var(--ink-faint); }
/* Affordance that there are more rows below: a fade + label pinned to the
bottom of the box, hidden by JS once scrolled to the end. */
.gallery-shell { position: relative; }
.scroll-cue {
position: absolute; left: 1px; right: 12px; bottom: 1px; height: 56px;
pointer-events: none; display: flex; align-items: flex-end; justify-content: center;
padding-bottom: 9px; border-radius: 0 0 var(--radius) var(--radius);
background: linear-gradient(to bottom, rgba(255,255,255,0), var(--panel) 82%);
}
.scroll-cue[hidden] { display: none; }
.scroll-cue span {
font-size: 11px; font-weight: 700; text-transform: uppercase; letter-spacing: .05em;
color: var(--accent); background: var(--accent-soft); padding: 4px 12px; border-radius: 999px;
box-shadow: 0 1px 3px rgba(20,22,28,.12);
}
.grid-head, .grow {
display: grid;
grid-template-columns: 52px minmax(200px, 1.3fr) 160px repeat(var(--ncol, 4), minmax(140px, 1fr));
align-items: stretch;
}
.grid-head {
background: #fbfbfd; border-bottom: 1px solid var(--line); font-size: 11px;
text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint);
font-weight: 700; position: sticky; top: 0; z-index: 20;
border-radius: var(--radius) var(--radius) 0 0;
}
.grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
.grid-head .fix-h { flex-direction: column; align-items: flex-start; gap: 3px; }
.grid-head .fix-h .ftask { font-size: 11px; color: var(--ink-soft); text-transform: none; letter-spacing: 0; font-weight: 700; }
.grid-head .fix-h .ftop { display: flex; align-items: center; gap: 6px; }
.grid-head .fix-h .fname { font-size: 9.5px; color: var(--ink-faint); text-transform: none; letter-spacing: 0; font-family: var(--mono); font-weight: 600; }
.grid-head .fix-h .fdiff {
font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: .05em;
padding: 2px 7px; border-radius: 999px;
}
.fdiff.diff-medium { color: #b45309; background: #fdf3e7; }
.fdiff.diff-hard { color: var(--bad); background: var(--bad-soft); }
.grow.gt-row {
background: var(--gt-soft); border-bottom: 2px solid var(--gt);
position: sticky; top: var(--head-h, 46px); z-index: 15;
box-shadow: 0 6px 14px -8px rgba(15,118,110,.45);
}
.grow.gt-row .rank, .grow.gt-row .ident { display: flex; align-items: center; }
.grow.gt-row .ident { font-weight: 700; color: var(--gt); flex-direction: column; align-items: flex-start; justify-content: center; gap: 2px; }
.grow.gt-row .ident .gt-sub { font-weight: 500; font-size: 11.5px; color: var(--gt); opacity: .8; }
.grow.gt-row .score-cell { color: var(--gt); }
.grow.sub-row { border-bottom: 1px solid var(--line); transition: background .12s ease; }
.grow.sub-row:last-child { border-bottom: none; }
.grow.sub-row:hover { background: #fafbff; }
.rank {
padding: 16px 14px; font-family: var(--mono); font-weight: 700;
font-size: 15px; color: var(--ink-faint); display: flex; align-items: center;
justify-content: center;
}
.rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }
.ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
.ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; color: var(--ink); text-decoration: none; }
a.sub-name:hover { color: var(--accent); text-decoration: underline; }
.ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: var(--mono); }
.score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
.score-cell .agg { font-size: 22px; font-weight: 800; letter-spacing: -.01em; }
.score-cell .validity { font-size: 11.5px; font-family: var(--mono); color: var(--good); font-weight: 700; display: flex; align-items: baseline; gap: 5px; }
.score-cell .validity .vlabel { font-weight: 400; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; font-size: 10px; }
.score-cell .validity.imperfect { color: #b45309; }
.score-cell .validity.imperfect .vlabel { color: #c98a3a; }
.thumb-cell { padding: 8px; display: flex; align-items: center; justify-content: center; position: relative; }
.thumb {
width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg);
border: 1px solid var(--line); overflow: hidden; cursor: pointer; position: relative;
transition: transform .14s ease, box-shadow .14s ease, border-color .14s ease;
}
.thumb:hover { transform: translateY(-2px); box-shadow: 0 6px 18px rgba(20,22,28,.14); border-color: var(--accent); }
/* Display width is CSS-constrained so the browser downscales the existing
render artifact: no resize step, no new assets. */
.thumb img { width: 100%; height: 100%; display: block; object-fit: contain; }
.thumb .open-hint {
position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;
opacity: 0; background: rgba(67,56,202,.08); transition: opacity .14s ease;
font-size: 11px; font-weight: 700; color: var(--accent); text-transform: uppercase; letter-spacing: .04em;
}
.thumb:hover .open-hint { opacity: 1; }
.thumb.failed { cursor: default; background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
.thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
.thumb.failed .ftag { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }
/* Inline Gen/Edit breakdown beneath the aggregate score (replaces the old
"more numbers" expander). Validity stays as its own line below. */
.score-cell .score-breakdown { display: flex; gap: 14px; margin: 1px 0; }
.score-cell .sb { display: flex; flex-direction: column; line-height: 1.15; }
.score-cell .sb-l { font-size: 9px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
.score-cell .sb-v { font-size: 13px; font-weight: 700; font-family: var(--mono); color: var(--ink-soft); }
/* Download link + submission date, tucked under the submitter name. */
.ident .ident-foot { display: flex; align-items: center; gap: 10px; margin-top: 5px; flex-wrap: wrap; }
.ident .dl { font-size: 11.5px; font-weight: 600; color: var(--accent); text-decoration: none; display: inline-flex; align-items: center; gap: 4px; }
.ident .dl .dl-ic { font-size: 13px; line-height: 1; }
.ident .dl:hover { text-decoration: underline; }
.empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }
/* compare modal (GT vs output) */
.modal-back { position: fixed; inset: 0; background: rgba(20,22,28,.5); backdrop-filter: blur(3px); display: none; align-items: center; justify-content: center; z-index: 50; padding: 24px; }
.modal-back.show { display: flex; }
.modal { background: var(--panel); border-radius: 16px; width: 100%; max-width: 620px; padding: 26px; box-shadow: 0 24px 60px rgba(0,0,0,.3); }
.modal h4 { margin: 0 0 4px; font-size: 18px; }
.modal .msub { color: var(--ink-faint); font-size: 13px; font-family: var(--mono); margin-bottom: 18px; }
.modal-compare { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; }
.modal-compare figure { margin: 0; }
.modal-compare figcaption { font-size: 11px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; margin-bottom: 6px; }
.modal-compare .mthumb { width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg); border: 1px solid var(--line); overflow: hidden; }
.modal-compare .mthumb img { width: 100%; height: 100%; object-fit: contain; display: block; }
.modal-compare .mthumb.failed { background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
.modal-compare .mthumb.failed span { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; }
.modal-note { margin-top: 18px; font-size: 12.5px; color: var(--ink-soft); background: var(--accent-soft); padding: 12px 14px; border-radius: 10px; }
.modal-note a { color: var(--accent); font-weight: 600; }
/* Edit-diff color key (editing fixtures only), mirrors the report legend. */
.modal-legend { margin-top: 9px; font-size: 11.5px; color: var(--ink-soft); line-height: 1.7; }
.modal-legend .mlc { display: inline-block; width: 11px; height: 11px; border-radius: 3px;
vertical-align: middle; margin: 0 5px 0 14px; border: 1px solid rgba(0,0,0,0.18); }
.modal-legend .mlc:first-child { margin-left: 0; }
.modal-close { margin-top: 20px; width: 100%; padding: 11px; border: 1px solid var(--line-strong); background: #fafbfc; border-radius: 10px; font-family: inherit; font-weight: 600; cursor: pointer; font-size: 14px; }
.modal-close:hover { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); }
/* --- Mobile / narrow screens ------------------------------------------------
Same comparison as desktop -- the four sample columns with the ground-truth
row pinned on top so each model's render sits directly under the GT it should
match -- but adapted to phone width: there is no room for a left identity
column AND four renders, so the model name + score become a slim bar above
that model's four renders. The four columns stay full-width and aligned
across the GT row and every model, and the header + GT row stay pinned. */
@media (max-width: 760px) {
.wrap { padding: 0 7px; }
.section-label { margin: 2px 0 4px; font-size: 12px; }
.grid-head, .grow { grid-template-columns: repeat(var(--ncol, 4), 1fr); }
/* Header: keep only the four sample-column labels, aligned over the renders. */
.grid-head .h-rank, .grid-head .h-sub, .grid-head .h-score { display: none !important; }
.grid-head .fix-h { padding: 8px 4px; }
.grid-head .fix-h .fname { display: none; } /* drop the #id, keep task+difficulty */
.grid-head .fix-h .ftop { flex-direction: column; align-items: flex-start; gap: 3px; }
.grid-head .fix-h .ftask { font-size: 10px; }
.grid-head .fix-h .fdiff { font-size: 8px; padding: 1px 5px; }
/* Rows: name (3 cols) + score (last col) form a bar above the renders. */
.rank { display: none !important; }
.ident { grid-column: 1 / span 3; padding: 9px 8px 5px; }
.ident .sub-name { font-size: 13.5px; }
.ident .ident-foot { margin-top: 3px; }
.score-cell { grid-column: 4 / span 1; padding: 9px 8px 5px; gap: 1px; align-items: flex-end; }
.score-cell .agg { font-size: 17px; }
.score-cell .score-breakdown { display: none; }
.score-cell .validity { font-size: 10px; }
.grow.gt-row .score-cell { align-items: flex-end; }
.thumb-cell { padding: 4px; }
}
"""
# ---------------------------------------------------------------------------
# Body
#
# Static page skeleton, inserted verbatim into <body> by
# ``render_gallery_page`` ahead of the data and script tags. The page script
# looks elements up by id, so these ids are part of its contract:
#   gallery, gridHead, scrollCue            -- grid container, header, cue
#   modalBack, modalTitle, modalSub,
#   modalGtCap, modalGt, modalOutCap,
#   modalOut, modalNote, modalClose         -- compare modal
# The "Top 10" heading is literal text; it is not derived from
# ``GALLERY_TOP_N``.
# ---------------------------------------------------------------------------
_BODY = """
<div class="wrap">
<div class="section-label">
Validated leaderboard - Top 10
<span class="verified-pill"><span class="dot"></span>verified only</span>
</div>
<div class="gallery-shell">
<div class="gallery" id="gallery">
<div class="grid-head" id="gridHead"></div>
</div>
<div class="scroll-cue" id="scrollCue" hidden><span>&#9662; scroll for more models</span></div>
</div>
</div>
<div class="modal-back" id="modalBack">
<div class="modal">
<h4 id="modalTitle"></h4>
<div class="msub" id="modalSub"></div>
<div class="modal-compare">
<figure><figcaption id="modalGtCap">Ground truth</figcaption><div class="mthumb" id="modalGt"></div></figure>
<figure><figcaption id="modalOutCap">Output (aligned)</figcaption><div class="mthumb" id="modalOut"></div></figure>
</div>
<div class="modal-note" id="modalNote"></div>
<button class="modal-close" id="modalClose">Close</button>
</div>
</div>
"""
# ---------------------------------------------------------------------------
# JS (data-driven render of the gallery; render lookups isolated behind
# renderFor / gtRenderFor as in the design brief)
# ---------------------------------------------------------------------------
_JS = """
const DATA = window.GALLERY_DATA || {fixtures: [], subs: [], gtImg: {}};
// Fixed columns: the server hands us exactly the gallery's sample set, in
// order, so there is no picker and no client-side selection state.
const FIXTURES = DATA.fixtures || [];
// --- Render hooks. ---------------------------------------------------------
// The image sources are cached render-proxy URLs injected by the server, so
// these just read the payload (the browser lazy-loads only the on-screen
// tiles). renderFor returns null for an invalid/missing fixture -> dashed cell.
function renderFor(sub, fxId) {
const c = sub.cells[fxId];
return c ? c.img : null;
}
// Grid tiles use gridImg (the edit-diff turntable for editing fixtures, the
// plain candidate turntable otherwise); the modal keeps renderFor (img), so
// the grid swap never changes the modal.
function gridRenderFor(sub, fxId) {
const c = sub.cells[fxId];
return c ? (c.gridImg || c.img) : null;
}
function gtRenderFor(fxId) {
return (DATA.gtImg || {})[fxId] || null;
}
function cellOf(sub, fxId) { return sub.cells[fxId] || {}; }
function fmt(x, d) { return (x === null || x === undefined) ? '-' : Number(x).toFixed(d); }
function pct(x) { return (x === null || x === undefined) ? '-' : Math.round(Number(x) * 100) + '%'; }
function esc(s) { return String(s == null ? '' : s).replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c])); }
function fixtureMeta(id) { return FIXTURES.find(f => f.id === id); }
function groupLabel(task) { return task ? (task.charAt(0).toUpperCase() + task.slice(1)) : 'Other'; }
// --- Gallery render -------------------------------------------------------
function buildHead() {
const head = document.getElementById('gridHead');
let h = '<div class="h-rank">#</div><div class="h-sub">Submission</div><div class="h-score">Score</div>';
FIXTURES.forEach(f => {
const diff = f.difficulty
? '<span class="fdiff diff-' + esc((f.difficulty || '').toLowerCase()) + '">' + esc(f.difficulty) + '</span>'
: '';
h += '<div class="fix-h"><div class="ftop"><span class="ftask">' + esc(groupLabel(f.task)) + '</span>' + diff + '</div>'
+ '<span class="fname">#' + esc(f.name) + '</span></div>';
});
head.innerHTML = h;
}
// Fall back to the dashed cell if a render URL 404s (a fixture marked valid
// whose render upload is missing) instead of showing a broken image.
function imgFail(img) {
const cell = img.closest('.thumb-cell');
if (cell) cell.innerHTML = '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
fitIframe();
}
function thumbHTML(url, attrs, clickable) {
if (!url) {
return '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
}
const hint = clickable ? '<span class="open-hint">open</span>' : '';
return '<div class="thumb" ' + attrs + '>'
+ '<img loading="lazy" decoding="async" src="' + url + '" alt="" onload="fitIframe()" onerror="imgFail(this)">'
+ hint + '</div>';
}
function buildGallery() {
const g = document.getElementById('gallery');
g.style.setProperty('--ncol', Math.max(FIXTURES.length, 1));
buildHead();
g.querySelectorAll('.grow').forEach(n => n.remove());
if (!DATA.subs.length) {
let note = g.querySelector('.empty-note');
if (!note) {
note = document.createElement('div');
note.className = 'empty-note';
note.textContent = 'No verified submissions yet. Once a submission is promoted to the validated tier it appears here.';
g.appendChild(note);
}
return;
}
// Ground-truth pinned row.
const gt = document.createElement('div');
gt.className = 'grow gt-row';
let gtCells = '<div class="rank">&#9733;</div>'
+ '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>'
+ '<div class="score-cell"><span class="agg">1.000</span></div>';
FIXTURES.forEach(f => {
gtCells += '<div class="thumb-cell">' + thumbHTML(gtRenderFor(f.id), 'data-gt="' + esc(f.id) + '"', false) + '</div>';
});
gt.innerHTML = gtCells;
g.appendChild(gt);
DATA.subs.forEach((s, i) => {
const row = document.createElement('div');
row.className = 'grow sub-row';
const medal = i < 3 ? 'medal-' + (i + 1) : '';
const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
const nameHTML = s.reportUrl
? '<a class="sub-name" href="' + esc(s.reportUrl) + '" target="_blank" rel="noopener">' + esc(s.name) + '</a>'
: '<span class="sub-name">' + esc(s.name) + '</span>';
let cells = '<div class="rank ' + medal + '">' + (i + 1) + '</div>'
+ '<div class="ident">' + nameHTML
+ '<span class="submitter">' + esc(s.who) + '</span>'
+ '<div class="ident-foot">'
+ (s.blobUrl ? '<a class="dl" href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener"><span class="dl-ic">&#8675;</span>Download ZIP</a>' : '')
+ '</div></div>'
+ '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
+ '<div class="score-breakdown">'
+ '<span class="sb"><span class="sb-l">Gen</span><span class="sb-v">' + fmt(s.gen, 3) + '</span></span>'
+ '<span class="sb"><span class="sb-l">Edit</span><span class="sb-v">' + fmt(s.edit, 3) + '</span></span>'
+ '</div>'
+ '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
FIXTURES.forEach(f => {
cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, f.id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(f.id) + '"', true) + '</div>';
});
row.innerHTML = cells;
g.appendChild(row);
});
wireGallery();
syncHeadHeight();
}
function wireGallery() {
document.querySelectorAll('.thumb[data-sub]').forEach(th => {
th.onclick = () => {
const sub = DATA.subs.find(x => x.id === th.dataset.sub);
openModal(th.dataset.fix, sub);
};
});
}
function openModal(fxId, sub) {
const f = fixtureMeta(fxId);
const isEditing = !!(f && f.task === 'editing');
const title = f
? groupLabel(f.task) + (f.difficulty ? ' \\u00b7 ' + f.difficulty : '') + ' (#' + fxId + ')'
: fxId;
document.getElementById('modalTitle').textContent = title;
document.getElementById('modalSub').textContent = sub.name;
const gt = gtRenderFor(fxId);
// Editing fixtures: the meaningful output is the edit-diff turntable (the
// material that actually changed vs GT), mirroring the per-submission report
// -- the plain aligned candidate looks identical to GT for a small edit.
// Generation shows the plain aligned candidate.
const out = isEditing ? gridRenderFor(sub, fxId) : renderFor(sub, fxId);
const cell = cellOf(sub, fxId);
document.getElementById('modalGt').innerHTML = gt
? '<img src="' + gt + '" alt="ground truth">' : '<span>no GT render</span>';
// For editing fixtures the GT side is the "answer key" edit diff (blue = the
// correct change), so it pairs with the output's red/amber diff; label both.
document.getElementById('modalGtCap').textContent =
isEditing ? 'Ground truth (correct change)' : 'Ground truth';
document.getElementById('modalOutCap').textContent =
isEditing ? 'Output vs ground truth (edit diff)' : 'Output (aligned)';
const outEl = document.getElementById('modalOut');
if (out) {
outEl.className = 'mthumb';
outEl.innerHTML = '<img src="' + out + '" alt="' + (isEditing ? 'edit diff' : 'output') + '">';
} else {
outEl.className = 'mthumb failed';
outEl.innerHTML = '<span>invalid<br>generation</span>';
}
const cad = (cell.cad === null || cell.cad === undefined) ? '-' : Number(cell.cad).toFixed(3);
const legend = isEditing
? '<div class="modal-legend">'
+ '<span class="mlc" style="background:#2173f5"></span>correct change (ground truth)'
+ '<span class="mlc" style="background:#bdc4d1"></span>your output'
+ '<span class="mlc" style="background:#e62929"></span>extra material (too much)'
+ '<span class="mlc" style="background:#f5991a"></span>missing material (too little)'
+ '</div>'
: '';
document.getElementById('modalNote').innerHTML =
'CAD score for this sample: <b>' + cad + '</b>. '
+ (isEditing
? 'Left is the ground-truth answer key: blue is the change the edit should '
+ 'make (vs the starting shape). Right is your output\\u2019s edit diff: red '
+ 'is material your output added that the GT lacks, amber is GT material your '
+ 'output is missing. '
: '')
+ 'The full per-sample report (shape similarity, interface, topology + 3D view) '
+ 'opens from the report viewer.' + legend;
document.getElementById('modalBack').classList.add('show');
}
function closeModal() {
  // Drop the `show` class from the #modalBack element -- the same class the
  // modal-opening code adds to it.
  const backdrop = document.getElementById('modalBack');
  backdrop.classList.remove('show');
}
// Three ways to dismiss the modal: the #modalClose control, a click that lands
// on the #modalBack element itself, and the Escape key.
document.getElementById('modalClose').onclick = closeModal;
document.getElementById('modalBack').onclick = function (evt) {
  // Ignore clicks whose target is some other element than #modalBack.
  if (evt.target.id !== 'modalBack') return;
  closeModal();
};
document.addEventListener('keydown', function (evt) {
  if (evt.key === 'Escape') closeModal();
});
// Pin the GT row exactly beneath the sticky column header.
function syncHeadHeight() {
  const header = document.getElementById('gridHead');
  // Nothing to measure when the #gridHead element is absent.
  if (!header) return;
  // Publish the header's rendered height as the --head-h custom property on
  // the root element so CSS can offset against it.
  const rootStyle = document.documentElement.style;
  rootStyle.setProperty('--head-h', header.offsetHeight + 'px');
}
// Show the "scroll for more" cue only while there are rows below the fold.
function updateScrollCue() {
  const box = document.getElementById('gallery');
  const hint = document.getElementById('scrollCue');
  if (box && hint) {
    // Pixels of content still below the visible part of the scroll box.
    const remaining = box.scrollHeight - box.clientHeight - box.scrollTop;
    // The cue is shown only while more than 8px remain; otherwise it is hidden.
    hint.hidden = !(remaining > 8);
  }
}
// Height of the gallery scroll box. HF auto-resizes the Space iframe to its
// full content height, so `window.innerHeight` / `vh` inside these nested
// iframes report the inflated content height, not the real viewport -- they
// can't be used to size a one-screen box. `screen.availHeight` is the screen
// work-area and is NOT affected by the iframe nesting, so we derive the box
// height from it (a fraction, clamped) and the rows scroll inside the box while
// the sticky header + ground-truth row stay locked.
// Reserve for everything that is NOT the scroll box but still has to fit on
// screen: the browser/OS chrome between the screen work-area and the window
// viewport, plus the HF page header + Gradio title/tabs + this page's caption
// and Refresh button. Subtracting it from the screen height keeps the whole
// gallery within one viewport, so there is a single scrollbar (the box's own)
// rather than the box plus an outer page scroll. Deliberately generous: a box
// that is a little short (a touch more in-box scrolling) is far better than a
// confusing second scrollbar.
// Pixels subtracted from the screen work-area height on wide layouts.
var CHROME_RESERVE = 450;
function sizeGalleryBox() {
  try {
    // Screen work-area height; falls back to 900 when it is unavailable.
    const screenH = (window.screen && window.screen.availHeight) || 900;
    // innerWidth below 760 selects the narrow (phone-width) limits; a missing
    // width is treated as 1000, i.e. wide.
    const isNarrow = (window.innerWidth || 1000) < 760;
    // Narrow layouts get their own reserve and clamp bounds.
    // NOTE(review): the narrow reserve (410) is smaller than CHROME_RESERVE
    // (450), although phones were previously described as needing a bigger
    // reserve -- confirm the intended value.
    const limits = isNarrow
      ? { reserve: 410, floor: 280, ceiling: 520 }
      : { reserve: CHROME_RESERVE, floor: 320, ceiling: 1200 };
    // Clamp (work-area minus reserve) into [floor, ceiling].
    const capped = Math.min(limits.ceiling, Math.round(screenH - limits.reserve));
    const boxH = Math.max(limits.floor, capped);
    document.documentElement.style.setProperty('--gallery-max', boxH + 'px');
    // The box height changed, so re-evaluate the scroll cue.
    updateScrollCue();
  } catch (err) { /* keep CSS fallback */ }
}
// With the gallery box capped, the page content (caption + box) is bounded, so
// sizing the iframe to it keeps the iframe from adding a second scrollbar: the
// gallery's own box is the single scroller for the rows. No-ops if frameElement
// is unreadable.
function fitIframe() {
  try {
    const host = window.frameElement;
    // No readable hosting frame element -> leave the height untouched.
    if (!host) return;
    // Size the hosting iframe to the document body's scroll height.
    const contentH = Math.ceil(document.body.scrollHeight);
    host.style.height = contentH + 'px';
  } catch (err) { /* sandboxed -> keep fallback height */ }
}
// Initial pass: run buildGallery (defined elsewhere in this script), then size
// the scroll box and the hosting iframe.
buildGallery();
sizeGalleryBox();
fitIframe();
// Re-evaluate the "scroll for more" cue whenever the gallery box scrolls. The
// IIFE keeps the element reference out of the page-level scope.
(() => {
  const box = document.getElementById('gallery');
  if (!box) return;
  box.addEventListener('scroll', updateScrollCue, { passive: true });
})();
updateScrollCue();
function relayout() {
  // Full layout pass, in order: header height, box size, iframe height, cue.
  const steps = [syncHeadHeight, sizeGalleryBox, fitIframe, updateScrollCue];
  steps.forEach((step) => { step(); });
}
// Re-run the full layout pass whenever the window is resized.
window.addEventListener('resize', relayout);
// Where ResizeObserver is available, refit the iframe whenever <body> changes
// size.
if (window.ResizeObserver) {
  const bodyWatcher = new ResizeObserver(fitIframe);
  bodyWatcher.observe(document.body);
}
// Where the Font Loading API is available, lay out again once fonts are ready.
if (document.fonts && document.fonts.ready) {
  document.fonts.ready.then(relayout);
}
"""