Michael Rabinovich
leaderboard: rebuild Tasks tab as grouped thumbnail grid
95f3ee8
# Copyright 2026 Hugging Face
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Task browser page.
A read-only "browse the benchmark tasks" surface that mirrors the
per-submission report's look and detail navigation (grouped thumbnail
grid -> click a card -> per-fixture detail card, ``j``/``k`` / arrow
keys to move, ``Esc`` to return) but **without any scores, ground
truth, or submission output**: each task reads as an unsolved problem.
The detail card centers the prompt and the input — the drawing
(generation tasks) or the starting-shape renders (editing tasks).
The task universe comes from the fixture inputs dataset's
``<fixture>/description.yaml`` files (``description`` + ``task_type`` +
``input_files``); :func:`load_tasks_from_dir` shapes them into the
small list the page renders. Image lookups are isolated behind a single
injected resolver so this module stays agnostic to how the URLs are
built (Space proxy/resolve URLs in production, local file paths in the
preview):
- ``asset_url(fixture, relpath)`` -> URL for a public input asset
(e.g. ``input.png`` or ``renders/iso.png``).
Like the gallery, the document is self-contained (its own CSS + JS) so
it can be inlined into an iframe ``srcdoc`` with its own style context,
and images are lazy-loaded so only the on-screen card's renders are
fetched.
"""
from __future__ import annotations
import html
import json
import logging
from pathlib import Path
import yaml
logger = logging.getLogger(__name__)
# Canonical render views shown in the starting-shape render grid, in
# display order. Missing views degrade away client-side (the <img>
# onerror hook hides the tile) so we don't need to probe the Hub for
# which views exist per fixture.
VIEWS = ["iso", "front", "top", "right"]
# Filename suffixes (compared against the lower-cased name) that mark an
# input file as a STEP solid rather than an image.
_STEP_SUFFIXES = (".step", ".stp")
def load_tasks_from_dir(inputs_dir: Path) -> list[dict]:
    """Shape ``<fixture>/description.yaml`` files into task dicts.

    ``inputs_dir`` is a fixtures root whose immediate children are
    fixture directories (the layout of the inputs dataset snapshot and
    of the local data clone). Each task dict carries:

    - ``name``        : fixture id (the directory name).
    - ``task_type``   : ``"generation"`` (default) or ``"editing"``.
    - ``description`` : the prompt text, stripped of surrounding
      whitespace.
    - ``image_inputs``: input image filenames to show inline (e.g. the
      generation drawing); empty for editing tasks.
    - ``wants_shape`` : True when the fixture ships a STEP input (an
      editing task), so the caller shows the starting-shape renders.

    Loosely-written descriptions degrade instead of raising: a document
    that parses to something other than a mapping is treated as empty
    (with a warning), a null/empty ``task_type`` falls back to
    ``"generation"``, a scalar ``input_files`` is treated as a single
    filename, and non-string values are stringified. YAML syntax errors
    still propagate from ``yaml.safe_load``.

    Sorted by fixture name for a stable order, matching the report.
    """
    tasks: list[dict] = []
    for desc_path in sorted(inputs_dir.glob("*/description.yaml")):
        # Decode explicitly as UTF-8 so prompts with non-ASCII characters
        # read the same regardless of the host locale.
        data = yaml.safe_load(desc_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            logger.warning(
                "%s does not contain a mapping (got %s); using defaults",
                desc_path,
                type(data).__name__,
            )
            data = {}

        # ``or`` (rather than a .get default) also covers explicit nulls.
        task_type = str(data.get("task_type") or "generation")
        description = str(data.get("description") or "").strip()

        raw_files = data.get("input_files") or []
        if isinstance(raw_files, (str, bytes)):
            # A bare scalar would otherwise be iterated character by
            # character.
            raw_files = [raw_files]
        files = [str(f) for f in raw_files]

        # Classify each file once; STEP inputs are rendered, not inlined.
        step_flags = [f.lower().endswith(_STEP_SUFFIXES) for f in files]
        image_inputs = [
            f for f, is_step in zip(files, step_flags) if not is_step
        ]
        wants_shape = any(step_flags)

        # Generation fixtures that didn't list input_files still ship the
        # canonical drawing as input.png; reference it so the card isn't
        # blank (a missing file just hides itself via the onerror hook).
        if not image_inputs and not wants_shape:
            image_inputs = ["input.png"]

        tasks.append({
            "name": desc_path.parent.name,
            "task_type": task_type,
            "description": description,
            "image_inputs": image_inputs,
            "wants_shape": wants_shape,
        })
    return tasks
def _type_pill(task_type: str) -> str:
    """Return the coloured task-type tag shown beside a detail-card title.

    Only ``"editing"`` gets the editing colour class; any other value is
    styled as generation. The label text is HTML-escaped.
    """
    css_class = "type-generation"
    if task_type == "editing":
        css_class = "type-editing"
    label = html.escape(task_type)
    return '<span class="tag ' + css_class + '">' + label + "</span>"
def _views_grid(url_for) -> str:
    """Build the render grid with one tile per entry in ``VIEWS``.

    ``url_for(view)`` is called once per view, in ``VIEWS`` order, and
    must return that view's image URL. Each tile carries the
    ``taskImgFail`` ``onerror`` hook, so a render that fails to load
    hides its tile instead of showing a broken-image icon.
    """
    tiles = [
        '<div class="view"><img loading="lazy" decoding="async" '
        f'src="{html.escape(url_for(view), quote=True)}" alt="{view}" '
        f'onerror="taskImgFail(this)"><span>{view}</span></div>'
        for view in VIEWS
    ]
    return "\n".join(['<div class="images">', *tiles, "</div>"])
def _render_task_card(task: dict, idx: int, asset_url) -> str:
    """Render one task's (initially hidden) detail card.

    The card shows the fixture name with its type pill, the prompt when
    there is one, and the task input: the starting-shape render grid
    when ``wants_shape`` is set, otherwise the listed input drawing(s).
    No ground truth or scores are emitted. ``idx`` is written to
    ``data-idx``; ``asset_url(fixture, relpath)`` resolves image URLs.
    """
    fixture = task["name"]
    lines = [
        f'<div class="fixture-card" data-idx="{idx}" style="display:none">',
        '<div class="task-body">',
        f'<h2 class="card-title">{html.escape(fixture)} '
        f'{_type_pill(task["task_type"])}</h2>',
    ]

    # The prompt is the headline of the card; skipped when empty.
    prompt = task["description"]
    if prompt:
        lines.append(f'<p class="task-prompt">{html.escape(prompt)}</p>')

    if task["wants_shape"]:
        # Editing tasks: show renders of the starting solid.
        def render_url(view):
            return asset_url(fixture, f"renders/{view}.png")

        lines.append('<div class="media-label">Starting shape</div>')
        lines.append(_views_grid(render_url))
    elif task["image_inputs"]:
        # Everything else: show the input drawing(s) inline.
        lines.append('<div class="media-label">Drawing</div>')
        lines.extend(
            '<img loading="lazy" decoding="async" '
            f'src="{html.escape(asset_url(fixture, fname), quote=True)}" alt="input" '
            'class="input-img" onerror="taskImgFail(this)">'
            for fname in task["image_inputs"]
        )

    # Close task-body, then fixture-card.
    lines += ["</div>", "</div>"]
    return "\n".join(lines)
def _thumb_url(task: dict, asset_url) -> str:
    """Pick the input asset used as a task's grid thumbnail.

    - ``task_type == "editing"`` -> ``renders/iso.png``, the isometric
      render of the input shape.
    - anything else -> the first entry of ``image_inputs``, falling back
      to ``input.png`` when the list is missing or empty.

    The chosen relative path is resolved through
    ``asset_url(fixture, relpath)``. Nothing is checked for existence
    here; the ``<img>`` ``onerror`` hook handles a missing file
    client-side.
    """
    fixture = task["name"]
    if task["task_type"] != "editing":
        drawings = task.get("image_inputs") or ["input.png"]
        relpath = drawings[0]
    else:
        relpath = "renders/iso.png"
    return asset_url(fixture, relpath)
def _render_card(task: dict, idx: int, asset_url) -> str:
    """Render one grid card: lazy thumbnail, fixture name and type tag.

    ``idx`` is the task's global index; it is written to ``data-idx``
    and passed to ``showDetail(idx)`` on click so the card opens the
    matching detail card. ``data-type`` and ``data-name`` are what the
    client-side type filter and search read. The thumbnail ``<img>`` is
    lazy-loaded and declares a 180x135 (4:3) intrinsic size.
    """
    name = task["name"]
    ttype = task["task_type"]
    # Anything that is not "editing" is bucketed as generation.
    group = "generation" if ttype != "editing" else "editing"
    thumb_src = html.escape(_thumb_url(task, asset_url), quote=True)
    attr_name = html.escape(name, quote=True)
    pieces = [
        f'<button class="card" type="button" data-idx="{idx}" ',
        f'data-type="{group}" data-name="{attr_name}" ',
        f'onclick="showDetail({idx})">',
        '<span class="thumb">',
        '<img loading="lazy" decoding="async" width="180" height="135" ',
        f'src="{thumb_src}" ',
        f'alt="{attr_name}" onerror="taskImgFail(this)">',
        "</span>",
        '<span class="meta">',
        f'<span class="sample">{html.escape(name)}</span>',
        f'<span class="type {group}">{html.escape(ttype)}</span>',
        "</span>",
        "</button>",
    ]
    return "".join(pieces)
# Display order + header label/color class for the two task groups.
# Each entry is (group key, header label, CSS color class).
_GROUPS = (("generation", "Generation", "gen"), ("editing", "Editing", "edit"))
def _render_grid(tasks: list[dict], asset_url) -> str:
    """Render the grouped card grid, one section per ``_GROUPS`` entry.

    Tasks whose ``task_type`` is ``"editing"`` go to the editing group;
    every other task goes to the generation group. Each non-empty group
    gets a header with a count badge followed by its cards; empty groups
    are omitted. Cards keep the task's index in ``tasks`` so a click
    opens the right detail card. A hidden empty-state element is always
    appended for the client-side filter to reveal.
    """
    sections = ['<div id="groups">']
    for key, label, cls in _GROUPS:
        wants_editing = key == "editing"
        members = [
            (index, task)
            for index, task in enumerate(tasks)
            if (task["task_type"] == "editing") is wants_editing
        ]
        if not members:
            continue
        rendered = [
            _render_card(task, index, asset_url) for index, task in members
        ]
        sections.append(
            f'<section class="group" data-group="{key}">'
            f'<div class="group-head {cls}"><span class="glabel">{label}</span> '
            f'<span class="gcount" data-count-for="{key}">{len(rendered)}</span></div>'
            f'<div class="grid">{"".join(rendered)}</div>'
            "</section>"
        )
    sections.append(
        '<div class="empty" id="empty-state" style="display:none">'
        "No tasks match your search.</div>"
    )
    sections.append("</div>")
    return "\n".join(sections)
def _render_controls() -> str:
    """Return the search box, the All/Generation/Editing filter and the count note.

    The ``All`` button is the one marked ``on`` initially. The element
    ids (``search``, ``typeSeg``, ``countNote``) are the hooks the page
    script looks up.
    """
    segments = (
        ("all", "All"),
        ("generation", "Generation"),
        ("editing", "Editing"),
    )
    buttons = "".join(
        '<button type="button"'
        + (' class="on"' if key == "all" else "")
        + f' data-type="{key}">{label}</button>'
        for key, label in segments
    )
    search_box = (
        '<div class="search"><span class="mag">&#8981;</span>'
        '<input type="text" id="search" placeholder="Search tasks by number\u2026" '
        'autocomplete="off"></div>'
    )
    return "".join((
        '<div class="controls">',
        search_box,
        f'<div class="seg" id="typeSeg">{buttons}</div>',
        '<span class="count-note" id="countNote"></span>',
        "</div>",
    ))
def _render_header(tasks: list[dict]) -> str:
    """Return the summary strip: total task count plus per-type counts.

    A task counts as editing only when its ``task_type`` is exactly
    ``"editing"``; every other task is counted as generation.
    """
    total = len(tasks)
    editing = sum(1 for task in tasks if task["task_type"] == "editing")
    generation = total - editing
    pieces = (
        '<div class="summary">',
        f'<span class="big">{total} tasks</span>',
        '<span class="stat"><span class="swatch gen"></span>',
        f"generation: <b>{generation}</b></span>",
        '<span class="stat"><span class="swatch edit"></span>',
        f"editing: <b>{editing}</b></span>",
        '<span class="stat hint">Click a card to open the task.</span>',
        "</div>",
    )
    return "".join(pieces)
def render_tasks_page(tasks: list[dict], asset_url) -> str:
    """Build the full standalone task-browser HTML document.

    ``tasks`` is the list of task dicts to show (``name``,
    ``task_type``, ``description``, ``image_inputs``, ``wants_shape``).
    ``asset_url(fixture, relpath)`` supplies the input image URLs (see
    module docstring). The summary view is a grouped thumbnail grid
    (Generation / Editing) with search + type filter; clicking a card
    opens that task's detail card (prompt + input), reusing the report's
    detail-card navigation (``j``/``k`` / arrow keys, ``Esc`` to return),
    minus scores and ground truth.

    Returns the document as one string with its CSS and JS inlined. With
    an empty ``tasks`` list the summary view shows a "no tasks" note
    instead of the controls and grid.
    """
    # The name list is inlined into a <script> element. json.dumps does
    # not escape "<", ">" or "&", so a name containing e.g. "<!--" or
    # "</script" could break out of (or corrupt) the script block. The
    # \uXXXX forms decode to the same characters inside a JS string.
    fixture_names_js = (
        json.dumps([t["name"] for t in tasks])
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    p = [
        "<!DOCTYPE html><html lang='en'><head>",
        "<meta charset='utf-8'>",
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>",
        "<title>CADGenBench Tasks</title>",
        f"<style>{_CSS}</style>",
        "</head><body>",
    ]
    p.append('<div class="run-header">')
    p.append("<h1>CADGenBench Tasks</h1>")
    p.append(_render_header(tasks))
    p.append("</div>")

    # Summary view: grouped thumbnail grid + search/type controls.
    p.append('<div id="summary-view">')
    if tasks:
        p.append(_render_controls())
        p.append(_render_grid(tasks, asset_url))
    else:
        p.append(
            '<p class="note">No tasks found in the sample inputs dataset.</p>'
        )
    p.append("</div>")

    # Detail view: hidden until a card is opened. Holds the nav bar and
    # one hidden detail card per task, in the same order as ``tasks`` so
    # card indices line up with the grid's showDetail(idx) calls.
    p.append('<div id="detail-view" style="display:none">')
    p.append('<div class="nav-bar">')
    p.append('<button onclick="showSummary()">&#8592; Summary</button>')
    p.append(
        '<button id="prev-btn" onclick="showDetail(currentIdx-1)">&#8592; Prev '
        '<span class="kbd">k</span></button>'
    )
    p.append('<span id="fixture-label"></span>')
    p.append(
        '<button id="next-btn" onclick="showDetail(currentIdx+1)">Next '
        '<span class="kbd">j</span> &#8594;</button>'
    )
    p.append("</div>")
    for i, t in enumerate(tasks):
        p.append(_render_task_card(t, i, asset_url))
    p.append("</div>")

    # The script goes last so every element it queries already exists.
    p.append(f"<script>window._fixtureNames = {fixture_names_js};\n{_JS}</script>")
    p.append("</body></html>")
    return "\n".join(p)
# ---------------------------------------------------------------------------
# CSS (ported from the per-submission report so the look matches exactly;
# trimmed to the surfaces this page uses + task-type pill colors).
# Also carries the styles for the summary strip, the search / type-filter
# controls, the group headers and the thumbnail card grid. Inlined into
# the document's <style> element by render_tasks_page.
# ---------------------------------------------------------------------------
_CSS = """\
:root {
--bg: #f8f9fa; --panel: #ffffff; --ink: #14161c; --ink-soft: #5b6170;
--ink-faint: #9aa0ad; --line: #e3e5ea; --line-strong: #d2d5dd;
--accent: #4338ca; --accent-soft: #eef0ff;
--gen: #1565c0; --gen-soft: #e3f2fd; --edit: #6a1b9a; --edit-soft: #f3e5f5;
--thumb-bg: #eceef2;
--mono: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
}
* { box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
max-width: 1600px; margin: 0 auto; padding: 20px; background: var(--bg);
color: var(--ink); -webkit-font-smoothing: antialiased; }
h1 { border-bottom: 2px solid #333; padding-bottom: 8px; }
h2 { margin-top: 0; }
.tag { font-size: 0.6em; color: #666; font-weight: normal; font-family: monospace;
margin-left: 6px; }
.run-header { background: white; border-radius: 8px; padding: 16px 20px;
margin-bottom: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
/* summary strip */
.summary { display: flex; align-items: baseline; gap: 24px; flex-wrap: wrap;
margin-top: 10px; font-size: 0.95em; }
.summary .big { font-size: 1.35em; font-weight: 800; letter-spacing: -.01em; }
.summary .stat { color: var(--ink-soft); }
.summary .stat b { color: var(--ink); font-weight: 700; }
.summary .stat.hint { color: var(--ink-faint); }
.summary .swatch { display: inline-block; width: 9px; height: 9px; border-radius: 3px;
margin-right: 6px; }
.summary .swatch.gen { background: var(--gen); }
.summary .swatch.edit { background: var(--edit); }
/* controls: search + segmented type filter */
.controls { display: flex; gap: 12px; align-items: center; flex-wrap: wrap;
margin-bottom: 8px; }
.search { flex: 1; min-width: 240px; position: relative; }
.search input { width: 100%; padding: 11px 14px 11px 36px; border: 1px solid var(--line-strong);
border-radius: 11px; font-family: inherit; font-size: 14.5px;
background: var(--panel); outline: none; }
.search input:focus { border-color: var(--accent); box-shadow: 0 0 0 3px var(--accent-soft); }
.search .mag { position: absolute; left: 12px; top: 50%; transform: translateY(-50%);
color: var(--ink-faint); font-size: 15px; }
.seg { display: flex; gap: 4px; background: var(--panel); border: 1px solid var(--line-strong);
border-radius: 11px; padding: 4px; }
.seg button { font-family: inherit; font-size: 13.5px; font-weight: 600; cursor: pointer;
border: none; background: none; color: var(--ink-soft); padding: 7px 14px;
border-radius: 8px; }
.seg button.on { background: var(--accent); color: #fff; }
.count-note { font-size: 13px; color: var(--ink-faint); margin-left: auto; }
/* group header */
.group-head { display: flex; align-items: center; gap: 10px; margin: 26px 0 12px;
font-size: 13px; font-weight: 700; text-transform: uppercase;
letter-spacing: .05em; }
.group-head.gen { color: var(--gen); }
.group-head.edit { color: var(--edit); }
.group-head .gcount { font-family: var(--mono); font-size: 11px; padding: 3px 9px;
border-radius: 999px; }
.group-head.gen .gcount { background: var(--gen-soft); }
.group-head.edit .gcount { background: var(--edit-soft); }
/* responsive card grid */
.grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 16px; }
.card { background: var(--panel); border: 1px solid var(--line); border-radius: 12px;
overflow: hidden; cursor: pointer; padding: 0; font-family: inherit;
text-align: left; transition: transform .14s ease, box-shadow .14s ease,
border-color .14s ease; }
.card:hover { transform: translateY(-3px); box-shadow: 0 10px 26px rgba(20,22,28,.13);
border-color: var(--accent); }
.card .thumb { width: 100%; aspect-ratio: 4 / 3; background: var(--thumb-bg); display: block;
border-bottom: 1px solid var(--line); overflow: hidden; }
.card .thumb img { width: 100%; height: 100%; object-fit: contain; display: block; }
.card .meta { padding: 11px 13px; display: flex; align-items: center;
justify-content: space-between; gap: 8px; }
.card .sample { font-family: var(--mono); font-weight: 700; font-size: 15px; }
.card .type { font-family: var(--mono); font-size: 9.5px; font-weight: 700;
text-transform: uppercase; letter-spacing: .03em; padding: 3px 8px;
border-radius: 6px; }
.card .type.generation { color: var(--gen); background: var(--gen-soft); }
.card .type.editing { color: var(--edit); background: var(--edit-soft); }
.empty { padding: 50px; text-align: center; color: var(--ink-faint); }
.nav-bar { display: flex; align-items: center; gap: 12px; padding: 12px 16px;
background: white; border-radius: 8px; margin-bottom: 16px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1); position: sticky; top: 0; z-index: 100; }
.nav-bar button { padding: 6px 14px; border: 1px solid #ccc; border-radius: 4px;
background: white; cursor: pointer; font-size: 0.9em; }
.nav-bar button:hover:not(:disabled) { background: #e3f2fd; }
.nav-bar button:disabled { opacity: 0.4; cursor: default; }
#fixture-label { flex: 1; text-align: center; font-weight: 600; }
.kbd { background: #eee; border: 1px solid #ccc; border-radius: 3px;
padding: 1px 5px; font-size: 0.75em; font-family: monospace; color: #555; }
.fixture-card { background: white; border-radius: 8px; padding: 28px 20px 36px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
/* Single centered column: the prompt + input are the whole story. */
.task-body { max-width: 940px; margin: 0 auto; text-align: center; }
.card-title { margin-bottom: 16px; font-size: 1.5em; }
.task-prompt { font-size: 1.2em; line-height: 1.6; color: #222;
background: #fafafa; border: 1px solid #eee; border-radius: 10px;
padding: 20px 26px; margin: 0 auto 28px; max-width: 760px; }
.media-label { color: #607d8b; font-size: 0.8em; text-transform: uppercase;
letter-spacing: 0.06em; font-weight: 700; margin: 8px 0 12px; }
.note { color: #888; font-style: italic; font-size: 0.9em; }
.images { display: flex; gap: 12px; flex-wrap: wrap; margin: 8px 0;
justify-content: center; }
.view { text-align: center; }
.view img { max-height: 260px; border: 1px solid #ddd; border-radius: 4px;
background: #fff; }
.view span { display: block; font-size: 0.72em; color: #888; margin-top: 4px; }
.input-img { display: block; margin: 0 auto; max-height: 620px; max-width: 100%;
border: 1px solid #ddd; border-radius: 6px; }
/* Task-type pill colors */
.type-generation { background: #e3f2fd; color: #1565c0; padding: 2px 8px;
border-radius: 10px; font-weight: 600; }
.type-editing { background: #f3e5f5; color: #6a1b9a; padding: 2px 8px;
border-radius: 10px; font-weight: 600; }
"""
# ---------------------------------------------------------------------------
# JS (navigation ported verbatim from the report: showDetail / j-k-arrows /
# Esc / deep-link hash; the score-column sorter is dropped since there are
# no score columns).
# Adds the client-side grid filter (applyFilter), driven by the search box
# and the type segmented control. render_tasks_page inlines this into a
# <script> element right after assigning window._fixtureNames, which
# updateNav and openHashTarget read.
# ---------------------------------------------------------------------------
_JS = """\
let currentIdx = -1;
const total = document.querySelectorAll('.fixture-card').length;
let query = '', typeFilter = 'all';
function taskImgFail(img) {
const view = img.closest('.view');
if (view) { view.style.display = 'none'; return; }
img.style.display = 'none';
}
// Live grid filter: the segmented control filters by type, the search
// box filters by sample number. Group counts + visibility and the
// empty-state stay in sync; cards stay rendered (so showDetail indices
// are stable) and are just shown/hidden.
function applyFilter() {
const q = query.trim().toLowerCase();
let shown = 0;
document.querySelectorAll('#groups .group').forEach(g => {
const key = g.dataset.group;
let vis = 0;
g.querySelectorAll('.card').forEach(c => {
const ok = (typeFilter === 'all' || c.dataset.type === typeFilter) &&
(!q || c.dataset.name.toLowerCase().includes(q));
c.style.display = ok ? '' : 'none';
if (ok) vis++;
});
g.style.display = vis ? '' : 'none';
const badge = g.querySelector('[data-count-for="' + key + '"]');
if (badge) badge.textContent = vis;
shown += vis;
});
const empty = document.getElementById('empty-state');
if (empty) empty.style.display = shown ? 'none' : '';
const note = document.getElementById('countNote');
if (note) note.textContent = shown + ' of ' + total + ' shown';
}
(function wireControls() {
const searchEl = document.getElementById('search');
if (searchEl) searchEl.addEventListener('input', e => {
query = e.target.value; applyFilter();
});
const seg = document.getElementById('typeSeg');
if (seg) seg.querySelectorAll('button').forEach(b => {
b.addEventListener('click', () => {
seg.querySelectorAll('button').forEach(x => x.classList.remove('on'));
b.classList.add('on');
typeFilter = b.dataset.type;
applyFilter();
});
});
applyFilter();
})();
function showSummary() {
document.getElementById('summary-view').style.display = '';
document.getElementById('detail-view').style.display = 'none';
currentIdx = -1;
}
function showDetail(idx) {
if (idx < 0 || idx >= total) return;
document.getElementById('summary-view').style.display = 'none';
document.getElementById('detail-view').style.display = '';
document.querySelectorAll('.fixture-card').forEach(c => c.style.display = 'none');
document.querySelectorAll('.fixture-card')[idx].style.display = '';
currentIdx = idx;
updateNav();
window.scrollTo(0, 0);
}
function updateNav() {
document.getElementById('prev-btn').disabled = (currentIdx <= 0);
document.getElementById('next-btn').disabled = (currentIdx >= total - 1);
const names = window._fixtureNames || [];
document.getElementById('fixture-label').textContent =
(currentIdx + 1) + ' / ' + total + ': ' + (names[currentIdx] || '');
}
document.addEventListener('keydown', function(e) {
if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
if (currentIdx === -1) return;
if (e.key === 'j' || e.key === 'ArrowRight') {
e.preventDefault(); showDetail(currentIdx + 1);
} else if (e.key === 'k' || e.key === 'ArrowLeft') {
e.preventDefault(); showDetail(currentIdx - 1);
} else if (e.key === 'Escape') {
e.preventDefault(); showSummary();
}
});
// Deep-link: opening at `#fixture=<name>` (or `#idx=<n>`) jumps straight
// to that task's detail card. Inert when there is no hash or no match.
function openHashTarget() {
const hash = (window.location.hash || '').replace(/^#/, '');
if (!hash) return;
const params = new URLSearchParams(hash);
const names = window._fixtureNames || [];
let idx = -1;
if (params.has('fixture')) {
idx = names.indexOf(params.get('fixture'));
} else if (params.has('idx')) {
idx = parseInt(params.get('idx'), 10);
}
if (idx >= 0 && idx < total) showDetail(idx);
}
openHashTarget();
window.addEventListener('hashchange', openHashTarget);
"""