cadgenbench-leaderboard / tests /test_tasks.py
Michael Rabinovich
leaderboard: rebuild Tasks tab as grouped thumbnail grid
95f3ee8
# Copyright 2026 Hugging Face
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic unit tests for the Task-browser page builder (``tasks.py``).
No network: fixtures are written to a tmp dir laid out like the inputs
dataset snapshot (``<fixture>/description.yaml``), then loaded + rendered.
"""
from __future__ import annotations
import textwrap
from pathlib import Path
from tasks import load_tasks_from_dir, render_tasks_page
def _write_fixture(root: Path, name: str, body: str) -> None:
d = root / name
d.mkdir(parents=True)
(d / "description.yaml").write_text(textwrap.dedent(body))
def test_load_tasks_generation_editing_and_multi_image(tmp_path: Path) -> None:
    """Load one fixture of each flavour and check the per-task fields.

    Three fixtures are written: a generation task with one drawing, a
    generation task with two drawings, and an editing task whose only
    input is a STEP file.
    """
    fixture_bodies = {
        "101": """
            description: Reproduce the geometry from the drawing.
            input_files:
              - input.png
            """,
        "127": """
            description: Reproduce from the drawings.
            input_files:
              - input.png
              - input2.png
            """,
        "201": """
            description: Bring the pocket walls inward by 6mm.
            task_type: editing
            input_files:
              - input.step
            """,
    }
    for fixture_name, yaml_body in fixture_bodies.items():
        _write_fixture(tmp_path, fixture_name, yaml_body)

    loaded = load_tasks_from_dir(tmp_path)

    # Tasks are expected back ordered by fixture name.
    loaded_names = [entry["name"] for entry in loaded]
    assert loaded_names == ["101", "127", "201"]

    single_drawing, two_drawings, step_edit = loaded

    # With no ``task_type`` key the task is reported as "generation".
    assert single_drawing["task_type"] == "generation"
    assert single_drawing["image_inputs"] == ["input.png"]
    assert single_drawing["wants_shape"] is False

    # A multi-image generation fixture keeps every listed drawing.
    assert two_drawings["image_inputs"] == ["input.png", "input2.png"]

    # An editing fixture with a STEP input asks for shape renders and
    # carries no inline drawings.
    assert step_edit["task_type"] == "editing"
    assert step_edit["wants_shape"] is True
    assert step_edit["image_inputs"] == []
def test_load_tasks_defaults_to_input_png_when_unlisted(tmp_path: Path) -> None:
    """A fixture without ``input_files`` loads with ``input.png`` as its drawing."""
    yaml_body = "description: A part with no input_files listed.\n"
    _write_fixture(tmp_path, "300", yaml_body)

    loaded = load_tasks_from_dir(tmp_path)

    # Exactly one task must come back; the unpacking fails otherwise.
    (only_task,) = loaded
    assert only_task["image_inputs"] == ["input.png"]
    assert only_task["wants_shape"] is False
def test_render_tasks_page_structure_and_urls(tmp_path: Path) -> None:
    """Render an editing and a multi-image generation task and inspect the page.

    Checks the navigation scaffolding, the grid summary markup, the asset
    URLs requested through the callback, and that score-related labels are
    absent from the output.
    """
    editing_body = """
        description: Bring the pocket walls inward by 6mm.
        task_type: editing
        input_files:
          - input.step
        """
    generation_body = """
        description: Reproduce from the drawings.
        input_files:
          - input.png
          - input2.png
        """
    _write_fixture(tmp_path, "201", editing_body)
    _write_fixture(tmp_path, "127", generation_body)
    loaded = load_tasks_from_dir(tmp_path)

    requested: list[tuple[str, str]] = []

    def recording_asset_url(fixture: str, relpath: str) -> str:
        # Remember every (fixture, relpath) pair the renderer asks for.
        requested.append((fixture, relpath))
        return f"/task-input/{fixture}/{relpath}"

    doc = render_tasks_page(loaded, recording_asset_url)

    # Report-style navigation scaffolding.
    navigation_markers = (
        'id="summary-view"',
        'id="detail-view"',
        "showDetail(",
        "window._fixtureNames",
    )
    for marker in navigation_markers:
        assert marker in doc

    # Grid summary view: grouped sections with count badges, a search box,
    # the type segmented filter, and a card per task.
    grid_markers = (
        'class="grid"',
        'data-group="generation"',
        'data-group="editing"',
        'data-count-for="generation"',
        'id="typeSeg"',
        'id="search"',
        'class="card"',
    )
    for marker in grid_markers:
        assert marker in doc

    # The editing fixture pulls its starting-shape render; the multi-image
    # generation fixture pulls both of its drawings.
    expected_requests = (
        ("201", "renders/iso.png"),
        ("127", "input.png"),
        ("127", "input2.png"),
    )
    for request in expected_requests:
        assert request in requested

    # The prompt text appears in the page; score-related labels do not.
    assert "Bring the pocket walls inward by 6mm." in doc
    for forbidden in ("Ground Truth", "CAD Score"):
        assert forbidden not in doc
def test_thumbnails_are_lazy_and_typed(tmp_path: Path) -> None:
    """Check the lazy-loading attributes and per-type source of card thumbnails.

    A generation card is expected to point at ``input.png``; an editing card
    at the ``renders/iso.png`` render. Both ``<img>`` tags must carry the
    lazy/async loading attributes and fixed 180x135 dimensions.
    """
    generation_body = """
        description: Reproduce the bracket.
        input_files:
          - input.png
        """
    editing_body = """
        description: Widen the slot.
        task_type: editing
        input_files:
          - input.step
        """
    _write_fixture(tmp_path, "108", generation_body)
    _write_fixture(tmp_path, "204", editing_body)
    loaded = load_tasks_from_dir(tmp_path)

    def plain_asset_url(fixture: str, relpath: str) -> str:
        return f"/task-input/{fixture}/{relpath}"

    doc = render_tasks_page(loaded, plain_asset_url)

    # Attribute prefix shared by both expected thumbnail tags.
    lazy_img_prefix = (
        '<img loading="lazy" decoding="async" width="180" height="135" '
    )

    # Generation card thumbnail -> input.png; editing -> iso render.
    generation_thumb = lazy_img_prefix + 'src="/task-input/108/input.png"'
    editing_thumb = lazy_img_prefix + 'src="/task-input/204/renders/iso.png"'
    assert generation_thumb in doc
    assert editing_thumb in doc

    # One card per task, and at least that many lazy/async image tags.
    card_count = doc.count('class="card"')
    lazy_count = doc.count('loading="lazy" decoding="async"')
    assert card_count == 2
    assert lazy_count >= 2
def test_render_tasks_page_empty(tmp_path: Path) -> None:
    """Rendering an empty task list produces the "No tasks found" message."""

    def blank_asset_url(f, r):
        # Stub URL builder that always returns an empty string.
        return ""

    doc = render_tasks_page([], blank_asset_url)
    assert "No tasks found" in doc