Michael Rabinovich Cursor committed on
Commit ·
f4924d6
1
Parent(s): c1cb5e4
leaderboard: add Tasks tab to browse benchmark fixtures
Browse files
A read-only task browser that mirrors the per-submission report's
summary-table -> detail-card navigation (j/k, Esc, Prev/Next,
deep-linkable) but shows only the prompt + input (drawing for
generation, starting-shape renders for editing) with no scores or
ground truth. Fixtures come from each <fixture>/description.yaml in the
inputs dataset; input images are proxied through the Space's read token
via /task-input and lazy-loaded.
Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +110 -1
- requirements.txt +4 -0
- tasks.py +393 -0
- tests/test_tasks.py +140 -0
- tools/preview_tasks.py +48 -0
app.py
CHANGED
|
@@ -26,6 +26,7 @@ from __future__ import annotations
|
|
| 26 |
|
| 27 |
import html
|
| 28 |
import logging
|
|
|
|
| 29 |
import os
|
| 30 |
from functools import lru_cache
|
| 31 |
from pathlib import Path
|
|
@@ -36,7 +37,7 @@ import uvicorn
|
|
| 36 |
from fastapi import FastAPI
|
| 37 |
from fastapi.responses import HTMLResponse, Response
|
| 38 |
from gradio_leaderboard import Leaderboard
|
| 39 |
-
from huggingface_hub import hf_hub_download
|
| 40 |
|
| 41 |
from leaderboard import (
|
| 42 |
ADMIN_COLUMNS,
|
|
@@ -57,6 +58,7 @@ from leaderboard import (
|
|
| 57 |
load_leaderboard_split,
|
| 58 |
)
|
| 59 |
from gallery import render_gallery_page
|
|
|
|
| 60 |
from admin import (
|
| 61 |
VALID_METHODS,
|
| 62 |
delete_rows,
|
|
@@ -663,6 +665,93 @@ def _gallery_iframe_html() -> str:
|
|
| 663 |
)
|
| 664 |
|
| 665 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
|
| 667 |
gr.Markdown(
|
| 668 |
"# CADGenBench Leaderboard\n"
|
|
@@ -683,6 +772,17 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
|
|
| 683 |
fn=_gallery_iframe_html, outputs=gallery_html,
|
| 684 |
)
|
| 685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
with gr.Tab("Leaderboard"):
|
| 687 |
# Load both tiers once at boot. `_safe_load_split` keeps a Hub
|
| 688 |
# read failure from crashing the Space: on failure the frames
|
|
@@ -964,6 +1064,7 @@ to publish the resulting row on the public leaderboard.
|
|
| 964 |
# Gradio's auth-event plumbing.
|
| 965 |
blocks.load(fn=_enable_submit_when_logged_in, outputs=submit_btn)
|
| 966 |
blocks.load(fn=_gallery_iframe_html, outputs=gallery_html)
|
|
|
|
| 967 |
|
| 968 |
# Same per-load OAuth read, gating the Admin tab's controls on
|
| 969 |
# membership in the CADGENBENCH_ADMINS set. Logged-out / non-admin
|
|
@@ -1006,6 +1107,14 @@ app.add_api_route(
|
|
| 1006 |
serve_gt_render,
|
| 1007 |
methods=["GET"],
|
| 1008 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1009 |
app = gr.mount_gradio_app(app, blocks, path="/")
|
| 1010 |
|
| 1011 |
|
|
|
|
| 26 |
|
| 27 |
import html
|
| 28 |
import logging
|
| 29 |
+
import mimetypes
|
| 30 |
import os
|
| 31 |
from functools import lru_cache
|
| 32 |
from pathlib import Path
|
|
|
|
| 37 |
from fastapi import FastAPI
|
| 38 |
from fastapi.responses import HTMLResponse, Response
|
| 39 |
from gradio_leaderboard import Leaderboard
|
| 40 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 41 |
|
| 42 |
from leaderboard import (
|
| 43 |
ADMIN_COLUMNS,
|
|
|
|
| 58 |
load_leaderboard_split,
|
| 59 |
)
|
| 60 |
from gallery import render_gallery_page
|
| 61 |
+
from tasks import load_tasks_from_dir, render_tasks_page
|
| 62 |
from admin import (
|
| 63 |
VALID_METHODS,
|
| 64 |
delete_rows,
|
|
|
|
| 665 |
)
|
| 666 |
|
| 667 |
|
| 668 |
+
def _fetch_task_input(fixture: str, relpath: str) -> bytes | None:
    """Return the bytes of ``<fixture>/<relpath>`` from the inputs dataset.

    Backs the Task-browser tab's drawings and starting-shape renders.
    The inputs dataset is private, so the Space (which holds the read
    token) downloads the file and re-serves it instead of linking to the
    Hub directly, mirroring :func:`_fetch_render`. Deliberately not
    memoized: inputs can change on a data revision bump and
    ``hf_hub_download`` already caches on disk per revision.

    Returns ``None`` on any failure; the failure is logged as a warning
    and the caller turns it into a 404.
    """
    repo_file = f"{fixture}/{relpath}"
    try:
        cached_path = hf_hub_download(
            repo_id=HF_DATA_REPO,
            filename=repo_file,
            repo_type="dataset",
        )
        payload = Path(cached_path).read_bytes()
    except Exception as e:  # noqa: BLE001 - any Hub failure -> 404
        logger.warning(
            "Failed to fetch task input %s/%s (%s: %s)",
            fixture, relpath, type(e).__name__, e,
        )
        return None
    return payload
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
def _task_input_url(fixture: str, relpath: str) -> str:
|
| 695 |
+
"""Resolver returning the Space proxy URL for a task input asset.
|
| 696 |
+
|
| 697 |
+
Returns the route string without fetching bytes (the browser
|
| 698 |
+
lazy-fetches only the on-screen task's images). An absolute path
|
| 699 |
+
resolves against the Space origin even inside the iframe ``srcdoc``.
|
| 700 |
+
"""
|
| 701 |
+
return f"/task-input/{fixture}/{relpath}"
|
| 702 |
+
|
| 703 |
+
|
| 704 |
+
def serve_task_input(fixture: str, relpath: str) -> Response:
    """Serve one fixture input asset for the task browser.

    The task browser references ``/task-input/<fixture>/<relpath>`` and
    the browser requests it lazily. Any ``..`` in either component is
    rejected with a 404 (path-traversal guard), as is any asset that
    :func:`_fetch_task_input` could not load. Otherwise the dataset bytes
    are re-served (the Space holds the read token) with a media type
    guessed from the file extension and the same ``Cache-Control`` header
    (``RENDER_CACHE_CONTROL``) used by the render proxies.
    """
    if any(".." in part for part in (fixture, relpath)):
        return Response(status_code=404)

    payload = _fetch_task_input(fixture, relpath)
    if payload is None:
        return Response(status_code=404)

    # Fall back to a generic binary type when the extension is unknown.
    guessed_type, _encoding = mimetypes.guess_type(relpath)
    return Response(
        content=payload,
        media_type=guessed_type or "application/octet-stream",
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
| 724 |
+
|
| 725 |
+
|
| 726 |
+
def _tasks_iframe_html() -> str:
    """Return the Task browser wrapped in a self-contained ``srcdoc`` iframe.

    Only the ``<fixture>/description.yaml`` files are snapshotted from
    the inputs dataset; the drawings and renders are fetched later by the
    browser through the ``/task-input`` proxy. The rendered page is
    HTML-escaped and inlined as the iframe's ``srcdoc`` so it keeps its
    own style context (no Gradio CSS collision). If loading the tasks
    fails for any reason the error is logged and an empty browser is
    rendered instead of crashing the tab.
    """
    try:
        snapshot_root = snapshot_download(
            repo_id=HF_DATA_REPO,
            repo_type="dataset",
            allow_patterns=["*/description.yaml"],
        )
        tasks = load_tasks_from_dir(Path(snapshot_root))
    except Exception:  # noqa: BLE001 - degrade to empty browser, never crash
        logger.exception("Task load failed; rendering empty task browser")
        tasks = []

    page = render_tasks_page(tasks, _task_input_url)
    srcdoc = html.escape(page, quote=True)
    return "".join((
        f'<iframe srcdoc="{srcdoc}" ',
        'style="width:100%; height:90vh; border:0; display:block;" ',
        'title="CADGenBench tasks"></iframe>',
    ))
|
| 753 |
+
|
| 754 |
+
|
| 755 |
with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
|
| 756 |
gr.Markdown(
|
| 757 |
"# CADGenBench Leaderboard\n"
|
|
|
|
| 772 |
fn=_gallery_iframe_html, outputs=gallery_html,
|
| 773 |
)
|
| 774 |
|
| 775 |
+
with gr.Tab("Tasks"):
|
| 776 |
+
# Read-only task browser: mirrors the per-submission report's
|
| 777 |
+
# summary-table -> detail-card navigation (j/k, Esc) but shows
|
| 778 |
+
# only the prompt + input (drawing / starting shape), no scores
|
| 779 |
+
# or ground truth. Self-contained HTML inlined into an iframe
|
| 780 |
+
# `srcdoc` like the gallery; input images lazy-load from the
|
| 781 |
+
# `/task-input` proxy. Built at boot, rebuilt on page load.
|
| 782 |
+
tasks_html = gr.HTML(value=_tasks_iframe_html())
|
| 783 |
+
tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
|
| 784 |
+
tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
|
| 785 |
+
|
| 786 |
with gr.Tab("Leaderboard"):
|
| 787 |
# Load both tiers once at boot. `_safe_load_split` keeps a Hub
|
| 788 |
# read failure from crashing the Space: on failure the frames
|
|
|
|
| 1064 |
# Gradio's auth-event plumbing.
|
| 1065 |
blocks.load(fn=_enable_submit_when_logged_in, outputs=submit_btn)
|
| 1066 |
blocks.load(fn=_gallery_iframe_html, outputs=gallery_html)
|
| 1067 |
+
blocks.load(fn=_tasks_iframe_html, outputs=tasks_html)
|
| 1068 |
|
| 1069 |
# Same per-load OAuth read, gating the Admin tab's controls on
|
| 1070 |
# membership in the CADGENBENCH_ADMINS set. Logged-out / non-admin
|
|
|
|
| 1107 |
serve_gt_render,
|
| 1108 |
methods=["GET"],
|
| 1109 |
)
|
| 1110 |
+
# Task-browser input assets (drawings + starting-shape renders). The
|
| 1111 |
+
# `:path` converter lets `relpath` carry a slash (e.g. renders/iso.png).
|
| 1112 |
+
# Registered before the Gradio mount so it's not shadowed.
|
| 1113 |
+
app.add_api_route(
|
| 1114 |
+
"/task-input/{fixture}/{relpath:path}",
|
| 1115 |
+
serve_task_input,
|
| 1116 |
+
methods=["GET"],
|
| 1117 |
+
)
|
| 1118 |
app = gr.mount_gradio_app(app, blocks, path="/")
|
| 1119 |
|
| 1120 |
|
requirements.txt
CHANGED
|
@@ -19,3 +19,7 @@ pandas>=2.0
|
|
| 19 |
huggingface_hub>=1.16.0
|
| 20 |
datasets>=3.0
|
| 21 |
requests>=2.31
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
huggingface_hub>=1.16.0
|
| 20 |
datasets>=3.0
|
| 21 |
requests>=2.31
|
| 22 |
+
# tasks.py parses each fixture's description.yaml (prompt + task_type +
|
| 23 |
+
# input_files) to build the Task-browser tab. Declared explicitly even
|
| 24 |
+
# though it rides in transitively via gradio/huggingface_hub.
|
| 25 |
+
pyyaml>=6.0
|
tasks.py
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2026 Hugging Face
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
"""Task browser page.
|
| 16 |
+
|
| 17 |
+
A read-only "browse the benchmark tasks" surface that mirrors the
|
| 18 |
+
per-submission report's look and navigation exactly (summary table ->
|
| 19 |
+
click a row -> per-fixture detail card, ``j``/``k`` / arrow keys to
|
| 20 |
+
move, ``Esc`` to return) but **without any scores, ground truth, or
|
| 21 |
+
submission output**: each task reads as an unsolved problem. The detail
|
| 22 |
+
card centers the prompt and the input — the drawing (generation tasks)
|
| 23 |
+
or the starting-shape renders (editing tasks).
|
| 24 |
+
|
| 25 |
+
The task universe comes from the fixture inputs dataset's
|
| 26 |
+
``<fixture>/description.yaml`` files (``description`` + ``task_type`` +
|
| 27 |
+
``input_files``); :func:`load_tasks_from_dir` shapes them into the
|
| 28 |
+
small list the page renders. Image lookups are isolated behind a single
|
| 29 |
+
injected resolver so this module stays agnostic to how the URLs are
|
| 30 |
+
built (Space proxy/resolve URLs in production, local file paths in the
|
| 31 |
+
preview):
|
| 32 |
+
|
| 33 |
+
- ``asset_url(fixture, relpath)`` -> URL for a public input asset
|
| 34 |
+
(e.g. ``input.png`` or ``renders/iso.png``).
|
| 35 |
+
|
| 36 |
+
Like the gallery, the document is self-contained (its own CSS + JS) so
|
| 37 |
+
it can be inlined into an iframe ``srcdoc`` with its own style context,
|
| 38 |
+
and images are lazy-loaded so only the on-screen card's renders are
|
| 39 |
+
fetched.
|
| 40 |
+
"""
|
| 41 |
+
from __future__ import annotations
|
| 42 |
+
|
| 43 |
+
import html
|
| 44 |
+
import json
|
| 45 |
+
import logging
|
| 46 |
+
from pathlib import Path
|
| 47 |
+
|
| 48 |
+
import yaml
|
| 49 |
+
|
| 50 |
+
logger = logging.getLogger(__name__)
|
| 51 |
+
|
| 52 |
+
# Canonical render views shown in the starting-shape input grid, in
|
| 53 |
+
# display order. Missing views degrade away client-side (the <img>
|
| 54 |
+
# onerror hook hides the tile) so we don't need to probe the Hub for
|
| 55 |
+
# which views exist per fixture.
|
| 56 |
+
VIEWS = ["iso", "front", "top", "right"]
|
| 57 |
+
|
| 58 |
+
_STEP_SUFFIXES = (".step", ".stp")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def load_tasks_from_dir(inputs_dir: Path) -> list[dict]:
    """Shape ``<fixture>/description.yaml`` files into task dicts.

    ``inputs_dir`` is a fixtures root whose immediate children are
    fixture directories (the layout of the inputs dataset snapshot and
    of the local data clone). Each task dict carries:

    - ``name``        : fixture id (the directory name).
    - ``task_type``   : the YAML ``task_type``; ``"generation"`` when the
      key is missing or null.
    - ``description`` : the prompt text, stripped.
    - ``image_inputs``: the non-STEP entries of ``input_files`` (e.g.
      the generation drawing). Falls back to ``["input.png"]`` when a
      fixture lists neither images nor a STEP file.
    - ``wants_shape`` : True when the fixture lists a STEP input (an
      editing task), so the caller shows the starting-shape renders.

    A fixture whose ``description.yaml`` cannot be read or parsed, or
    whose top level is not a mapping, is skipped with a warning so one
    bad file does not blank the whole browser.

    Sorted by fixture path for a stable order, matching the report.
    """
    tasks: list[dict] = []
    for desc_path in sorted(inputs_dir.glob("*/description.yaml")):
        name = desc_path.parent.name
        try:
            # Explicit UTF-8: prompts may contain non-ASCII text and the
            # locale default is not guaranteed to be UTF-8.
            data = yaml.safe_load(desc_path.read_text(encoding="utf-8")) or {}
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            logger.warning(
                "Skipping fixture %s: unreadable description.yaml (%s: %s)",
                name, type(e).__name__, e,
            )
            continue
        if not isinstance(data, dict):
            logger.warning(
                "Skipping fixture %s: description.yaml is not a mapping", name,
            )
            continue

        # ``or`` (not a .get default) so an explicit null falls back too;
        # a None task_type would otherwise crash html.escape at render time.
        task_type = str(data.get("task_type") or "generation")
        description = str(data.get("description") or "")

        raw_files = data.get("input_files") or []
        if isinstance(raw_files, str):
            # A scalar `input_files: drawing.png` means one file, not a
            # sequence of characters.
            raw_files = [raw_files]
        elif not isinstance(raw_files, (list, tuple)):
            raw_files = []

        image_inputs = []
        wants_shape = False
        for f in raw_files:
            if str(f).lower().endswith(_STEP_SUFFIXES):
                wants_shape = True
            else:
                image_inputs.append(f)

        # Generation fixtures that didn't list input_files still ship the
        # canonical drawing as input.png; reference it so the card isn't
        # blank (a missing file just hides itself via the onerror hook).
        if not image_inputs and not wants_shape:
            image_inputs = ["input.png"]

        tasks.append({
            "name": name,
            "task_type": task_type,
            "description": description.strip(),
            "image_inputs": image_inputs,
            "wants_shape": wants_shape,
        })
    return tasks
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _type_pill(task_type: str) -> str:
|
| 108 |
+
cls = "type-editing" if task_type == "editing" else "type-generation"
|
| 109 |
+
return f'<span class="tag {cls}">{html.escape(task_type)}</span>'
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _views_grid(url_for) -> str:
    """Build the render grid: one tile per entry of ``VIEWS``, in order.

    ``url_for(view)`` supplies the image URL for a view name. Each tile
    is a lazily loaded ``<img>`` plus a caption; the ``onerror`` hook
    calls ``taskImgFail`` so a missing render can hide its tile rather
    than show a broken-image icon.
    """
    tiles = [
        f'<div class="view"><img loading="lazy" decoding="async" '
        f'src="{html.escape(url_for(view), quote=True)}" alt="{view}" '
        f'onerror="taskImgFail(this)"><span>{view}</span></div>'
        for view in VIEWS
    ]
    return "\n".join(['<div class="images">', *tiles, "</div>"])
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def _render_task_card(task: dict, idx: int, asset_url) -> str:
    """Render one task's detail card (hidden until selected).

    ``task`` is a dict with the keys ``name``, ``task_type``,
    ``description``, ``wants_shape`` and ``image_inputs``; ``idx`` is
    written to the card's ``data-idx``; ``asset_url(fixture, relpath)``
    resolves image URLs. The card shows the title with its type pill,
    the prompt (when non-empty) and the input: starting-shape renders
    when ``wants_shape`` is set, otherwise the listed input image(s).
    No ground truth or scores are rendered.
    """
    name = task["name"]
    lines = [
        f'<div class="fixture-card" data-idx="{idx}" style="display:none">',
        '<div class="task-body">',
        f'<h2 class="card-title">{html.escape(name)} '
        f'{_type_pill(task["task_type"])}</h2>',
    ]

    # The prompt is the headline of the card.
    prompt = task["description"]
    if prompt:
        lines.append(f'<p class="task-prompt">{html.escape(prompt)}</p>')

    if task["wants_shape"]:
        def render_url(view):
            return asset_url(name, f"renders/{view}.png")

        lines.append('<div class="media-label">Starting shape</div>')
        lines.append(_views_grid(render_url))
    elif task["image_inputs"]:
        lines.append('<div class="media-label">Drawing</div>')
        lines.extend(
            f'<img loading="lazy" decoding="async" '
            f'src="{html.escape(asset_url(name, fname), quote=True)}" alt="input" '
            f'class="input-img" onerror="taskImgFail(this)">'
            for fname in task["image_inputs"]
        )

    lines.append("</div>")  # task-body
    lines.append("</div>")  # fixture-card
    return "\n".join(lines)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _render_summary_table(tasks: list[dict]) -> str:
|
| 165 |
+
rows = [
|
| 166 |
+
'<table class="summary-table" id="summary-table">',
|
| 167 |
+
"<thead><tr><th>Fixture</th><th>Type</th></tr></thead><tbody>",
|
| 168 |
+
]
|
| 169 |
+
for i, t in enumerate(tasks):
|
| 170 |
+
rows.append(
|
| 171 |
+
f'<tr onclick="showDetail({i})" style="cursor:pointer">'
|
| 172 |
+
f'<td>{html.escape(t["name"])}</td>'
|
| 173 |
+
f"<td>{_type_pill(t['task_type'])}</td>"
|
| 174 |
+
f"</tr>"
|
| 175 |
+
)
|
| 176 |
+
rows.append("</tbody></table>")
|
| 177 |
+
return "\n".join(rows)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def _render_header(tasks: list[dict]) -> str:
|
| 181 |
+
n = len(tasks)
|
| 182 |
+
n_gen = sum(1 for t in tasks if t["task_type"] != "editing")
|
| 183 |
+
n_edit = n - n_gen
|
| 184 |
+
return (
|
| 185 |
+
'<div class="run-stats">'
|
| 186 |
+
f"<span>{n} tasks</span>"
|
| 187 |
+
f"<span>generation: <b>{n_gen}</b></span>"
|
| 188 |
+
f"<span>editing: <b>{n_edit}</b></span>"
|
| 189 |
+
"</div>"
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def render_tasks_page(tasks: list[dict], asset_url) -> str:
    """Build the full standalone task-browser HTML document.

    ``tasks`` is the list of task dicts to show (each needs at least the
    keys the card and table renderers read: ``name``, ``task_type``,
    ``description``, ``wants_shape``, ``image_inputs``).
    ``asset_url(fixture, relpath)`` supplies the input image URLs. The
    page has a summary view (table, or a note when ``tasks`` is empty)
    and a hidden detail view holding one card per task plus the nav bar;
    the inline script switches between them.

    The fixture-name list is embedded in the inline ``<script>`` as JSON
    with ``<``, ``>`` and ``&`` written as ``\\uXXXX`` escapes, so a
    fixture name containing ``</script>`` or ``<!--`` cannot terminate
    or corrupt the script element. The JavaScript values are unchanged.
    """
    # json.dumps does not escape HTML-significant characters; inside JSON
    # they can only occur within string literals, where \uXXXX is valid.
    fixture_names_js = (
        json.dumps([t["name"] for t in tasks])
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    p = [
        "<!DOCTYPE html><html lang='en'><head>",
        "<meta charset='utf-8'>",
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>",
        "<title>CADGenBench Tasks</title>",
        f"<style>{_CSS}</style>",
        "</head><body>",
    ]

    p.append('<div class="run-header">')
    p.append("<h1>CADGenBench Tasks</h1>")
    p.append(_render_header(tasks))
    p.append("</div>")

    # Summary view
    p.append('<div id="summary-view">')
    p.append(
        '<p style="color:#888;font-size:0.85em">'
        "Click a row to view the task. "
        '<span class="kbd">j</span>/<span class="kbd">k</span> '
        "to navigate, "
        '<span class="kbd">Esc</span> to return.</p>'
    )
    if tasks:
        p.append(_render_summary_table(tasks))
    else:
        p.append(
            '<p class="note">No tasks found in the fixture inputs dataset.</p>'
        )
    p.append("</div>")

    # Detail view
    p.append('<div id="detail-view" style="display:none">')
    p.append('<div class="nav-bar">')
    p.append('<button onclick="showSummary()">← Summary</button>')
    p.append(
        '<button id="prev-btn" onclick="showDetail(currentIdx-1)">← Prev '
        '<span class="kbd">k</span></button>'
    )
    p.append('<span id="fixture-label"></span>')
    p.append(
        '<button id="next-btn" onclick="showDetail(currentIdx+1)">Next '
        '<span class="kbd">j</span> →</button>'
    )
    p.append("</div>")
    for i, t in enumerate(tasks):
        p.append(_render_task_card(t, i, asset_url))
    p.append("</div>")

    p.append(f"<script>window._fixtureNames = {fixture_names_js};\n{_JS}</script>")
    p.append("</body></html>")
    return "\n".join(p)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
# ---------------------------------------------------------------------------
|
| 256 |
+
# CSS (ported from the per-submission report so the look matches exactly;
|
| 257 |
+
# trimmed to the surfaces this page uses + task-type pill colors).
|
| 258 |
+
# ---------------------------------------------------------------------------
|
| 259 |
+
|
| 260 |
+
_CSS = """\
|
| 261 |
+
* { box-sizing: border-box; }
|
| 262 |
+
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
| 263 |
+
max-width: 1600px; margin: 0 auto; padding: 20px; background: #f8f9fa; }
|
| 264 |
+
h1 { border-bottom: 2px solid #333; padding-bottom: 8px; }
|
| 265 |
+
h2 { margin-top: 0; }
|
| 266 |
+
.tag { font-size: 0.6em; color: #666; font-weight: normal; font-family: monospace;
|
| 267 |
+
margin-left: 6px; }
|
| 268 |
+
|
| 269 |
+
.run-header { background: white; border-radius: 8px; padding: 16px 20px;
|
| 270 |
+
margin-bottom: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
| 271 |
+
.run-stats { margin-top: 8px; font-size: 0.95em; }
|
| 272 |
+
.run-stats span { margin-right: 20px; font-weight: 500; }
|
| 273 |
+
|
| 274 |
+
.summary-table { width: 100%; border-collapse: collapse; background: white;
|
| 275 |
+
border-radius: 8px; overflow: hidden;
|
| 276 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
| 277 |
+
.summary-table th { background: #37474f; color: white; padding: 10px 12px;
|
| 278 |
+
text-align: left; font-size: 0.85em; text-transform: uppercase;
|
| 279 |
+
letter-spacing: 0.05em; }
|
| 280 |
+
.summary-table td { padding: 8px 12px; border-bottom: 1px solid #eee; font-size: 0.9em; }
|
| 281 |
+
.summary-table tr:hover { filter: brightness(0.97); background: #f5f5f5; }
|
| 282 |
+
|
| 283 |
+
.nav-bar { display: flex; align-items: center; gap: 12px; padding: 12px 16px;
|
| 284 |
+
background: white; border-radius: 8px; margin-bottom: 16px;
|
| 285 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1); position: sticky; top: 0; z-index: 100; }
|
| 286 |
+
.nav-bar button { padding: 6px 14px; border: 1px solid #ccc; border-radius: 4px;
|
| 287 |
+
background: white; cursor: pointer; font-size: 0.9em; }
|
| 288 |
+
.nav-bar button:hover:not(:disabled) { background: #e3f2fd; }
|
| 289 |
+
.nav-bar button:disabled { opacity: 0.4; cursor: default; }
|
| 290 |
+
#fixture-label { flex: 1; text-align: center; font-weight: 600; }
|
| 291 |
+
.kbd { background: #eee; border: 1px solid #ccc; border-radius: 3px;
|
| 292 |
+
padding: 1px 5px; font-size: 0.75em; font-family: monospace; color: #555; }
|
| 293 |
+
|
| 294 |
+
.fixture-card { background: white; border-radius: 8px; padding: 28px 20px 36px;
|
| 295 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
| 296 |
+
/* Single centered column: the prompt + input are the whole story. */
|
| 297 |
+
.task-body { max-width: 940px; margin: 0 auto; text-align: center; }
|
| 298 |
+
.card-title { margin-bottom: 16px; font-size: 1.5em; }
|
| 299 |
+
|
| 300 |
+
.task-prompt { font-size: 1.2em; line-height: 1.6; color: #222;
|
| 301 |
+
background: #fafafa; border: 1px solid #eee; border-radius: 10px;
|
| 302 |
+
padding: 20px 26px; margin: 0 auto 28px; max-width: 760px; }
|
| 303 |
+
.media-label { color: #607d8b; font-size: 0.8em; text-transform: uppercase;
|
| 304 |
+
letter-spacing: 0.06em; font-weight: 700; margin: 8px 0 12px; }
|
| 305 |
+
.note { color: #888; font-style: italic; font-size: 0.9em; }
|
| 306 |
+
.images { display: flex; gap: 12px; flex-wrap: wrap; margin: 8px 0;
|
| 307 |
+
justify-content: center; }
|
| 308 |
+
.view { text-align: center; }
|
| 309 |
+
.view img { max-height: 260px; border: 1px solid #ddd; border-radius: 4px;
|
| 310 |
+
background: #fff; }
|
| 311 |
+
.view span { display: block; font-size: 0.72em; color: #888; margin-top: 4px; }
|
| 312 |
+
.input-img { display: block; margin: 0 auto; max-height: 620px; max-width: 100%;
|
| 313 |
+
border: 1px solid #ddd; border-radius: 6px; }
|
| 314 |
+
|
| 315 |
+
/* Task-type pill colors */
|
| 316 |
+
.type-generation { background: #e3f2fd; color: #1565c0; padding: 2px 8px;
|
| 317 |
+
border-radius: 10px; font-weight: 600; }
|
| 318 |
+
.type-editing { background: #f3e5f5; color: #6a1b9a; padding: 2px 8px;
|
| 319 |
+
border-radius: 10px; font-weight: 600; }
|
| 320 |
+
"""
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# ---------------------------------------------------------------------------
|
| 324 |
+
# JS (navigation ported verbatim from the report: showDetail / j-k-arrows /
|
| 325 |
+
# Esc / deep-link hash; the score-column sorter is dropped since there are
|
| 326 |
+
# no score columns).
|
| 327 |
+
# ---------------------------------------------------------------------------
|
| 328 |
+
|
| 329 |
+
_JS = """\
|
| 330 |
+
let currentIdx = -1;
|
| 331 |
+
const total = document.querySelectorAll('.fixture-card').length;
|
| 332 |
+
|
| 333 |
+
function taskImgFail(img) {
|
| 334 |
+
const view = img.closest('.view');
|
| 335 |
+
if (view) { view.style.display = 'none'; return; }
|
| 336 |
+
img.style.display = 'none';
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
function showSummary() {
|
| 340 |
+
document.getElementById('summary-view').style.display = '';
|
| 341 |
+
document.getElementById('detail-view').style.display = 'none';
|
| 342 |
+
currentIdx = -1;
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
function showDetail(idx) {
|
| 346 |
+
if (idx < 0 || idx >= total) return;
|
| 347 |
+
document.getElementById('summary-view').style.display = 'none';
|
| 348 |
+
document.getElementById('detail-view').style.display = '';
|
| 349 |
+
document.querySelectorAll('.fixture-card').forEach(c => c.style.display = 'none');
|
| 350 |
+
document.querySelectorAll('.fixture-card')[idx].style.display = '';
|
| 351 |
+
currentIdx = idx;
|
| 352 |
+
updateNav();
|
| 353 |
+
window.scrollTo(0, 0);
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
function updateNav() {
|
| 357 |
+
document.getElementById('prev-btn').disabled = (currentIdx <= 0);
|
| 358 |
+
document.getElementById('next-btn').disabled = (currentIdx >= total - 1);
|
| 359 |
+
const names = window._fixtureNames || [];
|
| 360 |
+
document.getElementById('fixture-label').textContent =
|
| 361 |
+
(currentIdx + 1) + ' / ' + total + ': ' + (names[currentIdx] || '');
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
document.addEventListener('keydown', function(e) {
|
| 365 |
+
if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
|
| 366 |
+
if (currentIdx === -1) return;
|
| 367 |
+
if (e.key === 'j' || e.key === 'ArrowRight') {
|
| 368 |
+
e.preventDefault(); showDetail(currentIdx + 1);
|
| 369 |
+
} else if (e.key === 'k' || e.key === 'ArrowLeft') {
|
| 370 |
+
e.preventDefault(); showDetail(currentIdx - 1);
|
| 371 |
+
} else if (e.key === 'Escape') {
|
| 372 |
+
e.preventDefault(); showSummary();
|
| 373 |
+
}
|
| 374 |
+
});
|
| 375 |
+
|
| 376 |
+
// Deep-link support: a URL fragment of the form `#fixture=<name>` or
// `#idx=<n>` opens that task's detail card directly. Nothing happens when
// the fragment is empty or does not resolve to a valid position.
function openHashTarget() {
  const fragment = (window.location.hash || '').replace(/^#/, '');
  if (!fragment) return;

  const query = new URLSearchParams(fragment);
  const fixtureNames = window._fixtureNames || [];

  // `fixture` takes precedence over `idx` when both are present.
  let target = -1;
  if (query.has('fixture')) {
    target = fixtureNames.indexOf(query.get('fixture'));
  } else if (query.has('idx')) {
    target = parseInt(query.get('idx'), 10);
  }

  // Written as a positive range check so that a NaN from parseInt fails it.
  if (target >= 0 && target < total) {
    showDetail(target);
  }
}

// Honour the fragment on initial load and whenever it changes afterwards.
openHashTarget();
window.addEventListener('hashchange', openHashTarget);
|
| 393 |
+
"""
|
tests/test_tasks.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2026 Hugging Face
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
"""Hermetic unit tests for the Task-browser page builder (``tasks.py``).
|
| 16 |
+
|
| 17 |
+
No network: fixtures are written to a tmp dir laid out like the inputs
|
| 18 |
+
dataset snapshot (``<fixture>/description.yaml``), then loaded + rendered.
|
| 19 |
+
"""
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import textwrap
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
|
| 25 |
+
from tasks import load_tasks_from_dir, render_tasks_page
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _write_fixture(root: Path, name: str, body: str) -> None:
|
| 29 |
+
d = root / name
|
| 30 |
+
d.mkdir(parents=True)
|
| 31 |
+
(d / "description.yaml").write_text(textwrap.dedent(body))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_load_tasks_generation_editing_and_multi_image(tmp_path: Path) -> None:
    """Load one generation, one multi-image and one editing fixture."""
    fixture_bodies = {
        "101": """
            description: Reproduce the geometry from the drawing.
            input_files:
              - input.png
            """,
        "127": """
            description: Reproduce from the drawings.
            input_files:
              - input.png
              - input2.png
            """,
        "201": """
            description: Bring the pocket walls inward by 6mm.
            task_type: editing
            input_files:
              - input.step
            """,
    }
    for fixture_name, yaml_body in fixture_bodies.items():
        _write_fixture(tmp_path, fixture_name, yaml_body)

    loaded = load_tasks_from_dir(tmp_path)

    # The loader returns fixtures ordered by name, so the order is stable.
    loaded_names = [record["name"] for record in loaded]
    assert loaded_names == ["101", "127", "201"]

    generation, multi_image, editing = loaded

    # A fixture without an explicit task_type is treated as generation.
    assert generation["task_type"] == "generation"
    assert generation["image_inputs"] == ["input.png"]
    assert generation["wants_shape"] is False

    # Every listed drawing is kept for a multi-image generation fixture.
    assert multi_image["image_inputs"] == ["input.png", "input2.png"]

    # An editing fixture ships a STEP file: shape renders are wanted and
    # there are no inline drawings.
    assert editing["task_type"] == "editing"
    assert editing["wants_shape"] is True
    assert editing["image_inputs"] == []
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def test_load_tasks_defaults_to_input_png_when_unlisted(tmp_path: Path) -> None:
    """A fixture with no ``input_files`` key falls back to ``input.png``."""
    yaml_body = "description: A part with no input_files listed.\n"
    _write_fixture(tmp_path, "300", yaml_body)

    # Exactly one fixture was written, so exactly one record must come back.
    [only_task] = load_tasks_from_dir(tmp_path)

    assert only_task["image_inputs"] == ["input.png"]
    assert only_task["wants_shape"] is False
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def test_render_tasks_page_structure_and_urls(tmp_path: Path) -> None:
    """Render an editing and a multi-image fixture and inspect the page."""
    editing_body = """
        description: Bring the pocket walls inward by 6mm.
        task_type: editing
        input_files:
          - input.step
        """
    multi_image_body = """
        description: Reproduce from the drawings.
        input_files:
          - input.png
          - input2.png
        """
    _write_fixture(tmp_path, "201", editing_body)
    _write_fixture(tmp_path, "127", multi_image_body)
    loaded = load_tasks_from_dir(tmp_path)

    # Record every (fixture, relpath) pair the renderer asks a URL for.
    requested: list[tuple[str, str]] = []

    def asset_url(fixture: str, relpath: str) -> str:
        requested.append((fixture, relpath))
        return f"/task-input/{fixture}/{relpath}"

    page = render_tasks_page(loaded, asset_url)

    # The summary/detail navigation scaffolding must be in the document.
    scaffolding = (
        'id="summary-view"',
        'id="detail-view"',
        "showDetail(",
        "window._fixtureNames",
    )
    for marker in scaffolding:
        assert marker in page

    # The editing fixture asks for its starting-shape render; the
    # multi-image generation fixture asks for both of its drawings.
    expected_assets = (
        ("201", "renders/iso.png"),
        ("127", "input.png"),
        ("127", "input2.png"),
    )
    for asset in expected_assets:
        assert asset in requested

    # The prompt text is shown, and nothing score- or ground-truth-related is.
    assert "Bring the pocket walls inward by 6mm." in page
    for forbidden in ("Ground Truth", "CAD Score"):
        assert forbidden not in page
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def test_render_tasks_page_empty(tmp_path: Path) -> None:
    """With no tasks, the page carries the "No tasks found" placeholder."""

    def blank_url(f, r):
        # URL builder stub; it yields an empty string for any asset.
        return ""

    page = render_tasks_page([], blank_url)
    assert "No tasks found" in page
|
tools/preview_tasks.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Local preview for the Tasks tab.
|
| 2 |
+
|
| 3 |
+
Renders the task-browser page from the *local* data clones
|
| 4 |
+
(``cadgenbench-data`` + ``cadgenbench-data-gt`` at the repo root) and
|
| 5 |
+
writes a standalone HTML file you can open in a browser. Image URLs are
|
| 6 |
+
relative paths into those local folders, so the output must live at the
|
| 7 |
+
workspace root for the relative paths to resolve.
|
| 8 |
+
|
| 9 |
+
Usage::
|
| 10 |
+
|
| 11 |
+
python cadgenbench-leaderboard/tools/preview_tasks.py
|
| 12 |
+
# writes <workspace>/tasks-preview.html
|
| 13 |
+
"""
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import sys
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
# Put the leaderboard directory (the parent of this script's folder) at the
# front of sys.path so `tasks` can be imported regardless of the CWD.
_TOOLS_DIR = Path(__file__).resolve().parent
LEADERBOARD_DIR = _TOOLS_DIR.parent
sys.path.insert(0, str(LEADERBOARD_DIR))

from tasks import load_tasks_from_dir, render_tasks_page  # noqa: E402

# Workspace layout: the data clones and the generated preview all sit next
# to the leaderboard directory.
WORKSPACE = LEADERBOARD_DIR.parent
INPUTS_DIR = WORKSPACE.joinpath("cadgenbench-data")
GT_DIR = WORKSPACE.joinpath("cadgenbench-data-gt")
OUT = WORKSPACE.joinpath("tasks-preview.html")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main() -> int:
    """Render the Tasks page from the local inputs clone and write it to ``OUT``.

    Tasks are loaded from ``INPUTS_DIR``; every asset URL in the page is a
    path relative to the workspace root, which is also where ``OUT`` lives.

    Returns:
        ``0`` once the preview file has been written.
    """
    tasks = load_tasks_from_dir(INPUTS_DIR)

    def asset_url(fixture: str, relpath: str) -> str:
        # Derive the prefix from INPUTS_DIR so the URLs cannot drift from
        # the directory the tasks were actually loaded from.
        return f"{INPUTS_DIR.name}/{fixture}/{relpath}"

    doc = render_tasks_page(tasks, asset_url)
    # Write UTF-8 explicitly: the locale default encoding may be unable to
    # represent non-ASCII characters in task prompts.
    OUT.write_text(doc, encoding="utf-8")
    n_edit = sum(1 for t in tasks if t["task_type"] == "editing")
    print(
        f"Wrote {OUT} ({len(tasks)} tasks, {n_edit} editing, "
        f"{OUT.stat().st_size // 1024} KB)"
    )
    return 0
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    sys.exit(main())
|