#!/usr/bin/env python3 """Generate the permanent ground-truth turntable WebPs (one-time work). Each GT fixture in ``cadgenbench-data-gt`` ships a trusted mesh sidecar (``ground_truth.mesh.npz``) next to its ``ground_truth.step``. This tool loads that **cached mesh** via :class:`cadgenbench.common.artifacts.StepArtifacts` (so it never re-tessellates) and renders the same rotating WebP the eval pipeline produces for submissions, then commits each to ``/renders/rotating.webp`` in the GT dataset. GT renders are a property of the data revision, not of any submission, so this runs once (and again only when the GT geometry changes). The shared renderer (:func:`cadgenbench.common.viewer.render_mesh_turntable_webp`) guarantees GT and candidate turntables look identical (same material, lighting, framing, speed). Run locally (against a checkout):: python tools/generate_gt_turntables.py --gt-root ../cadgenbench-data-gt --upload Or on an HF Job (GPU, no checkout — downloads STEP + sidecar from the Hub). After the eval-gpu image is rebuilt with the updated ``cadgenbench`` package, dispatch this file's contents on that image, e.g. with the Python API:: from huggingface_hub import run_job run_job( image="hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu", command=["python", "-c", Path("generate_gt_turntables.py").read_text() + "\nimport sys; sys.exit(main())"], flavor="a10g-large", namespace="michaelr27", secrets={"HF_TOKEN": ""}, timeout="30m", ) (The job needs a **write**-scoped token for the private GT dataset.) """ from __future__ import annotations import argparse import os import sys import tempfile from pathlib import Path from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download # Allow running straight from the repo without installing the leaderboard pkg; # cadgenbench itself must be importable (installed in the env / eval-gpu image). _REPO_ROOT = Path(__file__).resolve().parents[2] _SRC = _REPO_ROOT / "cadgenbench" / "src" if _SRC.is_dir(): sys.path.insert(0, str(_SRC)) from cadgenbench.common.artifacts import StepArtifacts # noqa: E402 from cadgenbench.common.viewer import render_mesh_turntable_webp # noqa: E402 GT_STEP_NAME = "ground_truth.step" GT_SIDECAR_NAME = "ground_truth.mesh.npz" RENDER_PATH_IN_FIXTURE = "renders/rotating.webp" # One commit per this many files: ~80 GT fixtures fit in a single commit, but # chunking keeps an individual commit small and rate-limit friendly. COMMIT_CHUNK = 60 def _default_repo_id() -> str: return os.getenv( "HF_DATA_GT_REPO", f"{os.getenv('HF_ORG', 'HuggingAI4Engineering')}/cadgenbench-data-gt", ) def _fixture_ids(api: HfApi, repo_id: str, gt_root: Path | None) -> list[str]: """Fixture ids that have a ``ground_truth.step`` (local checkout or Hub).""" if gt_root is not None: ids = [ p.name for p in gt_root.iterdir() if p.is_dir() and (p / GT_STEP_NAME).is_file() ] else: files = api.list_repo_files(repo_id, repo_type="dataset") ids = [ f.split("/", 1)[0] for f in files if f.endswith("/" + GT_STEP_NAME) ] return sorted(set(ids), key=lambda s: (len(s), s)) def _materialize_fixture( api: HfApi, repo_id: str, fixture: str, gt_root: Path | None, cache_dir: Path, token: str | None, ) -> Path: """Return a local dir holding this fixture's STEP + trusted mesh sidecar. The sidecar must sit next to the STEP so ``StepArtifacts`` takes the trusted-mesh path (no tessellation, no validation). """ if gt_root is not None: return gt_root / fixture dest = cache_dir / fixture dest.mkdir(parents=True, exist_ok=True) for name in (GT_STEP_NAME, GT_SIDECAR_NAME): local = hf_hub_download( repo_id=repo_id, filename=f"{fixture}/{name}", repo_type="dataset", token=token, ) target = dest / name if not target.exists(): target.write_bytes(Path(local).read_bytes()) return dest def _render_fixture_webp(fixture_dir: Path) -> bytes: """Render the turntable WebP from the fixture's cached GT mesh.""" art = StepArtifacts(fixture_dir / GT_STEP_NAME, is_ground_truth=True) mesh = art.mesh() # trusted sidecar -> no tessellation return render_mesh_turntable_webp(mesh) def _commit_in_chunks( api: HfApi, repo_id: str, ops: list[CommitOperationAdd], ) -> None: for i in range(0, len(ops), COMMIT_CHUNK): chunk = ops[i:i + COMMIT_CHUNK] api.create_commit( repo_id=repo_id, repo_type="dataset", operations=chunk, commit_message=( f"add GT turntable webp(s) [{i + 1}-{i + len(chunk)}]" ), ) print(f" committed {len(chunk)} file(s)", flush=True) def main() -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--gt-root", type=Path, default=None, help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.", ) parser.add_argument("--repo-id", default=_default_repo_id()) parser.add_argument( "--fixtures", help="Comma-separated fixture ids. Omit for every GT fixture.", ) parser.add_argument("--limit", type=int, default=None) parser.add_argument( "--out-dir", type=Path, default=None, help="Also write each webp here (e.g. for local inspection).", ) parser.add_argument( "--no-upload", action="store_true", help="Render only; do not commit to the GT dataset.", ) args = parser.parse_args() token = os.environ.get("HF_TOKEN") api = HfApi(token=token) gt_root = args.gt_root.resolve() if args.gt_root else None if gt_root is not None and not gt_root.is_dir(): parser.error(f"--gt-root does not exist: {gt_root}") fixtures = _fixture_ids(api, args.repo_id, gt_root) if args.fixtures: wanted = {f.strip() for f in args.fixtures.split(",") if f.strip()} fixtures = [f for f in fixtures if f in wanted] if args.limit is not None: fixtures = fixtures[: args.limit] if not fixtures: parser.error("No GT fixtures matched.") if not args.no_upload and not token: parser.error("HF_TOKEN required to upload (or pass --no-upload).") print( f"Rendering {len(fixtures)} GT turntable(s) -> {args.repo_id}" + ("" if args.no_upload else " (will upload)"), flush=True, ) ops: list[CommitOperationAdd] = [] with tempfile.TemporaryDirectory(prefix="gt-turntable-") as tmp: cache_dir = Path(tmp) for i, fixture in enumerate(fixtures, start=1): print(f"[{i}/{len(fixtures)}] {fixture}", flush=True) fixture_dir = _materialize_fixture( api, args.repo_id, fixture, gt_root, cache_dir, token, ) webp = _render_fixture_webp(fixture_dir) if args.out_dir is not None: local_out = args.out_dir / fixture / "rotating.webp" local_out.parent.mkdir(parents=True, exist_ok=True) local_out.write_bytes(webp) ops.append( CommitOperationAdd( path_in_repo=f"{fixture}/{RENDER_PATH_IN_FIXTURE}", path_or_fileobj=webp, ) ) if args.no_upload: print(f"Rendered {len(ops)} webp(s); upload skipped.", flush=True) return 0 print(f"Uploading {len(ops)} webp(s) to {args.repo_id}…", flush=True) _commit_in_chunks(api, args.repo_id, ops) print("Done.", flush=True) return 0 if __name__ == "__main__": raise SystemExit(main())