CADGenBench / tools /generate_gt_turntables.py
Michael Rabinovich
Serve rotating WebP turntables + GT generator
c1cb5e4
#!/usr/bin/env python3
"""Generate the permanent ground-truth turntable WebPs (one-time work).
Each GT fixture in ``cadgenbench-data-gt`` ships a trusted mesh sidecar
(``ground_truth.mesh.npz``) next to its ``ground_truth.step``. This tool loads
that **cached mesh** via :class:`cadgenbench.common.artifacts.StepArtifacts`
(so it never re-tessellates) and renders the same rotating WebP the eval
pipeline produces for submissions, then commits each to
``<fixture>/renders/rotating.webp`` in the GT dataset.
GT renders are a property of the data revision, not of any submission, so this
runs once (and again only when the GT geometry changes). The shared renderer
(:func:`cadgenbench.common.viewer.render_mesh_turntable_webp`) guarantees GT and
candidate turntables look identical (same material, lighting, framing, speed).
Run locally (against a checkout; uploads by default, so ``HF_TOKEN`` must be
set — pass ``--no-upload`` to render only)::
python tools/generate_gt_turntables.py --gt-root ../cadgenbench-data-gt
Or on an HF Job (GPU, no checkout — downloads STEP + sidecar from the Hub).
After the eval-gpu image is rebuilt with the updated ``cadgenbench`` package,
dispatch this file's contents on that image, e.g. with the Python API::
from pathlib import Path
from huggingface_hub import run_job
run_job(
image="hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu",
command=["python", "-c", Path("generate_gt_turntables.py").read_text()
+ "\nimport sys; sys.exit(main())"],
flavor="a10g-large",
namespace="michaelr27",
secrets={"HF_TOKEN": "<write-token>"},
timeout="30m",
)
(The job needs a **write**-scoped token for the private GT dataset.)
"""
from __future__ import annotations
import argparse
import os
import sys
import tempfile
from pathlib import Path
from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download
# Allow running straight from the repo without installing the leaderboard pkg;
# cadgenbench itself must be importable (installed in the env / eval-gpu image).
# The lookup is best-effort: ``python -c <source>`` (the HF Job dispatch shown
# in the module docstring) defines no ``__file__``, and a script stored fewer
# than three directories deep has no ``parents[2]``. In both cases skip the
# path tweak and rely on the installed package instead of crashing on import.
try:
    _REPO_ROOT = Path(__file__).resolve().parents[2]
except (NameError, IndexError):
    _REPO_ROOT = None
_SRC = None if _REPO_ROOT is None else _REPO_ROOT / "cadgenbench" / "src"
if _SRC is not None and _SRC.is_dir():
    sys.path.insert(0, str(_SRC))
from cadgenbench.common.artifacts import StepArtifacts # noqa: E402
from cadgenbench.common.viewer import render_mesh_turntable_webp # noqa: E402
# File names of the GT STEP file and of its trusted mesh sidecar, both located
# directly inside a fixture directory.
GT_STEP_NAME = "ground_truth.step"
GT_SIDECAR_NAME = "ground_truth.mesh.npz"
# Where the rendered turntable is committed, relative to the fixture directory.
RENDER_PATH_IN_FIXTURE = "renders/rotating.webp"
# One commit per this many files: ~80 GT fixtures fit in a single commit, but
# chunking keeps an individual commit small and rate-limit friendly.
COMMIT_CHUNK = 60
def _default_repo_id() -> str:
return os.getenv(
"HF_DATA_GT_REPO",
f"{os.getenv('HF_ORG', 'HuggingAI4Engineering')}/cadgenbench-data-gt",
)
def _fixture_ids(api: HfApi, repo_id: str, gt_root: Path | None) -> list[str]:
"""Fixture ids that have a ``ground_truth.step`` (local checkout or Hub)."""
if gt_root is not None:
ids = [
p.name for p in gt_root.iterdir()
if p.is_dir() and (p / GT_STEP_NAME).is_file()
]
else:
files = api.list_repo_files(repo_id, repo_type="dataset")
ids = [
f.split("/", 1)[0] for f in files if f.endswith("/" + GT_STEP_NAME)
]
return sorted(set(ids), key=lambda s: (len(s), s))
def _materialize_fixture(
api: HfApi,
repo_id: str,
fixture: str,
gt_root: Path | None,
cache_dir: Path,
token: str | None,
) -> Path:
"""Return a local dir holding this fixture's STEP + trusted mesh sidecar.
The sidecar must sit next to the STEP so ``StepArtifacts`` takes the
trusted-mesh path (no tessellation, no validation).
"""
if gt_root is not None:
return gt_root / fixture
dest = cache_dir / fixture
dest.mkdir(parents=True, exist_ok=True)
for name in (GT_STEP_NAME, GT_SIDECAR_NAME):
local = hf_hub_download(
repo_id=repo_id,
filename=f"{fixture}/{name}",
repo_type="dataset",
token=token,
)
target = dest / name
if not target.exists():
target.write_bytes(Path(local).read_bytes())
return dest
def _render_fixture_webp(fixture_dir: Path) -> bytes:
    """Return the turntable WebP rendered from the fixture's GT mesh.

    The artifacts are opened on ``fixture_dir / ground_truth.step`` with
    ``is_ground_truth=True``; with the sidecar next to the STEP the mesh is
    expected to come from the trusted cache rather than a fresh tessellation.
    """
    step_path = fixture_dir / GT_STEP_NAME
    artifacts = StepArtifacts(step_path, is_ground_truth=True)
    return render_mesh_turntable_webp(artifacts.mesh())
def _commit_in_chunks(
api: HfApi, repo_id: str, ops: list[CommitOperationAdd],
) -> None:
for i in range(0, len(ops), COMMIT_CHUNK):
chunk = ops[i:i + COMMIT_CHUNK]
api.create_commit(
repo_id=repo_id,
repo_type="dataset",
operations=chunk,
commit_message=(
f"add GT turntable webp(s) [{i + 1}-{i + len(chunk)}]"
),
)
print(f" committed {len(chunk)} file(s)", flush=True)
def main(argv: list[str] | None = None) -> int:
    """Render GT turntables and, unless ``--no-upload``, commit them to the Hub.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: Via ``parser.error`` (status 2) for a missing ``--gt-root``
            directory, a negative ``--limit``, a missing ``HF_TOKEN`` when
            uploading, or when no fixture matches.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring holds literal command/code examples; the
        # default formatter would re-wrap them into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument(
        "--fixtures",
        help="Comma-separated fixture ids. Omit for every GT fixture.",
    )
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    args = parser.parse_args(argv)

    gt_root = args.gt_root.resolve() if args.gt_root else None
    if gt_root is not None and not gt_root.is_dir():
        parser.error(f"--gt-root does not exist: {gt_root}")
    if args.limit is not None and args.limit < 0:
        # A negative slice bound would silently drop fixtures from the end.
        parser.error("--limit must be >= 0")

    token = os.environ.get("HF_TOKEN")
    # Fail fast: check the upload precondition before any Hub listing or
    # rendering work is done.
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)

    fixtures = _fixture_ids(api, args.repo_id, gt_root)
    if args.fixtures:
        wanted = {f.strip() for f in args.fixtures.split(",") if f.strip()}
        unknown = sorted(wanted.difference(fixtures))
        if unknown:
            # Surface typos instead of silently rendering fewer fixtures.
            print(
                f"warning: ignoring unknown fixture id(s): {', '.join(unknown)}",
                file=sys.stderr,
                flush=True,
            )
        fixtures = [f for f in fixtures if f in wanted]
    if args.limit is not None:
        fixtures = fixtures[: args.limit]
    if not fixtures:
        parser.error("No GT fixtures matched.")

    print(
        f"Rendering {len(fixtures)} GT turntable(s) -> {args.repo_id}"
        + ("" if args.no_upload else " (will upload)"),
        flush=True,
    )

    ops: list[CommitOperationAdd] = []
    # Hub downloads are copied into a throwaway directory; the rendered bytes
    # are kept in memory, so nothing in it is needed once the loop finishes.
    with tempfile.TemporaryDirectory(prefix="gt-turntable-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture}", flush=True)
            fixture_dir = _materialize_fixture(
                api, args.repo_id, fixture, gt_root, cache_dir, token,
            )
            webp = _render_fixture_webp(fixture_dir)
            if args.out_dir is not None:
                local_out = args.out_dir / fixture / "rotating.webp"
                local_out.parent.mkdir(parents=True, exist_ok=True)
                local_out.write_bytes(webp)
            ops.append(
                CommitOperationAdd(
                    path_in_repo=f"{fixture}/{RENDER_PATH_IN_FIXTURE}",
                    path_or_fileobj=webp,
                )
            )

    if args.no_upload:
        print(f"Rendered {len(ops)} webp(s); upload skipped.", flush=True)
        return 0

    print(f"Uploading {len(ops)} webp(s) to {args.repo_id}…", flush=True)
    _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return 0
# Script entry point: terminate the process with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())