#!/usr/bin/env python3
"""Generate the ground-truth "answer key" edit-diff turntables (editing fixtures).

For each *editing* fixture (one that ships an ``input.step`` seed) this renders
the reference companion to the per-submission edit diff: the GT drawn as a
translucent ghost with the **correct change painted blue** (added material on
the GT body, removed material as a blue phantom of the input). See
:func:`cadgenbench.common.edit_diff.build_gt_edit_diff_shapes`.

Like :mod:`generate_gt_turntables`, the result is a property of the **data
revision** (GT vs input), not of any submission, so this runs once per data
revision and both the gallery's ground-truth row and every per-submission
report reference the same webp via the GT proxy.

One clip is written per fixture:

- ``<fixture>/renders/edit_diff_gt.webp`` -- full turntable.

The GT mesh comes from the trusted sidecar (no tessellation); the input mesh is
tessellated once at the GT's deflection so the GT-vs-input edit region is found
at one consistent scale (mirrors the eval's ``_editing_input_mesh``).

Run locally (against checkouts), render only::

    python tools/generate_gt_edit_diff.py \
        --gt-root ../cadgenbench-data-gt --inputs-root ../cadgenbench-data \
        --out-dir ../out/gt_edit_diff --no-upload

Uploading is the default: drop ``--no-upload`` (and provide an ``HF_TOKEN``
with **write** scope on the private GT dataset) to commit the webps, or run it
on an HF GPU job exactly like ``generate_gt_turntables.py``.
"""

from __future__ import annotations

import argparse
import os
import subprocess
import sys
import tempfile
from pathlib import Path

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download

# Allow running straight from the repo without installing the leaderboard pkg;
# cadgenbench itself must be importable (installed in the env / eval-gpu image).
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT / "cadgenbench" / "src"
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))

from cadgenbench.common.artifacts import StepArtifacts  # noqa: E402
from cadgenbench.common.edit_diff import render_gt_edit_diff_turntable  # noqa: E402

GT_STEP_NAME = "ground_truth.step"
GT_SIDECAR_NAME = "ground_truth.mesh.npz"
INPUT_STEP_NAME = "input.step"
# Path of the rendered clip, relative to a fixture directory. Used both for
# the Hub commit path and for the on-disk layout under ``--out-dir``.
FULL_NAME = "renders/edit_diff_gt.webp"

# One commit per this many files: keeps an individual commit small and
# rate-limit friendly.
COMMIT_CHUNK = 60


def _default_repo_id() -> str:
    """Return the GT dataset repo id.

    ``HF_DATA_GT_REPO`` wins when set; otherwise the id is built from
    ``HF_ORG`` (default ``HuggingAI4Engineering``).
    """
    return os.getenv(
        "HF_DATA_GT_REPO",
        f"{os.getenv('HF_ORG', 'HuggingAI4Engineering')}/cadgenbench-data-gt",
    )


def _default_inputs_repo_id() -> str:
    """Return the inputs dataset repo id.

    ``HF_DATA_REPO`` wins when set; otherwise the id is built from ``HF_ORG``
    (default ``HuggingAI4Engineering``).
    """
    return os.getenv(
        "HF_DATA_REPO",
        f"{os.getenv('HF_ORG', 'HuggingAI4Engineering')}/cadgenbench-data",
    )


def _local_fixture_ids(root: Path, marker: str) -> set[str]:
    """Names of the direct sub-directories of *root* that contain *marker*."""
    return {
        p.name
        for p in root.iterdir()
        if p.is_dir() and (p / marker).is_file()
    }


def _hub_fixture_ids(api: HfApi, repo_id: str, marker: str) -> set[str]:
    """Fixture ids in dataset *repo_id* that have a top-level ``<id>/<marker>``.

    Only paths exactly one directory deep are accepted. That matches what the
    local-checkout scan sees and what the download helpers request
    (``f"{fixture}/{name}"``); a nested ``a/b/<marker>`` must not produce a
    fixture id ``a`` that later fails to download.
    """
    suffix = "/" + marker
    return {
        f.split("/", 1)[0]
        for f in api.list_repo_files(repo_id, repo_type="dataset")
        if f.endswith(suffix) and f.count("/") == 1
    }


def _editing_fixture_ids(
    api: HfApi,
    gt_repo: str,
    inputs_repo: str,
    gt_root: Path | None,
    inputs_root: Path | None,
) -> list[str]:
    """Fixture ids with BOTH a ``ground_truth.step`` and an ``input.step``.

    The ``input.step`` is what defines an editing fixture, so the intersection
    of the two repos (or two checkouts) is exactly the editing set.

    Each side is read from its local checkout when a root is given and from
    the Hub otherwise. The result is sorted by ``(len(id), id)``, i.e. shorter
    ids first and lexicographically within one length.
    """
    if gt_root is not None:
        gt_ids = _local_fixture_ids(gt_root, GT_STEP_NAME)
    else:
        gt_ids = _hub_fixture_ids(api, gt_repo, GT_STEP_NAME)

    if inputs_root is not None:
        in_ids = _local_fixture_ids(inputs_root, INPUT_STEP_NAME)
    else:
        in_ids = _hub_fixture_ids(api, inputs_repo, INPUT_STEP_NAME)

    return sorted(gt_ids & in_ids, key=lambda s: (len(s), s))


def _materialize_gt(
    api: HfApi,
    repo_id: str,
    fixture: str,
    gt_root: Path | None,
    cache_dir: Path,
    token: str | None,
) -> Path:
    """Local dir holding this fixture's GT STEP + trusted mesh sidecar.

    The sidecar must sit next to the STEP so ``StepArtifacts`` takes the
    trusted-mesh path (no tessellation, no validation).

    With a local *gt_root* the fixture directory is returned as-is. Otherwise
    both files are downloaded from the Hub and copied side by side into
    ``cache_dir / "gt" / fixture``. *api* is accepted for signature symmetry
    with the other helpers and is not used here.
    """
    if gt_root is not None:
        return gt_root / fixture

    dest = cache_dir / "gt" / fixture
    dest.mkdir(parents=True, exist_ok=True)
    for name in (GT_STEP_NAME, GT_SIDECAR_NAME):
        local = hf_hub_download(
            repo_id=repo_id,
            filename=f"{fixture}/{name}",
            repo_type="dataset",
            token=token,
        )
        target = dest / name
        # Copy out of the Hub cache so STEP and sidecar share one directory.
        if not target.exists():
            target.write_bytes(Path(local).read_bytes())
    return dest


def _materialize_input(
    api: HfApi,
    repo_id: str,
    fixture: str,
    inputs_root: Path | None,
    cache_dir: Path,
    token: str | None,
) -> Path:
    """Local path to this fixture's ``input.step`` (checkout or Hub download).

    *api* and *cache_dir* are accepted for signature symmetry with
    :func:`_materialize_gt` and are not used here.
    """
    if inputs_root is not None:
        return inputs_root / fixture / INPUT_STEP_NAME
    local = hf_hub_download(
        repo_id=repo_id,
        filename=f"{fixture}/{INPUT_STEP_NAME}",
        repo_type="dataset",
        token=token,
    )
    return Path(local)


def _render_fixture(gt_dir: Path, input_step: Path) -> bytes:
    """Render the full answer-key turntable WebP for one editing fixture.

    The input STEP is meshed with the GT mesh's ``linear_deflection_mm`` as
    its deflection override so both meshes share one tessellation scale.
    """
    gt_mesh = StepArtifacts(gt_dir / GT_STEP_NAME, is_ground_truth=True).mesh()
    input_mesh = StepArtifacts(
        input_step,
        deflection_override=gt_mesh.linear_deflection_mm,
    ).mesh()
    return render_gt_edit_diff_turntable(gt_mesh, input_mesh)


def _commit_in_chunks(api: HfApi, repo_id: str, ops: list[CommitOperationAdd]) -> None:
    """Commit *ops* to dataset *repo_id*, ``COMMIT_CHUNK`` operations per commit.

    An empty *ops* list creates no commit.
    """
    for i in range(0, len(ops), COMMIT_CHUNK):
        chunk = ops[i:i + COMMIT_CHUNK]
        api.create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=chunk,
            commit_message=f"add GT edit-diff answer-key webp(s) [{i + 1}-{i + len(chunk)}]",
        )
        print(f"  committed {len(chunk)} file(s)", flush=True)


def _resolve_roots(
    parser: argparse.ArgumentParser,
    args: argparse.Namespace,
) -> tuple[Path | None, Path | None]:
    """Resolve ``--gt-root`` / ``--inputs-root`` and check that they exist.

    Returns ``(gt_root, inputs_root)``; each is ``None`` when the option was
    omitted. Exits through ``parser.error`` if a given root is not a directory.
    """
    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")
    return gt_root, inputs_root


def _resolved_fixtures(
    parser: argparse.ArgumentParser,
    args: argparse.Namespace,
    api: HfApi,
    gt_root: Path | None,
    inputs_root: Path | None,
) -> list[str]:
    """Editing fixtures selected by ``--fixtures`` / ``--limit``.

    Starts from every editing fixture, keeps only the comma-separated ids in
    ``--fixtures`` (when given), then truncates to ``--limit`` (when given).
    Exits through ``parser.error`` on a negative limit or an empty selection.
    """
    # A negative limit would silently slice fixtures off the *end* of the list.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be non-negative.")

    fixtures = _editing_fixture_ids(
        api, args.repo_id, args.inputs_repo_id, gt_root, inputs_root,
    )
    if args.fixtures:
        wanted = {f.strip() for f in args.fixtures.split(",") if f.strip()}
        fixtures = [f for f in fixtures if f in wanted]
    if args.limit is not None:
        fixtures = fixtures[: args.limit]
    if not fixtures:
        parser.error("No editing fixtures matched.")
    return fixtures


def _upload_from_out_dir(api: HfApi, repo_id: str, out_dir: Path, fixtures: list[str]) -> None:
    """Commit already-rendered webps under *out_dir* to the GT dataset.

    For each id in *fixtures*, ``out_dir/<id>/renders/edit_diff_gt.webp`` is
    uploaded to ``<id>/renders/edit_diff_gt.webp`` when the file exists;
    fixtures without a rendered file are skipped.
    """
    ops: list[CommitOperationAdd] = []
    for fixture in fixtures:
        local = out_dir / fixture / FULL_NAME
        if local.is_file():
            ops.append(CommitOperationAdd(f"{fixture}/{FULL_NAME}", local.read_bytes()))
    if not ops:
        print("Nothing to upload (no rendered files found in --out-dir).", flush=True)
        return
    print(f"Uploading {len(ops)} file(s) to {repo_id} ...", flush=True)
    _commit_in_chunks(api, repo_id, ops)


def _run_upload_only(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Commit already-rendered ``edit_diff_gt.webp`` files from --out-dir.

    The fixture set is discovered from the files present on disk, not from the
    datasets. Returns ``0``; argument problems exit through ``parser.error``.
    """
    if args.out_dir is None or not args.out_dir.is_dir():
        parser.error("--upload-only requires an existing --out-dir.")

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)  # falls back to the stored CLI token when env unset
    out_dir = args.out_dir.resolve()

    # <out_dir>/<fixture>/renders/edit_diff_gt.webp -> <fixture>
    fixtures = sorted(
        (p.parent.parent.name for p in out_dir.glob(f"*/{FULL_NAME}")),
        key=lambda s: (len(s), s),
    )
    if not fixtures:
        parser.error(f"No edit_diff_gt.webp found under {out_dir}")

    print(f"Uploading {len(fixtures)} fixture webp(s) from {out_dir} -> {args.repo_id}", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)
    _upload_from_out_dir(api, args.repo_id, out_dir, fixtures)
    print("Done.", flush=True)
    return 0


def _run_isolated(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Render each fixture in a fresh subprocess, then optionally upload once.

    Spawns this same tool with ``--fixtures <id> --no-upload`` per fixture so
    each render runs in its own process, then (unless ``--no-upload``) uploads
    once from ``--out-dir``. Worker stdout/stderr inherit the parent's.

    Only fixtures whose worker exited with status 0 are uploaded, so a stale
    webp left in ``--out-dir`` by an earlier run is never committed on behalf
    of a fixture that failed this time.

    Returns ``1`` if any worker failed, else ``0``.
    """
    if args.out_dir is None:
        parser.error("--isolate requires --out-dir (workers render to disk).")

    token = os.environ.get("HF_TOKEN")
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)

    gt_root, inputs_root = _resolve_roots(parser, args)
    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)
    print(f"Isolated render of {len(fixtures)} editing fixture(s) (one subprocess each).", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    base_cmd = [
        sys.executable, str(Path(__file__).resolve()),
        "--out-dir", str(args.out_dir),
        "--no-upload",
        "--repo-id", args.repo_id,
        "--inputs-repo-id", args.inputs_repo_id,
    ]
    if gt_root is not None:
        base_cmd += ["--gt-root", str(gt_root)]
    if inputs_root is not None:
        base_cmd += ["--inputs-root", str(inputs_root)]

    failures: list[str] = []
    for i, fixture in enumerate(fixtures, start=1):
        print(f"=== [{i}/{len(fixtures)}] {fixture} ===", flush=True)
        proc = subprocess.run([*base_cmd, "--fixtures", fixture])  # noqa: S603, PLW1510
        if proc.returncode != 0:
            failures.append(fixture)

    done = len(fixtures) - len(failures)
    print(f"Isolated render complete: {done}/{len(fixtures)} ok, {len(failures)} failed.", flush=True)
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)

    if not args.no_upload:
        failed = set(failures)
        succeeded = [f for f in fixtures if f not in failed]
        _upload_from_out_dir(api, args.repo_id, args.out_dir, succeeded)
    print("Done.", flush=True)
    return 1 if failures else 0


def main() -> int:
    """CLI entry point: render (and by default upload) the GT edit-diff webps.

    Dispatches to :func:`_run_upload_only` or :func:`_run_isolated` when the
    matching flag is set; otherwise renders every selected fixture in-process,
    optionally mirrors each webp under ``--out-dir``, and commits the results
    unless ``--no-upload`` is given.

    Returns ``1`` if any fixture failed to render, else ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument(
        "--inputs-root", type=Path, default=None,
        help="Local cadgenbench-data checkout (holds input.step). Omit for Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument("--inputs-repo-id", default=_default_inputs_repo_id())
    parser.add_argument("--fixtures", help="Comma-separated fixture ids. Omit for all editing fixtures.")
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp/png here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    parser.add_argument(
        "--upload-only", action="store_true",
        help=(
            "Skip rendering; commit the ``edit_diff_gt.webp`` files already under "
            "--out-dir to the GT dataset. Use after an isolated render run."
        ),
    )
    parser.add_argument(
        "--isolate", action="store_true",
        help=(
            "Render each fixture in its own subprocess. Works around macOS "
            "offscreen VTK losing its GL context after many sequential Plotter "
            "create/close cycles (not needed on the Linux EGL eval job). Implies "
            "render-to-out-dir; upload, if requested, runs once from --out-dir."
        ),
    )
    args = parser.parse_args()

    if args.upload_only:
        return _run_upload_only(parser, args)
    if args.isolate:
        return _run_isolated(parser, args)

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)

    gt_root, inputs_root = _resolve_roots(parser, args)
    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)

    will_upload = not args.no_upload
    if will_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")

    print(
        f"Rendering {len(fixtures)} editing GT answer-key turntable(s)"
        + (f" -> {args.repo_id} (will upload)" if will_upload else ""),
        flush=True,
    )
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    ops: list[CommitOperationAdd] = []
    failures: list[str] = []
    with tempfile.TemporaryDirectory(prefix="gt-edit-diff-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture} ...", flush=True)
            try:
                gt_dir = _materialize_gt(
                    api, args.repo_id, fixture, gt_root, cache_dir, token,
                )
                input_step = _materialize_input(
                    api, args.inputs_repo_id, fixture, inputs_root, cache_dir, token,
                )
                full = _render_fixture(gt_dir, input_step)
            except Exception as e:  # noqa: BLE001 - log and keep going
                print(f"  FAILED {type(e).__name__}: {e}", flush=True)
                failures.append(fixture)
                continue

            print(f"  ok: full={len(full) // 1024}KB", flush=True)
            if args.out_dir is not None:
                target = args.out_dir / fixture / FULL_NAME
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_bytes(full)
            # Keep the rendered bytes around only when they will be committed.
            if will_upload:
                ops.append(CommitOperationAdd(f"{fixture}/{FULL_NAME}", full))

    done = len(fixtures) - len(failures)
    print(
        f"Rendered {done}/{len(fixtures)} fixture(s) ({len(failures)} failed).",
        flush=True,
    )
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)

    if not will_upload:
        print("Upload skipped (--no-upload).", flush=True)
        return 1 if failures else 0

    print(f"Uploading {len(ops)} file(s) to {args.repo_id} ...", flush=True)
    _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return 1 if failures else 0


if __name__ == "__main__":
    raise SystemExit(main())