#!/usr/bin/env python3
"""Generate the ground-truth "answer key" edit-diff turntables (editing fixtures).

For each *editing* fixture (one that ships an ``input.step`` seed) this renders
the reference companion to the per-submission edit diff: the GT drawn as a
translucent ghost with the **correct change painted blue** (added material on the
GT body, removed material as a blue phantom of the input). See
:func:`cadgenbench.common.edit_diff.build_gt_edit_diff_shapes`.

Like :mod:`generate_gt_turntables`, the result is a property of the **data
revision** (GT vs input), not of any submission, so this runs once per data
revision and both the gallery's ground-truth row and every per-submission report
reference the same webp via the GT proxy. One clip is written per fixture:

- ``<fixture>/renders/edit_diff_gt.webp`` -- full turntable.

The GT mesh comes from the trusted sidecar (no tessellation); the input mesh is
tessellated once at the GT's deflection so the GT-vs-input edit region is found
at one consistent scale (mirrors the eval's ``_editing_input_mesh``).

Run locally (against checkouts), render only::

    python tools/generate_gt_edit_diff.py \
        --gt-root ../cadgenbench-data-gt --inputs-root ../cadgenbench-data \
        --out-dir ../out/gt_edit_diff --no-upload

Drop ``--no-upload`` (and set an ``HF_TOKEN`` with **write** scope on the private
GT dataset) to commit the webps, or run it on an HF GPU job exactly like
``generate_gt_turntables.py``.
"""
from __future__ import annotations
import argparse
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download
# Allow running straight from the repo without installing the leaderboard pkg;
# cadgenbench itself must be importable (installed in the env / eval-gpu image).
# ``parents[2]`` is the grandparent of this file's directory. When a
# ``cadgenbench/src`` tree exists there it is put first on ``sys.path`` so the
# imports below resolve against it; otherwise they rely on an installed package.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT / "cadgenbench" / "src"
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))
# These imports must stay below the sys.path tweak above (hence the E402 waivers).
from cadgenbench.common.artifacts import StepArtifacts  # noqa: E402
from cadgenbench.common.edit_diff import render_gt_edit_diff_turntable  # noqa: E402
# File names looked up inside each ``<fixture>/`` directory.
GT_STEP_NAME = "ground_truth.step"  # ground-truth STEP model
GT_SIDECAR_NAME = "ground_truth.mesh.npz"  # trusted mesh sidecar fetched alongside the GT STEP
INPUT_STEP_NAME = "input.step"  # seed model; its presence marks an editing fixture
# Fixture-relative path of the rendered turntable, used as the upload target.
FULL_NAME = "renders/edit_diff_gt.webp"
# One commit per this many files: keeps an individual commit small and
# rate-limit friendly.
COMMIT_CHUNK = 60
def _default_repo_id() -> str:
    """Return the default ground-truth dataset repo id.

    ``HF_DATA_GT_REPO`` wins whenever it is set (even to an empty string);
    otherwise the id is ``<HF_ORG>/cadgenbench-data-gt`` with ``HF_ORG``
    defaulting to ``HuggingAI4Engineering``.
    """
    explicit = os.environ.get("HF_DATA_GT_REPO")
    if explicit is not None:
        return explicit
    org = os.environ.get("HF_ORG", "HuggingAI4Engineering")
    return f"{org}/cadgenbench-data-gt"
def _default_inputs_repo_id() -> str:
    """Return the default inputs dataset repo id.

    ``HF_DATA_REPO`` wins whenever it is set (even to an empty string);
    otherwise the id is ``<HF_ORG>/cadgenbench-data`` with ``HF_ORG``
    defaulting to ``HuggingAI4Engineering``.
    """
    explicit = os.environ.get("HF_DATA_REPO")
    if explicit is not None:
        return explicit
    org = os.environ.get("HF_ORG", "HuggingAI4Engineering")
    return f"{org}/cadgenbench-data"
def _editing_fixture_ids(
    api: HfApi,
    gt_repo: str,
    inputs_repo: str,
    gt_root: Path | None,
    inputs_root: Path | None,
) -> list[str]:
    """Fixture ids with BOTH a ``ground_truth.step`` and an ``input.step``.

    The ``input.step`` is what defines an editing fixture, so the intersection
    of the two repos (or two checkouts) is exactly the editing set.

    Each side is read from its local checkout when a root is given, otherwise
    from the Hub file listing of the corresponding dataset repo. In both cases
    only a file sitting *directly* inside the fixture directory counts
    (``<fixture>/<name>``); a deeper match such as ``<fixture>/old/<name>`` is
    ignored, because later steps address the file as ``<fixture>/<name>``.

    Returns:
        The ids sorted by ``(len(id), id)``, i.e. shorter ids first and ties
        broken lexicographically.
    """

    def ids_holding(root: Path | None, repo: str, filename: str) -> set[str]:
        # Local checkout: a fixture is a sub-directory that contains the file.
        if root is not None:
            return {
                child.name
                for child in root.iterdir()
                if child.is_dir() and (child / filename).is_file()
            }
        # Hub listing: repo paths are '/'-separated; require exactly two parts
        # so this branch agrees with the direct-child rule used above.
        found: set[str] = set()
        for repo_path in api.list_repo_files(repo, repo_type="dataset"):
            parts = repo_path.split("/")
            if len(parts) == 2 and parts[1] == filename:
                found.add(parts[0])
        return found

    gt_ids = ids_holding(gt_root, gt_repo, GT_STEP_NAME)
    in_ids = ids_holding(inputs_root, inputs_repo, INPUT_STEP_NAME)
    return sorted(gt_ids & in_ids, key=lambda s: (len(s), s))
def _materialize_gt(
    api: HfApi, repo_id: str, fixture: str, gt_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Local dir holding this fixture's GT STEP + trusted mesh sidecar.

    The sidecar must sit next to the STEP so ``StepArtifacts`` takes the
    trusted-mesh path (no tessellation, no validation).

    With a local checkout the fixture directory inside *gt_root* is returned
    untouched. Otherwise both files are downloaded from the Hub dataset
    *repo_id* and copied side by side into ``<cache_dir>/gt/<fixture>``.
    """
    if gt_root is not None:
        return gt_root / fixture

    fixture_dir = cache_dir / "gt" / fixture
    fixture_dir.mkdir(parents=True, exist_ok=True)
    for filename in (GT_STEP_NAME, GT_SIDECAR_NAME):
        downloaded = Path(
            hf_hub_download(
                repo_id=repo_id,
                filename=f"{fixture}/{filename}",
                repo_type="dataset",
                token=token,
            )
        )
        copy_path = fixture_dir / filename
        if copy_path.exists():
            # Already copied into the cache dir; keep the existing file.
            continue
        copy_path.write_bytes(downloaded.read_bytes())
    return fixture_dir
def _materialize_input(
    api: HfApi, repo_id: str, fixture: str, inputs_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Local path to this fixture's ``input.step`` (checkout or Hub download).

    A local checkout is used as-is; without one the file is fetched from the
    Hub dataset *repo_id* and the downloaded path is returned.
    """
    if inputs_root is None:
        downloaded = hf_hub_download(
            repo_id=repo_id,
            filename=f"{fixture}/{INPUT_STEP_NAME}",
            repo_type="dataset",
            token=token,
        )
        return Path(downloaded)
    return inputs_root / fixture / INPUT_STEP_NAME
def _render_fixture(gt_dir: Path, input_step: Path) -> bytes:
    """Render the full answer-key turntable WebP for one editing fixture.

    The GT mesh is loaded first; the input STEP is then meshed with the GT
    mesh's ``linear_deflection_mm`` as its deflection override, and both
    meshes are handed to ``render_gt_edit_diff_turntable``.
    """
    reference = StepArtifacts(gt_dir / GT_STEP_NAME, is_ground_truth=True).mesh()
    deflection = reference.linear_deflection_mm
    seed = StepArtifacts(input_step, deflection_override=deflection).mesh()
    return render_gt_edit_diff_turntable(reference, seed)
def _commit_in_chunks(api: HfApi, repo_id: str, ops: list[CommitOperationAdd]) -> None:
    """Commit *ops* to the dataset *repo_id*, ``COMMIT_CHUNK`` operations per commit.

    Every commit message carries the 1-based inclusive range of the operations
    it contains; an empty *ops* list results in no commit at all.
    """
    offsets = range(0, len(ops), COMMIT_CHUNK)
    batches = ((offset, ops[offset:offset + COMMIT_CHUNK]) for offset in offsets)
    for offset, batch in batches:
        first = offset + 1
        last = offset + len(batch)
        api.create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=batch,
            commit_message=f"add GT edit-diff answer-key webp(s) [{first}-{last}]",
        )
        print(f" committed {len(batch)} file(s)", flush=True)
def _resolved_fixtures(
    parser: argparse.ArgumentParser, args: argparse.Namespace,
    api: HfApi, gt_root: Path | None, inputs_root: Path | None,
) -> list[str]:
    """Return the editing fixtures selected by ``--fixtures`` / ``--limit``.

    Starts from every editing fixture, keeps only the ids named in the
    comma-separated ``--fixtures`` value (when given), then truncates to
    ``--limit`` (when given). An empty selection is reported through
    ``parser.error``.
    """
    selected = _editing_fixture_ids(
        api, args.repo_id, args.inputs_repo_id, gt_root, inputs_root,
    )
    if args.fixtures:
        requested: set[str] = set()
        for raw in args.fixtures.split(","):
            name = raw.strip()
            if name:
                requested.add(name)
        # Filter the discovered list so its sort order is preserved.
        selected = [fx for fx in selected if fx in requested]
    if args.limit is not None:
        selected = selected[: args.limit]
    if selected:
        return selected
    parser.error("No editing fixtures matched.")
    return selected
def _upload_from_out_dir(api: HfApi, repo_id: str, out_dir: Path, fixtures: list[str]) -> None:
    """Commit already-rendered ``edit_diff_gt.webp`` files under *out_dir*.

    For every id in *fixtures* whose ``<out_dir>/<id>/renders/edit_diff_gt.webp``
    exists, the file is read and queued for upload to ``<id>/renders/...`` in
    the dataset *repo_id*. Fixtures without a rendered file are skipped; when
    nothing is found a note is printed and no commit is made.
    """
    candidates = [
        (fx, out_dir / fx / "renders" / "edit_diff_gt.webp") for fx in fixtures
    ]
    ops: list[CommitOperationAdd] = [
        CommitOperationAdd(f"{fx}/{FULL_NAME}", webp.read_bytes())
        for fx, webp in candidates
        if webp.exists()
    ]
    if ops:
        print(f"Uploading {len(ops)} file(s) to {repo_id} ...", flush=True)
        _commit_in_chunks(api, repo_id, ops)
        return
    print("Nothing to upload (no rendered files found in --out-dir).", flush=True)
def _run_upload_only(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Commit already-rendered ``edit_diff_gt.webp`` files from --out-dir.

    The fixture list is discovered from disk (every
    ``<out-dir>/<fixture>/renders/edit_diff_gt.webp``), so no fixture listing
    from the datasets is needed. Returns ``0`` on completion; a missing
    ``--out-dir`` or an empty one is reported through ``parser.error``.
    """
    requested_dir = args.out_dir
    if requested_dir is None or not requested_dir.is_dir():
        parser.error("--upload-only requires an existing --out-dir.")

    # falls back to the stored CLI token when env unset
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    out_dir = requested_dir.resolve()

    # <out_dir>/<fixture>/renders/edit_diff_gt.webp -> <fixture>
    fixtures = [
        webp.parent.parent.name
        for webp in out_dir.glob("*/renders/edit_diff_gt.webp")
    ]
    fixtures.sort(key=lambda s: (len(s), s))
    if not fixtures:
        parser.error(f"No edit_diff_gt.webp found under {out_dir}")

    print(f"Uploading {len(fixtures)} fixture webp(s) from {out_dir} -> {args.repo_id}", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)
    _upload_from_out_dir(api, args.repo_id, out_dir, fixtures)
    print("Done.", flush=True)
    return 0
def _run_isolated(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Render each fixture in a fresh subprocess (one fixture == ~240 plotters).

    Spawns this same tool with ``--fixtures <id> --no-upload`` per fixture so the
    GL context is fully released between fixtures, then (optionally) uploads once
    from ``--out-dir``. Worker stdout/stderr inherit the parent's, so progress
    and the VTK noise land in the same streams the non-isolated path uses.

    Only fixtures whose worker exited with status 0 are uploaded. A failed
    fixture may still have an ``edit_diff_gt.webp`` in ``--out-dir`` left over
    from an earlier run, and committing that stale file would misreport it as
    the current render.

    Returns:
        ``1`` when at least one worker failed, ``0`` otherwise.
    """
    if args.out_dir is None:
        parser.error("--isolate requires --out-dir (workers render to disk).")
    token = os.environ.get("HF_TOKEN")
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)

    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)
    print(f"Isolated render of {len(fixtures)} editing fixture(s) (one subprocess each).", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    # Every worker gets the same repos/roots and always renders to disk without
    # uploading; only the ``--fixtures`` value differs per invocation.
    base_cmd = [
        sys.executable, str(Path(__file__).resolve()),
        "--out-dir", str(args.out_dir), "--no-upload",
        "--repo-id", args.repo_id, "--inputs-repo-id", args.inputs_repo_id,
    ]
    if gt_root is not None:
        base_cmd += ["--gt-root", str(gt_root)]
    if inputs_root is not None:
        base_cmd += ["--inputs-root", str(inputs_root)]

    failures: list[str] = []
    for i, fixture in enumerate(fixtures, start=1):
        print(f"=== [{i}/{len(fixtures)}] {fixture} ===", flush=True)
        proc = subprocess.run([*base_cmd, "--fixtures", fixture])  # noqa: S603, PLW1510
        if proc.returncode != 0:
            failures.append(fixture)

    done = len(fixtures) - len(failures)
    print(f"Isolated render complete: {done}/{len(fixtures)} ok, {len(failures)} failed.", flush=True)
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)

    if not args.no_upload:
        # Skip failed fixtures so a stale webp from a previous run is never
        # committed in place of the render that just failed.
        failed = set(failures)
        rendered = [fixture for fixture in fixtures if fixture not in failed]
        _upload_from_out_dir(api, args.repo_id, args.out_dir, rendered)
    print("Done.", flush=True)
    return 1 if failures else 0
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this tool."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument(
        "--inputs-root", type=Path, default=None,
        help="Local cadgenbench-data checkout (holds input.step). Omit for Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument("--inputs-repo-id", default=_default_inputs_repo_id())
    parser.add_argument("--fixtures", help="Comma-separated fixture ids. Omit for all editing fixtures.")
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp/png here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    parser.add_argument(
        "--upload-only", action="store_true",
        help=(
            "Skip rendering; commit the ``edit_diff_gt.webp`` files already under "
            "--out-dir to the GT dataset. Use after an isolated render run."
        ),
    )
    parser.add_argument(
        "--isolate", action="store_true",
        help=(
            "Render each fixture in its own subprocess. Works around macOS "
            "offscreen VTK losing its GL context after many sequential Plotter "
            "create/close cycles (not needed on the Linux EGL eval job). Implies "
            "render-to-out-dir; upload, if requested, runs once from --out-dir."
        ),
    )
    return parser


def main() -> int:
    """Render (and by default upload) the GT edit-diff turntable per editing fixture.

    Dispatches to the upload-only or isolated mode when requested. Otherwise
    every selected fixture is rendered in-process: a failing fixture is logged
    and skipped, successful renders are optionally written under ``--out-dir``
    and, unless ``--no-upload`` is given, committed to the GT dataset in chunks.

    Returns:
        ``1`` when at least one fixture failed to render, ``0`` otherwise.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # The two flags contradict each other; refuse instead of silently uploading.
    if args.upload_only and args.no_upload:
        parser.error("--upload-only cannot be combined with --no-upload.")
    if args.upload_only:
        return _run_upload_only(parser, args)
    if args.isolate:
        return _run_isolated(parser, args)

    token = os.environ.get("HF_TOKEN")
    # Fail fast: check the upload credentials before listing/downloading
    # anything (same order as the isolated path).
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)

    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    # Shared with the isolated path: applies --fixtures / --limit and reports
    # an empty selection through parser.error.
    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)

    print(
        f"Rendering {len(fixtures)} editing GT answer-key turntable(s)"
        + ("" if args.no_upload else f" -> {args.repo_id} (will upload)"),
        flush=True,
    )
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    ops: list[CommitOperationAdd] = []
    failures: list[str] = []
    # Hub downloads of the GT files are copied into this throwaway directory.
    with tempfile.TemporaryDirectory(prefix="gt-edit-diff-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture} ...", flush=True)
            try:
                gt_dir = _materialize_gt(
                    api, args.repo_id, fixture, gt_root, cache_dir, token,
                )
                input_step = _materialize_input(
                    api, args.inputs_repo_id, fixture, inputs_root, cache_dir, token,
                )
                full = _render_fixture(gt_dir, input_step)
            except Exception as e:  # noqa: BLE001 - log and keep going
                print(f" FAILED {type(e).__name__}: {e}", flush=True)
                failures.append(fixture)
                continue
            print(f" ok: full={len(full) // 1024}KB", flush=True)
            if args.out_dir is not None:
                # Same layout as in the dataset: <out-dir>/<fixture>/renders/...
                target = args.out_dir / fixture / FULL_NAME
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_bytes(full)
            ops.append(CommitOperationAdd(f"{fixture}/{FULL_NAME}", full))

    done = len(fixtures) - len(failures)
    print(
        f"Rendered {done}/{len(fixtures)} fixture(s) ({len(failures)} failed).",
        flush=True,
    )
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)

    exit_code = 1 if failures else 0
    if args.no_upload:
        print("Upload skipped (--no-upload).", flush=True)
        return exit_code
    print(f"Uploading {len(ops)} file(s) to {args.repo_id} ...", flush=True)
    _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return exit_code
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())