#!/usr/bin/env python3
"""Generate the ground-truth "answer key" edit-diff turntables (editing fixtures).

For each *editing* fixture (one that ships an ``input.step`` seed) this renders
the reference companion to the per-submission edit diff: the GT drawn as a
translucent ghost with the **correct change painted blue** (added material on the
GT body, removed material as a blue phantom of the input). See
:func:`cadgenbench.common.edit_diff.build_gt_edit_diff_shapes`.

Like :mod:`generate_gt_turntables`, the result is a property of the **data
revision** (GT vs input), not of any submission, so this runs once per data
revision and both the gallery's ground-truth row and every per-submission report
reference the same webp via the GT proxy. One clip is written per fixture:

- ``<fixture>/renders/edit_diff_gt.webp`` -- full turntable.

The GT mesh comes from the trusted sidecar (no tessellation); the input mesh is
tessellated once at the GT's deflection so the GT-vs-input edit region is found
at one consistent scale (mirrors the eval's ``_editing_input_mesh``).

Run locally (against checkouts), render only::

    python tools/generate_gt_edit_diff.py \
        --gt-root ../cadgenbench-data-gt --inputs-root ../cadgenbench-data \
        --out-dir ../out/gt_edit_diff --no-upload

Drop ``--no-upload`` (and set an ``HF_TOKEN`` with **write** scope on the private
GT dataset) to commit the webps, or run it on an HF GPU job exactly like
``generate_gt_turntables.py``.
"""
from __future__ import annotations

import argparse
import os
import subprocess
import sys
import tempfile
from pathlib import Path

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download

# Allow running straight from the repo without installing the leaderboard pkg;
# cadgenbench itself must be importable (installed in the env / eval-gpu image).
# ``parents[2]`` is the directory two levels above the one containing this file.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT / "cadgenbench" / "src"
# Only touch sys.path when that source tree actually exists; inserting at
# index 0 makes it take precedence over any other importable copy.
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))

from cadgenbench.common.artifacts import StepArtifacts  # noqa: E402
from cadgenbench.common.edit_diff import render_gt_edit_diff_turntable  # noqa: E402

# File names looked up inside each fixture directory (local checkout or Hub repo).
# Ground-truth STEP model; a fixture must have one to be considered.
GT_STEP_NAME = "ground_truth.step"
# Mesh sidecar materialized next to the GT STEP (see ``_materialize_gt``).
GT_SIDECAR_NAME = "ground_truth.mesh.npz"
# Seed model; its presence is what marks a fixture as an editing fixture.
INPUT_STEP_NAME = "input.step"
# Path of the rendered turntable, relative to the fixture directory.
FULL_NAME = "renders/edit_diff_gt.webp"
# One commit per this many files: keeps an individual commit small and
# rate-limit friendly.
COMMIT_CHUNK = 60


def _default_repo_id() -> str:
    """Return the default ground-truth dataset repo id.

    ``HF_DATA_GT_REPO`` wins whenever it is set. Otherwise the id is
    ``<HF_ORG>/cadgenbench-data-gt``, with ``HF_ORG`` falling back to
    ``HuggingAI4Engineering``.
    """
    override = os.getenv("HF_DATA_GT_REPO")
    if override is not None:
        return override
    org = os.getenv("HF_ORG", "HuggingAI4Engineering")
    return f"{org}/cadgenbench-data-gt"


def _default_inputs_repo_id() -> str:
    """Return the default inputs dataset repo id (the one holding ``input.step``).

    ``HF_DATA_REPO`` wins whenever it is set. Otherwise the id is
    ``<HF_ORG>/cadgenbench-data``, with ``HF_ORG`` falling back to
    ``HuggingAI4Engineering``.
    """
    env = os.environ
    if "HF_DATA_REPO" in env:
        return env["HF_DATA_REPO"]
    return f"{env.get('HF_ORG', 'HuggingAI4Engineering')}/cadgenbench-data"


def _editing_fixture_ids(
    api: HfApi,
    gt_repo: str,
    inputs_repo: str,
    gt_root: Path | None,
    inputs_root: Path | None,
) -> list[str]:
    """List the fixture ids that have both a GT STEP and an ``input.step``.

    Each side is read from its local checkout when a root is given, otherwise
    from the file listing of the corresponding Hub dataset repo. Having an
    ``input.step`` is what makes a fixture an editing fixture, so the
    intersection of the two id sets is the editing set.

    Returns:
        The ids ordered by length first, then lexicographically.
    """

    def _ids_with(step_name: str, root: Path | None, repo: str) -> set[str]:
        # Hub listing: the fixture id is the first path segment of a matching file.
        if root is None:
            suffix = "/" + step_name
            return {
                path.split("/", 1)[0]
                for path in api.list_repo_files(repo, repo_type="dataset")
                if path.endswith(suffix)
            }
        # Local checkout: one sub-directory per fixture, holding the file directly.
        return {
            child.name
            for child in root.iterdir()
            if child.is_dir() and (child / step_name).is_file()
        }

    with_gt = _ids_with(GT_STEP_NAME, gt_root, gt_repo)
    with_input = _ids_with(INPUT_STEP_NAME, inputs_root, inputs_repo)
    editing = list(with_gt.intersection(with_input))
    editing.sort(key=lambda fixture_id: (len(fixture_id), fixture_id))
    return editing


def _materialize_gt(
    api: HfApi, repo_id: str, fixture: str, gt_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Return a local directory containing the fixture's GT STEP and mesh sidecar.

    With a local checkout (*gt_root*) the fixture's own directory is returned
    untouched. Otherwise both files are fetched from the Hub dataset *repo_id*
    and copied into ``<cache_dir>/gt/<fixture>/``, so the sidecar sits next to
    the STEP and ``StepArtifacts`` takes the trusted-mesh path (no
    tessellation, no validation).

    *api* is accepted for signature symmetry but is not used here.
    """
    if gt_root is not None:
        return gt_root / fixture

    fixture_dir = cache_dir / "gt" / fixture
    fixture_dir.mkdir(parents=True, exist_ok=True)
    for filename in (GT_STEP_NAME, GT_SIDECAR_NAME):
        downloaded = Path(
            hf_hub_download(
                repo_id=repo_id,
                filename=f"{fixture}/{filename}",
                repo_type="dataset",
                token=token,
            )
        )
        copy_path = fixture_dir / filename
        # An existing copy is left as-is rather than overwritten.
        if copy_path.exists():
            continue
        copy_path.write_bytes(downloaded.read_bytes())
    return fixture_dir


def _materialize_input(
    api: HfApi, repo_id: str, fixture: str, inputs_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Return a local path to the fixture's ``input.step``.

    With a local checkout (*inputs_root*) this is the file inside the
    fixture's directory; otherwise the file is downloaded from the Hub dataset
    *repo_id* and the path reported by ``hf_hub_download`` is returned.

    *api* and *cache_dir* are accepted for signature symmetry but are not
    used here.
    """
    if inputs_root is None:
        fetched = hf_hub_download(
            repo_id=repo_id,
            filename=f"{fixture}/{INPUT_STEP_NAME}",
            repo_type="dataset",
            token=token,
        )
        return Path(fetched)
    return inputs_root / fixture / INPUT_STEP_NAME


def _render_fixture(gt_dir: Path, input_step: Path) -> bytes:
    """Render one editing fixture's answer-key turntable and return the WebP bytes.

    The GT mesh is loaded from *gt_dir* with ``is_ground_truth=True``. The
    input STEP is then meshed with the GT mesh's ``linear_deflection_mm`` as
    its deflection override, so both meshes share one scale.
    """
    gt_artifacts = StepArtifacts(gt_dir / GT_STEP_NAME, is_ground_truth=True)
    gt_mesh = gt_artifacts.mesh()
    input_artifacts = StepArtifacts(
        input_step, deflection_override=gt_mesh.linear_deflection_mm,
    )
    input_mesh = input_artifacts.mesh()
    return render_gt_edit_diff_turntable(gt_mesh, input_mesh)


def _commit_in_chunks(api: HfApi, repo_id: str, ops: list[CommitOperationAdd]) -> None:
    """Commit *ops* to the dataset *repo_id*, ``COMMIT_CHUNK`` operations per commit.

    Each commit message carries the 1-based range of operations it contains.
    An empty *ops* list results in no commits.
    """
    start = 0
    while start < len(ops):
        batch = ops[start:start + COMMIT_CHUNK]
        first, last = start + 1, start + len(batch)
        api.create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=batch,
            commit_message=f"add GT edit-diff answer-key webp(s) [{first}-{last}]",
        )
        print(f"  committed {len(batch)} file(s)", flush=True)
        start += COMMIT_CHUNK


def _resolved_fixtures(
    parser: argparse.ArgumentParser, args: argparse.Namespace,
    api: HfApi, gt_root: Path | None, inputs_root: Path | None,
) -> list[str]:
    """Resolve the editing fixtures selected by the command line.

    Starts from every editing fixture, keeps only the ids named in
    ``--fixtures`` (comma-separated; blanks ignored) when given, then truncates
    to ``--limit`` when given. Calls ``parser.error`` if nothing is left.
    """
    selected = _editing_fixture_ids(
        api, args.repo_id, args.inputs_repo_id, gt_root, inputs_root,
    )
    if args.fixtures:
        # Strip whitespace around each id and drop empty entries.
        requested = set(filter(None, map(str.strip, args.fixtures.split(","))))
        selected = [fixture_id for fixture_id in selected if fixture_id in requested]
    if args.limit is not None:
        selected = selected[: args.limit]
    if not selected:
        parser.error("No editing fixtures matched.")
    return selected


def _upload_from_out_dir(api: HfApi, repo_id: str, out_dir: Path, fixtures: list[str]) -> None:
    """Commit the ``edit_diff_gt.webp`` files already rendered under *out_dir*.

    For each id in *fixtures*, ``<out_dir>/<fixture>/renders/edit_diff_gt.webp``
    is uploaded if it exists; fixtures without that file are skipped silently.
    Nothing is committed when no file is found.
    """
    candidates = (
        (fixture, out_dir / fixture / "renders" / "edit_diff_gt.webp")
        for fixture in fixtures
    )
    ops: list[CommitOperationAdd] = [
        CommitOperationAdd(f"{fixture}/{FULL_NAME}", webp.read_bytes())
        for fixture, webp in candidates
        if webp.exists()
    ]
    if ops:
        print(f"Uploading {len(ops)} file(s) to {repo_id} ...", flush=True)
        _commit_in_chunks(api, repo_id, ops)
    else:
        print("Nothing to upload (no rendered files found in --out-dir).", flush=True)


def _run_upload_only(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Handle ``--upload-only``: commit webps already rendered under ``--out-dir``.

    The fixture ids are discovered from the ``*/renders/edit_diff_gt.webp``
    files found in the output directory; no rendering happens. Calls
    ``parser.error`` when ``--out-dir`` is missing or holds no webp.

    Returns:
        ``0`` once the upload has been issued.
    """
    source = args.out_dir
    if not (source is not None and source.is_dir()):
        parser.error("--upload-only requires an existing --out-dir.")
    # A missing HF_TOKEN is allowed: HfApi falls back to the stored CLI token.
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    out_dir = source.resolve()
    fixtures = [
        webp.parent.parent.name
        for webp in out_dir.glob("*/renders/edit_diff_gt.webp")
    ]
    fixtures.sort(key=lambda fixture_id: (len(fixture_id), fixture_id))
    if not fixtures:
        parser.error(f"No edit_diff_gt.webp found under {out_dir}")
    print(f"Uploading {len(fixtures)} fixture webp(s) from {out_dir} -> {args.repo_id}", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)
    _upload_from_out_dir(api, args.repo_id, out_dir, fixtures)
    print("Done.", flush=True)
    return 0


def _run_isolated(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Render each fixture in a fresh subprocess (one fixture == ~240 plotters).

    Spawns this same tool with ``--fixtures <id> --no-upload`` per fixture so the
    GL context is fully released between fixtures, then (optionally) uploads once
    from ``--out-dir``. Worker stdout/stderr inherit the parent's, so progress
    and the VTK noise land in the same streams the non-isolated path uses.

    Only fixtures whose worker exited with status 0 are uploaded. A fixture
    that failed in this run may still have an ``edit_diff_gt.webp`` in
    ``--out-dir`` from an earlier run, and that stale file must not be
    committed as the current answer key. Re-run the fixture, or use
    ``--upload-only`` to commit whatever is on disk.

    Calls ``parser.error`` when ``--out-dir`` is missing, when an upload is
    requested without ``HF_TOKEN``, when a given root is not a directory, or
    when no fixture matches.

    Returns:
        ``1`` if any worker exited non-zero, else ``0``.
    """
    if args.out_dir is None:
        parser.error("--isolate requires --out-dir (workers render to disk).")
    token = os.environ.get("HF_TOKEN")
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)
    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)
    print(f"Isolated render of {len(fixtures)} editing fixture(s) (one subprocess each).", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    # Workers never upload; the parent does a single upload at the end.
    base_cmd = [sys.executable, str(Path(__file__).resolve()),
                "--out-dir", str(args.out_dir), "--no-upload",
                "--repo-id", args.repo_id, "--inputs-repo-id", args.inputs_repo_id]
    if gt_root is not None:
        base_cmd += ["--gt-root", str(gt_root)]
    if inputs_root is not None:
        base_cmd += ["--inputs-root", str(inputs_root)]

    failures: list[str] = []
    for i, fixture in enumerate(fixtures, start=1):
        print(f"=== [{i}/{len(fixtures)}] {fixture} ===", flush=True)
        # No check=True: a failing worker is recorded and the run continues.
        proc = subprocess.run([*base_cmd, "--fixtures", fixture])  # noqa: S603, PLW1510
        if proc.returncode != 0:
            failures.append(fixture)

    done = len(fixtures) - len(failures)
    print(f"Isolated render complete: {done}/{len(fixtures)} ok, {len(failures)} failed.", flush=True)
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)
    if not args.no_upload:
        # Leave out failed fixtures so a stale webp from an earlier run is
        # never uploaded in place of the render that just failed.
        failed = set(failures)
        rendered = [fixture for fixture in fixtures if fixture not in failed]
        _upload_from_out_dir(api, args.repo_id, args.out_dir, rendered)
        print("Done.", flush=True)
    return 1 if failures else 0


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser; the module docstring is its description."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument(
        "--inputs-root", type=Path, default=None,
        help="Local cadgenbench-data checkout (holds input.step). Omit for Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument("--inputs-repo-id", default=_default_inputs_repo_id())
    parser.add_argument("--fixtures", help="Comma-separated fixture ids. Omit for all editing fixtures.")
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp/png here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    parser.add_argument(
        "--upload-only", action="store_true",
        help=(
            "Skip rendering; commit the ``edit_diff_gt.webp`` files already under "
            "--out-dir to the GT dataset. Use after an isolated render run."
        ),
    )
    parser.add_argument(
        "--isolate", action="store_true",
        help=(
            "Render each fixture in its own subprocess. Works around macOS "
            "offscreen VTK losing its GL context after many sequential Plotter "
            "create/close cycles (not needed on the Linux EGL eval job). Implies "
            "render-to-out-dir; upload, if requested, runs once from --out-dir."
        ),
    )
    return parser


def main() -> int:
    """Parse the command line and render (and optionally upload) the answer keys.

    ``--upload-only`` and ``--isolate`` are dispatched to their own runners.
    Otherwise every selected editing fixture is rendered in this process. A
    fixture that raises is logged and skipped. Each rendered webp is written
    under ``--out-dir`` when given and, unless ``--no-upload`` is set,
    committed to the GT dataset in chunks.

    Invalid arguments are reported through ``parser.error``, which exits the
    process.

    Returns:
        ``1`` if any fixture failed to render, else ``0``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.upload_only:
        return _run_upload_only(parser, args)
    if args.isolate:
        return _run_isolated(parser, args)

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    # Same --fixtures / --limit resolution as the isolated path.
    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)

    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")

    print(
        f"Rendering {len(fixtures)} editing GT answer-key turntable(s)"
        + ("" if args.no_upload else f" -> {args.repo_id} (will upload)"),
        flush=True,
    )
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    ops: list[CommitOperationAdd] = []
    failures: list[str] = []
    # Hub downloads are staged in a temp dir that is removed on exit.
    with tempfile.TemporaryDirectory(prefix="gt-edit-diff-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture} ...", flush=True)
            try:
                gt_dir = _materialize_gt(
                    api, args.repo_id, fixture, gt_root, cache_dir, token,
                )
                input_step = _materialize_input(
                    api, args.inputs_repo_id, fixture, inputs_root, cache_dir, token,
                )
                full = _render_fixture(gt_dir, input_step)
            except Exception as e:  # noqa: BLE001 - log and keep going
                print(f"    FAILED {type(e).__name__}: {e}", flush=True)
                failures.append(fixture)
                continue

            print(f"    ok: full={len(full) // 1024}KB", flush=True)

            if args.out_dir is not None:
                fx_out = args.out_dir / fixture / "renders"
                fx_out.mkdir(parents=True, exist_ok=True)
                (fx_out / "edit_diff_gt.webp").write_bytes(full)

            # Only successfully rendered fixtures are queued for upload.
            ops.append(CommitOperationAdd(f"{fixture}/{FULL_NAME}", full))

        done = len(fixtures) - len(failures)
        print(
            f"Rendered {done}/{len(fixtures)} fixture(s) ({len(failures)} failed).",
            flush=True,
        )
        if failures:
            print(f"FAILED: {' '.join(failures)}", flush=True)
        if args.no_upload:
            print("Upload skipped (--no-upload).", flush=True)
            return 1 if failures else 0
        print(f"Uploading {len(ops)} file(s) to {args.repo_id} ...", flush=True)
        _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return 1 if failures else 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())