File size: 7,886 Bytes
c1cb5e4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | #!/usr/bin/env python3
"""Generate the permanent ground-truth turntable WebPs (one-time work).
Each GT fixture in ``cadgenbench-data-gt`` ships a trusted mesh sidecar
(``ground_truth.mesh.npz``) next to its ``ground_truth.step``. This tool loads
that **cached mesh** via :class:`cadgenbench.common.artifacts.StepArtifacts`
(so it never re-tessellates) and renders the same rotating WebP the eval
pipeline produces for submissions, then commits each to
``<fixture>/renders/rotating.webp`` in the GT dataset.
GT renders are a property of the data revision, not of any submission, so this
runs once (and again only when the GT geometry changes). The shared renderer
(:func:`cadgenbench.common.viewer.render_mesh_turntable_webp`) guarantees GT and
candidate turntables look identical (same material, lighting, framing, speed).
Run locally (against a checkout)::
python tools/generate_gt_turntables.py --gt-root ../cadgenbench-data-gt
Or on an HF Job (GPU, no checkout — downloads STEP + sidecar from the Hub).
After the eval-gpu image is rebuilt with the updated ``cadgenbench`` package,
dispatch this file's contents on that image, e.g. with the Python API::
from huggingface_hub import run_job
run_job(
image="hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu",
command=["python", "-c", Path("generate_gt_turntables.py").read_text()
+ "\nimport sys; sys.exit(main())"],
flavor="a10g-large",
namespace="michaelr27",
secrets={"HF_TOKEN": "<write-token>"},
timeout="30m",
)
(The job needs a **write**-scoped token for the private GT dataset.)
"""
from __future__ import annotations
import argparse
import os
import sys
import tempfile
from pathlib import Path
from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download
# Allow running straight from the repo without installing the leaderboard pkg;
# cadgenbench itself must be importable (installed in the env / eval-gpu image).
# If ``<parents[2] of this file>/cadgenbench/src`` exists it is put first on
# ``sys.path``; otherwise the installed package is used as-is.
try:
    _REPO_ROOT = Path(__file__).resolve().parents[2]
except (NameError, IndexError):
    # ``python -c "<source>"`` (the HF Job dispatch shown in the module
    # docstring) defines no ``__file__``, and a script stored too close to the
    # filesystem root has no ``parents[2]``. Neither situation can point at a
    # sibling checkout, so rely on the installed ``cadgenbench`` instead of
    # crashing at import time.
    _REPO_ROOT = None
_SRC = None if _REPO_ROOT is None else _REPO_ROOT / "cadgenbench" / "src"
if _SRC is not None and _SRC.is_dir():
    sys.path.insert(0, str(_SRC))
from cadgenbench.common.artifacts import StepArtifacts # noqa: E402
from cadgenbench.common.viewer import render_mesh_turntable_webp # noqa: E402
# File names inside each fixture directory: the STEP model and the mesh
# sidecar that is fetched/kept alongside it.
GT_STEP_NAME = "ground_truth.step"
GT_SIDECAR_NAME = "ground_truth.mesh.npz"
# Where the rendered turntable is committed, relative to the fixture
# directory in the GT dataset repo.
RENDER_PATH_IN_FIXTURE = "renders/rotating.webp"
# One commit per this many files: ~80 GT fixtures fit in a single commit, but
# chunking keeps an individual commit small and rate-limit friendly.
COMMIT_CHUNK = 60
def _default_repo_id() -> str:
    """Return the GT dataset repo id used when ``--repo-id`` is not given.

    ``HF_DATA_GT_REPO`` wins whenever it is set (even to an empty string).
    Otherwise the id is ``<org>/cadgenbench-data-gt`` where ``<org>`` comes
    from ``HF_ORG`` and falls back to ``HuggingAI4Engineering``.
    """
    explicit = os.getenv("HF_DATA_GT_REPO")
    if explicit is not None:
        return explicit
    org = os.getenv("HF_ORG", "HuggingAI4Engineering")
    return f"{org}/cadgenbench-data-gt"
def _fixture_ids(api: HfApi, repo_id: str, gt_root: Path | None) -> list[str]:
    """Return the ids of fixtures that ship a ``ground_truth.step``.

    A fixture is a top-level directory that directly contains the STEP file.
    With ``gt_root`` the local checkout is scanned; otherwise the file listing
    of the ``repo_id`` dataset on the Hub is used (the only case in which
    ``api`` is touched).

    Ids are de-duplicated and ordered shortest-first, then lexicographically,
    so e.g. ``"2"`` sorts before ``"10"``.
    """
    if gt_root is not None:
        found = {
            entry.name
            for entry in gt_root.iterdir()
            if entry.is_dir() and (entry / GT_STEP_NAME).is_file()
        }
    else:
        found = set()
        for repo_path in api.list_repo_files(repo_id, repo_type="dataset"):
            fixture, sep, filename = repo_path.partition("/")
            # Accept only ``<fixture>/ground_truth.step``. A STEP nested
            # deeper must not promote its top-level directory to a fixture:
            # ``<fixture>/ground_truth.step`` would not exist for it, and the
            # local-checkout branch above never reports such directories.
            if sep and fixture and filename == GT_STEP_NAME:
                found.add(fixture)
    return sorted(found, key=lambda s: (len(s), s))
def _materialize_fixture(
    api: HfApi,
    repo_id: str,
    fixture: str,
    gt_root: Path | None,
    cache_dir: Path,
    token: str | None,
) -> Path:
    """Return a local directory containing the fixture's STEP and mesh sidecar.

    With a local checkout (``gt_root``) the fixture directory is returned
    as-is and nothing is downloaded. Otherwise both files are fetched from the
    ``repo_id`` dataset with ``hf_hub_download`` and copied side by side into
    ``cache_dir/<fixture>``; an already existing copy is left untouched.

    The sidecar has to sit next to the STEP so ``StepArtifacts`` takes the
    trusted-mesh path (no tessellation, no validation). ``api`` is not used
    here; it is kept in the signature for the caller.
    """
    if gt_root is None:
        fixture_dir = cache_dir / fixture
        fixture_dir.mkdir(parents=True, exist_ok=True)
        for filename in (GT_STEP_NAME, GT_SIDECAR_NAME):
            downloaded = hf_hub_download(
                repo_id=repo_id,
                filename=f"{fixture}/{filename}",
                repo_type="dataset",
                token=token,
            )
            copy = fixture_dir / filename
            if copy.exists():
                continue
            copy.write_bytes(Path(downloaded).read_bytes())
        return fixture_dir
    return gt_root / fixture
def _render_fixture_webp(fixture_dir: Path) -> bytes:
    """Render the turntable WebP for the fixture stored in ``fixture_dir``.

    The fixture's ``ground_truth.step`` is opened as a ground-truth
    ``StepArtifacts`` and its mesh is handed to the shared turntable renderer.
    """
    step_path = fixture_dir / GT_STEP_NAME
    artifacts = StepArtifacts(step_path, is_ground_truth=True)
    # Per the module docstring, the sidecar next to the STEP supplies the
    # mesh, so this call is not expected to tessellate.
    return render_mesh_turntable_webp(artifacts.mesh())
def _commit_in_chunks(
    api: HfApi, repo_id: str, ops: list[CommitOperationAdd],
) -> None:
    """Push ``ops`` to the dataset repo, at most ``COMMIT_CHUNK`` per commit.

    Each commit message carries the 1-based inclusive range of the operations
    it contains, and a progress line is printed after every commit. An empty
    ``ops`` list creates no commit.
    """
    for start in range(0, len(ops), COMMIT_CHUNK):
        batch = ops[start:start + COMMIT_CHUNK]
        first = start + 1
        last = start + len(batch)
        message = f"add GT turntable webp(s) [{first}-{last}]"
        api.create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=batch,
            commit_message=message,
        )
        print(f" committed {len(batch)} file(s)", flush=True)
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: render the GT turntables and optionally upload them.

    Fixtures are discovered (local checkout or Hub), narrowed by
    ``--fixtures`` / ``--limit``, rendered one by one, optionally written to
    ``--out-dir``, and finally committed to the GT dataset in chunks unless
    ``--no-upload`` is given.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success. Usage errors leave through ``parser.error``, i.e.
        ``SystemExit`` with status 2.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring holds several paragraphs and literal blocks;
        # keep its line breaks instead of letting argparse re-wrap them.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument(
        "--fixtures",
        help="Comma-separated fixture ids. Omit for every GT fixture.",
    )
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    args = parser.parse_args(argv)

    # A negative limit would silently slice from the end of the fixture list.
    if args.limit is not None and args.limit < 0:
        parser.error(f"--limit must be >= 0, got {args.limit}")

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)

    gt_root = args.gt_root.resolve() if args.gt_root else None
    if gt_root is not None and not gt_root.is_dir():
        parser.error(f"--gt-root does not exist: {gt_root}")

    fixtures = _fixture_ids(api, args.repo_id, gt_root)
    if args.fixtures:
        wanted = {f.strip() for f in args.fixtures.split(",") if f.strip()}
        # Tell the operator about ids that match nothing (typically a typo)
        # instead of dropping them without a trace.
        unknown = sorted(wanted.difference(fixtures))
        if unknown:
            print(
                f"warning: unknown fixture id(s) skipped: {', '.join(unknown)}",
                file=sys.stderr,
                flush=True,
            )
        fixtures = [f for f in fixtures if f in wanted]
    if args.limit is not None:
        fixtures = fixtures[: args.limit]
    if not fixtures:
        parser.error("No GT fixtures matched.")

    # Fail before any rendering work if the upload cannot succeed anyway.
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")

    print(
        f"Rendering {len(fixtures)} GT turntable(s) -> {args.repo_id}"
        + ("" if args.no_upload else " (will upload)"),
        flush=True,
    )

    ops: list[CommitOperationAdd] = []
    # Hub downloads are staged in a throwaway directory that is removed once
    # every fixture has been rendered; the webp bytes themselves live in ops.
    with tempfile.TemporaryDirectory(prefix="gt-turntable-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture}", flush=True)
            fixture_dir = _materialize_fixture(
                api, args.repo_id, fixture, gt_root, cache_dir, token,
            )
            webp = _render_fixture_webp(fixture_dir)
            if args.out_dir is not None:
                local_out = args.out_dir / fixture / "rotating.webp"
                local_out.parent.mkdir(parents=True, exist_ok=True)
                local_out.write_bytes(webp)
            ops.append(
                CommitOperationAdd(
                    path_in_repo=f"{fixture}/{RENDER_PATH_IN_FIXTURE}",
                    path_or_fileobj=webp,
                )
            )

    if args.no_upload:
        print(f"Rendered {len(ops)} webp(s); upload skipped.", flush=True)
        return 0

    print(f"Uploading {len(ops)} webp(s) to {args.repo_id}…", flush=True)
    _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return 0
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|