| |
| """Backfill the static edit-diff still (``edit_diff.png``) into the render bucket. |
| |
| Editing samples ship an *animated* ``edit_diff.webp`` turntable but no static |
| frame. The grid thumbnail needs a still (an animated WebP can't be frozen to one |
| angle in HTML), so the eval pipeline now also writes ``edit_diff.png`` (frame 0) |
| beside the clip — but submissions evaluated *before* that change only have the |
| WebP in the bucket. |
| |
| This one-time tool closes that gap **without re-evaluating or re-rendering**: it |
| lists the public render bucket, finds every ``.../<fixture>/edit_diff.webp`` that |
| has no sibling ``edit_diff.png``, downloads the WebP, extracts frame 0 via the |
| shared :func:`cadgenbench.common.imaging.first_frame_png` (the exact frame the |
| forward pipeline now saves), and uploads ``edit_diff.png`` next to it in the same |
| bucket prefix — so it serves by the same anonymous render URL as every other |
| render. |
| |
| Run (needs a write-scoped ``HF_TOKEN`` for the bucket):: |
| |
| HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py # all submissions |
| HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py --submission <id> |
| python tools/backfill_edit_diff_still.py --dry-run # list only, no token needed |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import sys |
| import tempfile |
| import urllib.request |
| from pathlib import Path |
|
|
| from huggingface_hub import HfApi |
|
|
| |
| |
# Import bootstrap: ``parents[2]`` is the directory two levels above the folder
# holding this file. When a ``cadgenbench/src`` tree exists there, it is put
# first on ``sys.path`` so it takes precedence when importing ``cadgenbench``.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT.joinpath("cadgenbench", "src")
if _SRC.is_dir():
    sys.path.insert(0, os.fspath(_SRC))
|
|
| from cadgenbench.common.imaging import first_frame_png |
|
|
# Hub endpoint (any trailing slash is trimmed) and owning organisation; both
# can be overridden through environment variables of the same name.
HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
HF_ORG = os.environ.get("HF_ORG", "HuggingAI4Engineering")

# Bucket that stores the renders (overridable via ``HF_RENDER_BUCKET``), the
# top-level folder inside it, and the two object names this tool pairs up.
RENDER_BUCKET = os.environ.get(
    "HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging"
)
RENDER_PREFIX = "renders"
WEBP_NAME = "edit_diff.webp"
PNG_NAME = "edit_diff.png"

# Number of staged stills that triggers one batch upload call.
UPLOAD_CHUNK = 50
|
|
|
|
def _resolve_url(path: str) -> str:
    """Return the anonymous resolve URL for a bucket-relative object path.

    The path is percent-encoded (``/`` separators are kept) so object names
    containing spaces, ``#``, ``?``, ``%`` or non-ASCII characters still form
    a valid URL that addresses the intended object. Names made only of
    unreserved characters are passed through unchanged.

    Args:
        path: Object path relative to the bucket root.

    Returns:
        ``{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/<encoded path>``.
    """
    # Function-scope import: the module itself only imports ``urllib.request``.
    from urllib.parse import quote

    return f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{quote(path)}"
|
|
|
|
def _download(path: str, token: str | None) -> bytes:
    """Fetch one bucket object and return its raw bytes.

    A bearer ``Authorization`` header is attached only when *token* is truthy;
    otherwise the request is sent without credentials. The request uses a
    60-second timeout.
    """
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    request = urllib.request.Request(_resolve_url(path), headers=headers)
    with urllib.request.urlopen(request, timeout=60) as response:
        payload = response.read()
    return payload
|
|
|
|
def _list_entries(api: HfApi, prefix: str, token: str | None) -> list[str]:
    """List the file paths found under *prefix* in the render bucket.

    The listing is recursive. An entry is dropped when it has no (or an empty)
    ``path``, when it reports a truthy ``is_folder``, or when its path ends in
    ``/``.
    """
    tree = api.list_bucket_tree(
        RENDER_BUCKET, prefix=prefix, recursive=True, token=token,
    )
    files: list[str] = []
    for entry in tree:
        entry_path = getattr(entry, "path", None)
        if not entry_path:
            continue
        if getattr(entry, "is_folder", False) or entry_path.endswith("/"):
            continue
        files.append(entry_path)
    return files
|
|
|
|
def _missing_stills(paths: list[str]) -> list[str]:
    """Return, sorted, every ``edit_diff.webp`` path lacking a sibling PNG.

    A path is considered only when it ends with ``"/" + WEBP_NAME``. Its
    sibling is the same path with that trailing name swapped for ``PNG_NAME``;
    the WebP path is reported when the sibling does not occur in *paths*.
    """
    known = frozenset(paths)
    webp_suffix = "/" + WEBP_NAME
    name_len = len(WEBP_NAME)
    return sorted(
        candidate
        for candidate in paths
        if candidate.endswith(webp_suffix)
        and candidate[:-name_len] + PNG_NAME not in known
    )
|
|
|
|
def _upload_chunk(
    api: HfApi, batch: list[tuple[str, str]], token: str | None,
) -> int:
    """Upload one batch of ``(local file, bucket path)`` pairs; return its size."""
    api.batch_bucket_files(RENDER_BUCKET, add=batch, token=token)
    # The upload call has returned, so the staged copies are no longer needed;
    # dropping them keeps temp-dir usage bounded by a single chunk.
    for local_file, _dest in batch:
        Path(local_file).unlink(missing_ok=True)
    return len(batch)


def main() -> int:
    """Backfill ``edit_diff.png`` for every ``edit_diff.webp`` lacking one.

    Steps: list the bucket under ``renders`` (or ``renders/<id>`` with
    ``--submission``), work out which WebPs have no sibling PNG, then, unless
    ``--dry-run`` is given, download each WebP, extract its first frame and
    upload the PNGs in batches of ``UPLOAD_CHUNK``.

    A WebP that cannot be downloaded or decoded is reported as ``SKIP`` and
    does not abort the run; upload failures propagate.

    Returns:
        ``0`` on completion (including dry runs and "nothing to do").

    Raises:
        SystemExit: via ``parser.error`` when ``--limit`` is negative, or when
            an upload is needed but ``HF_TOKEN`` is not set.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains paragraphs and usage examples; the
        # default formatter would reflow them into one block in ``--help``.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--submission",
        help="Limit to one submission id (the prefix is renders/<id>). "
        "Omit to scan every submission in the bucket.",
    )
    parser.add_argument("--limit", type=int, default=None,
                        help="Process at most N stills (after listing).")
    parser.add_argument(
        "--dry-run", action="store_true",
        help="List what would be created; download/upload nothing.",
    )
    args = parser.parse_args()
    # A negative value would otherwise slice from the end (``todo[:-n]``) and
    # silently drop the last n items instead of capping the run.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be >= 0.")

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    prefix = (
        f"{RENDER_PREFIX}/{args.submission}" if args.submission else RENDER_PREFIX
    )

    print(f"Scanning bucket {RENDER_BUCKET} under {prefix}/ …", flush=True)
    paths = _list_entries(api, prefix, token)
    missing = _missing_stills(paths)
    todo = missing if args.limit is None else missing[: args.limit]

    n_webp = sum(1 for p in paths if p.endswith("/" + WEBP_NAME))
    # Report the real backlog, not the post---limit count.
    print(
        f"Found {n_webp} edit_diff.webp; {len(missing)} missing a still.",
        flush=True,
    )
    if len(todo) < len(missing):
        print(f"Processing the first {len(todo)} (--limit {args.limit}).", flush=True)
    if not todo:
        print("Nothing to backfill.")
        return 0
    if args.dry_run:
        for p in todo:
            print(" would create", p[: -len(WEBP_NAME)] + PNG_NAME)
        return 0
    if not token:
        parser.error("HF_TOKEN required to upload (or pass --dry-run).")

    created = 0
    skipped = 0
    total = len(todo)
    with tempfile.TemporaryDirectory(prefix="edit-diff-still-") as tmp:
        tmp_dir = Path(tmp)
        batch: list[tuple[str, str]] = []
        for i, webp_path in enumerate(todo, start=1):
            dest = webp_path[: -len(WEBP_NAME)] + PNG_NAME
            try:
                png = first_frame_png(_download(webp_path, token))
            except Exception as e:
                # Deliberately broad: one unreadable clip must not stop the
                # rest of the backfill. The failure is printed and counted.
                skipped += 1
                print(
                    f" [{i}/{total}] SKIP {webp_path} ({type(e).__name__}: {e})",
                    flush=True,
                )
                continue
            local = tmp_dir / f"{i}.png"
            local.write_bytes(png)
            batch.append((str(local), dest))
            print(f" [{i}/{total}] {dest} ({len(png) // 1024} KB)", flush=True)
            if len(batch) >= UPLOAD_CHUNK:
                created += _upload_chunk(api, batch, token)
                batch = []
        # Flush whatever is left over after the last full chunk.
        if batch:
            created += _upload_chunk(api, batch, token)

    print(f"Done. Uploaded {created} edit_diff.png still(s).", flush=True)
    if skipped:
        print(f"Skipped {skipped} edit_diff.webp file(s); see SKIP lines above.", flush=True)
    return 0
|
|
|
|
# Script entry point: run ``main`` and use its return value as the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|