#!/usr/bin/env python3 """Backfill the static edit-diff still (``edit_diff.png``) into the render bucket. Editing samples ship an *animated* ``edit_diff.webp`` turntable but no static frame. The grid thumbnail needs a still (an animated WebP can't be frozen to one angle in HTML), so the eval pipeline now also writes ``edit_diff.png`` (frame 0) beside the clip — but submissions evaluated *before* that change only have the WebP in the bucket. This one-time tool closes that gap **without re-evaluating or re-rendering**: it lists the public render bucket, finds every ``...//edit_diff.webp`` that has no sibling ``edit_diff.png``, downloads the WebP, extracts frame 0 via the shared :func:`cadgenbench.common.viewer.first_frame_png` (the exact frame the forward pipeline now saves), and uploads ``edit_diff.png`` next to it in the same bucket prefix — so it serves by the same anonymous render URL as every other render. Run (needs a write-scoped ``HF_TOKEN`` for the bucket):: HF_TOKEN= python tools/backfill_edit_diff_still.py # all submissions HF_TOKEN= python tools/backfill_edit_diff_still.py --submission python tools/backfill_edit_diff_still.py --dry-run # list only, no token needed """ from __future__ import annotations import argparse import os import sys import tempfile import urllib.request from pathlib import Path from huggingface_hub import HfApi # cadgenbench (for the shared frame extractor) must be importable; allow running # straight from the repo without installing the leaderboard package. _REPO_ROOT = Path(__file__).resolve().parents[2] _SRC = _REPO_ROOT / "cadgenbench" / "src" if _SRC.is_dir(): sys.path.insert(0, str(_SRC)) from cadgenbench.common.imaging import first_frame_png # noqa: E402 HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/") HF_ORG = os.getenv("HF_ORG", "HuggingAI4Engineering") # Mirror leaderboard.py's defaults so the tool targets the same bucket the eval # job uploads to and the report/gallery read from. RENDER_BUCKET = os.getenv("HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging") RENDER_PREFIX = "renders" WEBP_NAME = "edit_diff.webp" PNG_NAME = "edit_diff.png" # One upload call per this many files: keeps an individual batch small and # rate-limit friendly while still amortising the request overhead. UPLOAD_CHUNK = 50 def _resolve_url(path: str) -> str: """Anonymous bucket resolve URL for a bucket-relative object path.""" return f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{path}" def _download(path: str, token: str | None) -> bytes: req = urllib.request.Request(_resolve_url(path)) if token: req.add_header("Authorization", f"Bearer {token}") with urllib.request.urlopen(req, timeout=60) as resp: return resp.read() def _list_entries(api: HfApi, prefix: str, token: str | None) -> list[str]: """Bucket-relative file paths under *prefix* (folders filtered out).""" return [ e.path for e in api.list_bucket_tree( RENDER_BUCKET, prefix=prefix, recursive=True, token=token, ) if getattr(e, "path", None) and not getattr(e, "is_folder", False) and not e.path.endswith("/") ] def _missing_stills(paths: list[str]) -> list[str]: """WebP paths whose sibling ``edit_diff.png`` is absent from the bucket.""" present = set(paths) out = [] for p in paths: if p.endswith("/" + WEBP_NAME): sibling = p[: -len(WEBP_NAME)] + PNG_NAME if sibling not in present: out.append(p) return sorted(out) def main() -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--submission", help="Limit to one submission id (the prefix is renders/). " "Omit to scan every submission in the bucket.", ) parser.add_argument("--limit", type=int, default=None, help="Process at most N stills (after listing).") parser.add_argument( "--dry-run", action="store_true", help="List what would be created; download/upload nothing.", ) args = parser.parse_args() token = os.environ.get("HF_TOKEN") api = HfApi(token=token) prefix = ( f"{RENDER_PREFIX}/{args.submission}" if args.submission else RENDER_PREFIX ) print(f"Scanning bucket {RENDER_BUCKET} under {prefix}/ …", flush=True) paths = _list_entries(api, prefix, token) todo = _missing_stills(paths) if args.limit is not None: todo = todo[: args.limit] n_webp = sum(1 for p in paths if p.endswith("/" + WEBP_NAME)) print( f"Found {n_webp} edit_diff.webp; {len(todo)} missing a still.", flush=True, ) if not todo: print("Nothing to backfill.") return 0 if args.dry_run: for p in todo: print(" would create", p[: -len(WEBP_NAME)] + PNG_NAME) return 0 if not token: parser.error("HF_TOKEN required to upload (or pass --dry-run).") created = 0 with tempfile.TemporaryDirectory(prefix="edit-diff-still-") as tmp: tmp_dir = Path(tmp) add: list[tuple[str, str]] = [] for i, webp_path in enumerate(todo, start=1): dest = webp_path[: -len(WEBP_NAME)] + PNG_NAME try: png = first_frame_png(_download(webp_path, token)) except Exception as e: # noqa: BLE001 - skip a bad clip, keep going print(f" [{i}/{len(todo)}] SKIP {webp_path} ({type(e).__name__}: {e})") continue local = tmp_dir / f"{i}.png" local.write_bytes(png) add.append((str(local), dest)) print(f" [{i}/{len(todo)}] {dest} ({len(png) // 1024} KB)", flush=True) if len(add) >= UPLOAD_CHUNK: api.batch_bucket_files(RENDER_BUCKET, add=add, token=token) created += len(add) add = [] if add: api.batch_bucket_files(RENDER_BUCKET, add=add, token=token) created += len(add) print(f"Done. Uploaded {created} edit_diff.png still(s).", flush=True) return 0 if __name__ == "__main__": raise SystemExit(main())