Michael Rabinovich Cursor committed on
Commit ·
c7f83a5
1
Parent(s): 1a24d4a
leaderboard: backfill tool for edit_diff.png stills in render bucket
Browse files
One-time tool to close the gap for submissions evaluated before the eval
pipeline started writing edit_diff.png: lists the render bucket, finds every
<fixture>/edit_diff.webp lacking a sibling edit_diff.png, extracts frame 0
via cadgenbench.common.imaging.first_frame_png (the exact frame the pipeline
now saves), and uploads the still to the same bucket prefix. Idempotent
(skips existing stills); --dry-run lists without a token.
Co-authored-by: Cursor <cursoragent@cursor.com>
tools/backfill_edit_diff_still.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Backfill the static edit-diff still (``edit_diff.png``) into the render bucket.
|
| 3 |
+
|
| 4 |
+
Editing samples ship an *animated* ``edit_diff.webp`` turntable but no static
|
| 5 |
+
frame. The grid thumbnail needs a still (an animated WebP can't be frozen to one
|
| 6 |
+
angle in HTML), so the eval pipeline now also writes ``edit_diff.png`` (frame 0)
|
| 7 |
+
beside the clip — but submissions evaluated *before* that change only have the
|
| 8 |
+
WebP in the bucket.
|
| 9 |
+
|
| 10 |
+
This one-time tool closes that gap **without re-evaluating or re-rendering**: it
|
| 11 |
+
lists the public render bucket, finds every ``.../<fixture>/edit_diff.webp`` that
|
| 12 |
+
has no sibling ``edit_diff.png``, downloads the WebP, extracts frame 0 via the
|
| 13 |
+
shared :func:`cadgenbench.common.imaging.first_frame_png` (the exact frame the
|
| 14 |
+
forward pipeline now saves), and uploads ``edit_diff.png`` next to it in the same
|
| 15 |
+
bucket prefix — so it serves by the same anonymous render URL as every other
|
| 16 |
+
render.
|
| 17 |
+
|
| 18 |
+
Run (needs a write-scoped ``HF_TOKEN`` for the bucket)::
|
| 19 |
+
|
| 20 |
+
HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py # all submissions
|
| 21 |
+
HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py --submission <id>
|
| 22 |
+
python tools/backfill_edit_diff_still.py --dry-run # list only, no token needed
|
| 23 |
+
"""
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import argparse
|
| 27 |
+
import os
|
| 28 |
+
import sys
|
| 29 |
+
import tempfile
|
| 30 |
+
import urllib.request
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
|
| 33 |
+
from huggingface_hub import HfApi
|
| 34 |
+
|
| 35 |
+
# The shared frame extractor lives in cadgenbench. When a sibling
# ``cadgenbench/src`` directory exists two levels above this file's directory,
# put it at the front of sys.path so the script can import cadgenbench from a
# checkout without it being installed.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT.joinpath("cadgenbench", "src")
if _SRC.is_dir():
    sys.path.insert(0, os.fspath(_SRC))
|
| 41 |
+
|
| 42 |
+
from cadgenbench.common.imaging import first_frame_png # noqa: E402
|
| 43 |
+
|
| 44 |
+
# Hub endpoint (trailing slash stripped) and owning org; both are overridable
# through the environment.
HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
HF_ORG = os.environ.get("HF_ORG", "HuggingAI4Engineering")
# Mirrors leaderboard.py's defaults, so this tool writes to the bucket the eval
# job uploads to and the report/gallery read from.
RENDER_BUCKET = os.environ.get("HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging")
RENDER_PREFIX = "renders"
WEBP_NAME = "edit_diff.webp"
PNG_NAME = "edit_diff.png"
# Number of files sent per upload call: small enough to stay rate-limit
# friendly, large enough to amortise the per-request overhead.
UPLOAD_CHUNK = 50
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _resolve_url(path: str) -> str:
    """Return the anonymous bucket resolve URL for a bucket-relative object path.

    The path is percent-encoded (``/`` separators are kept) so that object
    names containing spaces, non-ASCII characters or other URL-reserved
    characters still yield a URL that ``urllib.request`` accepts. Paths made
    only of unreserved characters and ``/`` are returned unchanged.

    Args:
        path: Object path relative to the bucket root, e.g.
            ``renders/<id>/<fixture>/edit_diff.webp``.

    Returns:
        The absolute URL under ``HF_ENDPOINT`` for ``RENDER_BUCKET``.
    """
    # Imported locally so this function does not depend on the implicit
    # ``urllib.parse`` attribute that ``import urllib.request`` happens to set.
    from urllib.parse import quote

    return f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{quote(path)}"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _download(path: str, token: str | None) -> bytes:
    """Fetch the raw bytes of a bucket object.

    Args:
        path: Bucket-relative object path.
        token: Bearer token sent in the ``Authorization`` header; when falsy
            the request is made anonymously.

    Returns:
        The full response body.
    """
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}
    request = urllib.request.Request(_resolve_url(path), headers=auth_headers)
    with urllib.request.urlopen(request, timeout=60) as response:
        return response.read()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _list_entries(api: HfApi, prefix: str, token: str | None) -> list[str]:
    """List bucket-relative file paths under *prefix*, leaving folders out.

    An entry is kept only if it has a non-empty ``path``, is not flagged
    ``is_folder``, and its path does not end with ``/``.
    """
    file_paths: list[str] = []
    listing = api.list_bucket_tree(
        RENDER_BUCKET, prefix=prefix, recursive=True, token=token,
    )
    for entry in listing:
        entry_path = getattr(entry, "path", None)
        if not entry_path:
            continue
        if getattr(entry, "is_folder", False) or entry_path.endswith("/"):
            continue
        file_paths.append(entry_path)
    return file_paths
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _missing_stills(paths: list[str]) -> list[str]:
    """Return, sorted, the WebP paths that have no sibling ``edit_diff.png``.

    Only paths ending in ``/edit_diff.webp`` are considered; a clip counts as
    missing its still when the same directory's ``edit_diff.png`` does not
    appear anywhere in *paths*.
    """
    known = set(paths)
    clip_suffix = "/" + WEBP_NAME
    name_len = len(WEBP_NAME)
    return sorted(
        clip
        for clip in paths
        if clip.endswith(clip_suffix)
        and clip[:-name_len] + PNG_NAME not in known
    )
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _upload_batch(api: HfApi, batch: list[tuple[str, str]], token: str | None) -> int:
    """Upload one batch of ``(local_path, bucket_path)`` pairs; return its size."""
    api.batch_bucket_files(RENDER_BUCKET, add=batch, token=token)
    return len(batch)


def main() -> int:
    """Backfill missing ``edit_diff.png`` stills; return the process exit code.

    Lists the render bucket (optionally restricted to one submission), works
    out which ``edit_diff.webp`` clips lack a sibling PNG, and then either
    prints the would-be uploads (``--dry-run``) or downloads each clip,
    extracts its first frame and uploads the stills in batches of
    ``UPLOAD_CHUNK``.

    A clip that fails to download or decode is reported and skipped so one bad
    object does not abort the whole run. Upload errors are not caught.

    Returns:
        ``0`` on completion. Argument errors (negative ``--limit``, missing
        ``HF_TOKEN`` for a real run) exit through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's layout (usage examples) intact in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--submission",
        help="Limit to one submission id (the prefix is renders/<id>). "
             "Omit to scan every submission in the bucket.",
    )
    parser.add_argument("--limit", type=int, default=None,
                        help="Process at most N stills (after listing).")
    parser.add_argument(
        "--dry-run", action="store_true",
        help="List what would be created; download/upload nothing.",
    )
    args = parser.parse_args()
    # A negative value would silently slice from the end of the work list
    # (e.g. -1 drops the last clip), which is never what "at most N" means.
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be a non-negative integer.")

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    prefix = (
        f"{RENDER_PREFIX}/{args.submission}" if args.submission else RENDER_PREFIX
    )

    print(f"Scanning bucket {RENDER_BUCKET} under {prefix}/ …", flush=True)
    paths = _list_entries(api, prefix, token)
    todo = _missing_stills(paths)
    if args.limit is not None:
        todo = todo[: args.limit]

    n_webp = sum(1 for p in paths if p.endswith("/" + WEBP_NAME))
    print(
        f"Found {n_webp} edit_diff.webp; {len(todo)} missing a still.",
        flush=True,
    )
    if not todo:
        print("Nothing to backfill.")
        return 0
    if args.dry_run:
        for p in todo:
            print("  would create", p[: -len(WEBP_NAME)] + PNG_NAME)
        return 0
    # The token is only mandatory once we are about to write to the bucket.
    if not token:
        parser.error("HF_TOKEN required to upload (or pass --dry-run).")

    created = 0
    skipped = 0
    with tempfile.TemporaryDirectory(prefix="edit-diff-still-") as tmp:
        tmp_dir = Path(tmp)
        add: list[tuple[str, str]] = []
        for i, webp_path in enumerate(todo, start=1):
            dest = webp_path[: -len(WEBP_NAME)] + PNG_NAME
            try:
                png = first_frame_png(_download(webp_path, token))
            except Exception as e:  # noqa: BLE001 - skip a bad clip, keep going
                print(f"  [{i}/{len(todo)}] SKIP {webp_path} ({type(e).__name__}: {e})")
                skipped += 1
                continue
            # The index keeps local file names unique within the temp dir.
            local = tmp_dir / f"{i}.png"
            local.write_bytes(png)
            add.append((str(local), dest))
            print(f"  [{i}/{len(todo)}] {dest} ({len(png) // 1024} KB)", flush=True)
            if len(add) >= UPLOAD_CHUNK:
                created += _upload_batch(api, add, token)
                add = []
        # Flush the final partial batch while the temp files still exist.
        if add:
            created += _upload_batch(api, add, token)

    print(f"Done. Uploaded {created} edit_diff.png still(s).", flush=True)
    if skipped:
        print(f"Skipped {skipped} clip(s) that failed to download or decode.", flush=True)
    return 0
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|