Michael Rabinovich Cursor committed on
Commit
c7f83a5
·
1 Parent(s): 1a24d4a

leaderboard: backfill tool for edit_diff.png stills in render bucket

Browse files

One-time tool to close the gap for submissions evaluated before the eval
pipeline started writing edit_diff.png: lists the render bucket, finds every
<fixture>/edit_diff.webp lacking a sibling edit_diff.png, extracts frame 0
via cadgenbench.common.imaging.first_frame_png (the exact frame the pipeline
now saves), and uploads the still to the same bucket prefix. Idempotent
(skips existing stills); --dry-run lists without a token.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show
  1. tools/backfill_edit_diff_still.py +165 -0
tools/backfill_edit_diff_still.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Backfill the static edit-diff still (``edit_diff.png``) into the render bucket.
3
+
4
+ Editing samples ship an *animated* ``edit_diff.webp`` turntable but no static
5
+ frame. The grid thumbnail needs a still (an animated WebP can't be frozen to one
6
+ angle in HTML), so the eval pipeline now also writes ``edit_diff.png`` (frame 0)
7
+ beside the clip — but submissions evaluated *before* that change only have the
8
+ WebP in the bucket.
9
+
10
+ This one-time tool closes that gap **without re-evaluating or re-rendering**: it
11
+ lists the public render bucket, finds every ``.../<fixture>/edit_diff.webp`` that
12
+ has no sibling ``edit_diff.png``, downloads the WebP, extracts frame 0 via the
13
+ shared :func:`cadgenbench.common.imaging.first_frame_png` (the exact frame the
14
+ forward pipeline now saves), and uploads ``edit_diff.png`` next to it in the same
15
+ bucket prefix — so it serves by the same anonymous render URL as every other
16
+ render.
17
+
18
+ Run (needs a write-scoped ``HF_TOKEN`` for the bucket)::
19
+
20
+ HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py # all submissions
21
+ HF_TOKEN=<write-token> python tools/backfill_edit_diff_still.py --submission <id>
22
+ python tools/backfill_edit_diff_still.py --dry-run # list only, no token needed
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import argparse
27
+ import os
28
+ import sys
29
+ import tempfile
30
+ import urllib.request
31
+ from pathlib import Path
32
+
33
+ from huggingface_hub import HfApi
34
+
35
# The shared frame extractor lives in the ``cadgenbench`` package. If a source
# checkout exists at ``<two levels above this file's directory>/cadgenbench/src``
# it is placed first on ``sys.path`` so the tool can run straight from the repo
# without installing the leaderboard package.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT.joinpath("cadgenbench", "src")
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))
41
+
42
+ from cadgenbench.common.imaging import first_frame_png # noqa: E402
43
+
44
# Hub endpoint; trailing slashes are stripped because URL paths are appended
# with their own leading "/".
HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
HF_ORG = os.environ.get("HF_ORG", "HuggingAI4Engineering")
# Same defaults as leaderboard.py, so this tool targets the bucket the eval job
# uploads to and the report/gallery read from.
RENDER_BUCKET = os.environ.get(
    "HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging"
)
RENDER_PREFIX = "renders"
WEBP_NAME = "edit_diff.webp"
PNG_NAME = "edit_diff.png"
# Files per upload call: small enough to stay rate-limit friendly, large enough
# to amortise the per-request overhead.
UPLOAD_CHUNK = 50
55
+
56
+
57
def _resolve_url(path: str) -> str:
    """Return the bucket ``resolve`` URL for a bucket-relative object *path*.

    The result has the form
    ``<HF_ENDPOINT>/buckets/<RENDER_BUCKET>/resolve/<path>``; *path* is
    interpolated verbatim (no percent-encoding is applied here).
    """
    resolve_base = f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve"
    return f"{resolve_base}/{path}"
60
+
61
+
62
def _download(path: str, token: str | None) -> bytes:
    """Fetch and return the raw bytes of the bucket object at *path*.

    A ``Bearer`` ``Authorization`` header is attached only when *token* is
    truthy. The request uses a 60-second timeout; any error raised by
    ``urllib`` propagates to the caller.
    """
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}
    request = urllib.request.Request(_resolve_url(path), headers=auth_headers)
    with urllib.request.urlopen(request, timeout=60) as response:
        return response.read()
68
+
69
+
70
def _list_entries(api: HfApi, prefix: str, token: str | None) -> list[str]:
    """List bucket-relative file paths found recursively under *prefix*.

    Entries are dropped when they have no (or an empty) ``path``, when their
    ``is_folder`` attribute is truthy, or when the path ends with ``/`` — so
    only file objects remain, in the order the listing yields them.
    """
    listing = api.list_bucket_tree(
        RENDER_BUCKET, prefix=prefix, recursive=True, token=token,
    )
    file_paths: list[str] = []
    for entry in listing:
        entry_path = getattr(entry, "path", None)
        if not entry_path:
            continue
        if getattr(entry, "is_folder", False) or entry_path.endswith("/"):
            continue
        file_paths.append(entry_path)
    return file_paths
81
+
82
+
83
def _missing_stills(paths: list[str]) -> list[str]:
    """Return, sorted, the WebP clip paths that have no sibling still.

    A path counts as a clip when it ends with ``/<WEBP_NAME>``. Its sibling is
    the same path with the trailing ``WEBP_NAME`` swapped for ``PNG_NAME``; the
    clip is reported when that sibling does not occur anywhere in *paths*.
    """
    known = set(paths)
    clip_suffix = "/" + WEBP_NAME
    return sorted(
        clip
        for clip in paths
        if clip.endswith(clip_suffix)
        and clip[: -len(WEBP_NAME)] + PNG_NAME not in known
    )
93
+
94
+
95
def main() -> int:
    """Command-line entry point: backfill missing ``edit_diff.png`` stills.

    Steps:

    1. Parse ``--submission``, ``--limit`` and ``--dry-run``.
    2. List the render bucket under ``renders`` (or ``renders/<submission>``)
       and collect every clip that has no sibling still.
    3. With ``--dry-run``: print the stills that would be created and stop;
       nothing is downloaded or uploaded and no token is required.
    4. Otherwise: download each clip, extract its first frame as PNG, stage it
       in a temporary directory and upload the stills in batches of
       ``UPLOAD_CHUNK``. A clip whose download or frame extraction fails is
       reported with ``SKIP`` and does not abort the run.

    Returns:
        ``0`` on every path that returns normally.

    Raises:
        SystemExit: via ``parser.error`` when ``--limit`` is negative, or when
            an upload is required but ``HF_TOKEN`` is not set.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a literal block of usage examples; the
        # default formatter would re-wrap it into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--submission",
        help="Limit to one submission id (the prefix is renders/<id>). "
             "Omit to scan every submission in the bucket.",
    )
    parser.add_argument("--limit", type=int, default=None,
                        help="Process at most N stills (after listing).")
    parser.add_argument(
        "--dry-run", action="store_true",
        help="List what would be created; download/upload nothing.",
    )
    args = parser.parse_args()
    if args.limit is not None and args.limit < 0:
        # A negative value would silently turn the slice below into
        # "everything except the last N" instead of "at most N".
        parser.error("--limit must be a non-negative integer.")

    token = os.environ.get("HF_TOKEN")
    api = HfApi(token=token)
    prefix = (
        f"{RENDER_PREFIX}/{args.submission}" if args.submission else RENDER_PREFIX
    )

    print(f"Scanning bucket {RENDER_BUCKET} under {prefix}/ …", flush=True)
    paths = _list_entries(api, prefix, token)
    todo = _missing_stills(paths)
    if args.limit is not None:
        todo = todo[: args.limit]

    # Counted over the whole listing, i.e. before --limit is applied.
    n_webp = sum(1 for p in paths if p.endswith("/" + WEBP_NAME))
    print(
        f"Found {n_webp} edit_diff.webp; {len(todo)} missing a still.",
        flush=True,
    )
    if not todo:
        print("Nothing to backfill.")
        return 0
    if args.dry_run:
        for p in todo:
            print(" would create", p[: -len(WEBP_NAME)] + PNG_NAME)
        return 0
    # Checked only now so that listing and --dry-run work without a token.
    if not token:
        parser.error("HF_TOKEN required to upload (or pass --dry-run).")

    created = 0
    with tempfile.TemporaryDirectory(prefix="edit-diff-still-") as tmp:
        tmp_dir = Path(tmp)
        # (local file, bucket destination) pairs awaiting the next batch upload.
        pending: list[tuple[str, str]] = []
        for i, webp_path in enumerate(todo, start=1):
            dest = webp_path[: -len(WEBP_NAME)] + PNG_NAME
            try:
                png = first_frame_png(_download(webp_path, token))
            except Exception as e:  # noqa: BLE001 - skip a bad clip, keep going
                print(f" [{i}/{len(todo)}] SKIP {webp_path} ({type(e).__name__}: {e})")
                continue
            # Named after the loop index so staged files never collide.
            local = tmp_dir / f"{i}.png"
            local.write_bytes(png)
            pending.append((str(local), dest))
            print(f" [{i}/{len(todo)}] {dest} ({len(png) // 1024} KB)", flush=True)
            if len(pending) >= UPLOAD_CHUNK:
                api.batch_bucket_files(RENDER_BUCKET, add=pending, token=token)
                created += len(pending)
                pending = []
        # Upload the final, partially filled batch.
        if pending:
            api.batch_bucket_files(RENDER_BUCKET, add=pending, token=token)
            created += len(pending)

    print(f"Done. Uploaded {created} edit_diff.png still(s).", flush=True)
    return 0
162
+
163
+
164
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())