Michael Rabinovich Cursor committed on
Commit ·
ba3eefb
1
Parent(s): 2893b22
leaderboard: backfill tool to grid-ify already-published reports
Browse files
Rewrites the summary view of published reports/<id>.html in the submissions
dataset from the old flat table to the thumbnail grid, in place and without
re-evaluating. Parses each report's existing rows (sample/status/CAD) + the
render-bucket base already embedded in the file, classifies editing by the
starting-shape renders (so invalid edits group correctly), and rebuilds the
grid via single_run's shared builders (byte-identical to a fresh report),
pointing editing cards at edit_diff.png and generation cards at output iso.
Injects the shared grid CSS/JS; detail cards, header and download button are
left untouched. Supports --files and --dataset (--dry-run); idempotent.
Co-authored-by: Cursor <cursoragent@cursor.com>
- tools/backfill_report_grid.py +242 -0
tools/backfill_report_grid.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Backfill the thumbnail-grid summary view into already-published reports.
|
| 3 |
+
|
| 4 |
+
The report generator (``cadgenbench.eval.report.single_run``) now renders the
|
| 5 |
+
summary view as a grouped thumbnail grid instead of a flat table. Reports
|
| 6 |
+
produced before that change are static HTML files in the submissions dataset
|
| 7 |
+
(``reports/<id>.html``); changing the generator does nothing to them. This
|
| 8 |
+
one-time tool rewrites those published reports **in place, without re-evaluating
|
| 9 |
+
or regenerating from run dirs**:
|
| 10 |
+
|
| 11 |
+
- it reads each report's existing summary table (sample number, status, CAD
|
| 12 |
+
score) and detail cards (which fixtures are editing) plus the render-bucket
|
| 13 |
+
base URL already embedded in the file;
|
| 14 |
+
- rebuilds the summary view as the grid using the *shared* builders from
|
| 15 |
+
``single_run`` (so a backfilled report is byte-identical to a freshly
|
| 16 |
+
generated one), pointing editing cards at the ``edit_diff.png`` still and
|
| 17 |
+
generation cards at the output ``iso.png`` — all assets that already exist;
|
| 18 |
+
- injects the shared grid CSS/JS; the detail cards, header, score text and
|
| 19 |
+
download button are left untouched.
|
| 20 |
+
|
| 21 |
+
Run on local files (writes alongside, good for eyeballing)::
|
| 22 |
+
|
| 23 |
+
python tools/backfill_report_grid.py --files /tmp/report.html -o /tmp/out.html
|
| 24 |
+
|
| 25 |
+
Rewrite every published report in the submissions dataset (needs a write token)::
|
| 26 |
+
|
| 27 |
+
HF_TOKEN=<write-token> python tools/backfill_report_grid.py --dataset
|
| 28 |
+
python tools/backfill_report_grid.py --dataset --dry-run # list only
|
| 29 |
+
"""
|
| 30 |
+
from __future__ import annotations
|
| 31 |
+
|
| 32 |
+
import argparse
|
| 33 |
+
import os
|
| 34 |
+
import re
|
| 35 |
+
import sys
|
| 36 |
+
from pathlib import Path
|
| 37 |
+
|
| 38 |
+
from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download
|
| 39 |
+
|
| 40 |
+
# cadgenbench (for the shared grid builders) must be importable.
|
| 41 |
+
_REPO_ROOT = Path(__file__).resolve().parents[2]
|
| 42 |
+
_SRC = _REPO_ROOT / "cadgenbench" / "src"
|
| 43 |
+
if _SRC.is_dir():
|
| 44 |
+
sys.path.insert(0, str(_SRC))
|
| 45 |
+
|
| 46 |
+
from cadgenbench.eval.report.single_run import ( # noqa: E402
|
| 47 |
+
_GRID_CSS,
|
| 48 |
+
_GRID_JS,
|
| 49 |
+
_render_grid_controls,
|
| 50 |
+
grid_card_html,
|
| 51 |
+
render_grid_groups,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# Hugging Face organisation name; the HF_ORG environment variable overrides it.
HF_ORG = os.environ.get("HF_ORG", "HuggingAI4Engineering")

# Dataset repo id holding the published reports; HF_SUBMISSIONS_REPO overrides
# the default, which is derived from HF_ORG.
SUBMISSIONS_REPO = os.environ.get(
    "HF_SUBMISSIONS_REPO",
    f"{HF_ORG}/cadgenbench-submissions",
)

# URL path prefix under which the task-input thumbnails are addressed.
INPUT_PROXY_BASE = "/task-input"

# File name of the edit-diff still image used as the output thumbnail of
# editing cards.
EDIT_DIFF_STILL = "edit_diff.png"
|
| 58 |
+
|
| 59 |
+
# --- parsing the old flat-table report -------------------------------------

# An absolute image URL (.png / .webp) under ``/resolve/renders/``. Group 1 is
# the URL up to and including the path segment that follows
# ``/resolve/renders/``; the rest of the match is the per-sample path.
_RENDER_BASE_RE = re.compile(
    r'(https?://[^\s"\']+?/resolve/renders/[^/"\']+)/[^/"\']+/[^"\']+\.(?:png|webp)'
)

# One clickable summary-table row. Group 1 is the index passed to
# ``showDetail(...)``; group 2 is the row's inner markup (may span lines).
_ROW_RE = re.compile(
    r'<tr class="q-[a-z]+" onclick="showDetail\((\d+)\)"[^>]*>(.*?)</tr>',
    re.DOTALL,
)

# First attribute-less ``<td>`` with plain-text content.
_NAME_RE = re.compile(r"<td>([^<]+)</td>")

# Text of the status pill.
_STATUS_RE = re.compile(r'status-pill status-\w+">([^<]+)<')

# ``data-v`` attribute of a cell whose content starts with ``<b>``.
_CAD_RE = re.compile(r'<td data-v="([^"]+)"><b>')

# The summary-view container: opening tag (group 1), then lazily up to the
# first ``</div>`` (group 2).
_SUMMARY_VIEW_RE = re.compile(
    r'(<div id="summary-view">).*?(</div>)',
    re.DOTALL,
)

# Static help paragraph placed at the top of the rebuilt summary view.
_GRID_HELP = "".join(
    (
        '<p class="grid-help">Click a card to view details. ',
        '<span class="kbd">j</span>/<span class="kbd">k</span> to navigate, ',
        '<span class="kbd">Esc</span> to return. Each card shows the input and the ',
        "candidate output. Score tint: ",
        "<span class='gtint q-high'>≥0.90</span> ",
        "<span class='gtint q-mid'>≥0.60</span> ",
        "<span class='gtint q-low'><0.60</span> CAD score.</p>",
    )
)


# A reference to ``<INPUT_PROXY_BASE>/<something>/renders/`` inside an
# attribute value (no quote characters in between).
_INPUT_SHAPE_RE = re.compile(re.escape(INPUT_PROXY_BASE) + r"/[^\"']+/renders/")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _editing_idxs(doc: str) -> set[int]:
    """Collect the ``data-idx`` of every detail card that is an editing task.

    The document is cut at each ``<div class="fixture-card"`` opener. A chunk
    counts as an editing card when it begins with a ``data-idx="N"``
    attribute and contains a starting-shape render reference
    (``/task-input/<fixture>/renders/...``).

    The starting-shape renders are used as the signal rather than the
    edit-diff turntable / ``(edit diff)`` heading because, per the original
    author's note, the old generator laid out an *invalid* editing candidate
    like a generation sample (no diff), so diff-based markers would miss
    invalid edits. This is intended to match the new generator's
    ``wants_shape`` grouping.

    Args:
        doc: Full HTML text of a report.

    Returns:
        The set of integer card indices classified as editing.
    """
    idx_attr = re.compile(r'\s*data-idx="(\d+)"')
    # The first piece is whatever precedes the first card; it is never a card.
    _, *card_chunks = doc.split('<div class="fixture-card"')

    editing = set()
    for chunk in card_chunks:
        head = idx_attr.match(chunk)
        if head is None:
            continue
        if _INPUT_SHAPE_RE.search(chunk) is None:
            continue
        editing.add(int(head.group(1)))
    return editing
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _cad_score(cells: str) -> float | None:
    """Read the CAD score out of one summary row's cell markup.

    Returns ``None`` when no score cell is found, when its ``data-v`` value
    does not parse as a float, or when the parsed value is not ``>= 0``.
    """
    found = _CAD_RE.search(cells)
    if found is None:
        return None
    try:
        score = float(found.group(1))
    except ValueError:
        return None
    return score if score >= 0 else None


def rewrite_report_html(doc: str) -> str | None:
    """Rebuild a flat-table report's summary view as the thumbnail grid.

    Args:
        doc: Full HTML text of a published report.

    Returns:
        The rewritten HTML, or ``None`` to signal "leave the file unchanged".
        ``None`` is returned when:

        - the document already contains grid markup (``class="ggrid"`` or
          ``id="groups"``);
        - no render-bucket image URL is present to derive the output
          thumbnail base from (e.g. a base64-inlined local report);
        - no summary-table row could be parsed;
        - the ``summary-view`` container is not found.
    """
    grid_markers = ('class="ggrid"', 'id="groups"')
    if any(marker in doc for marker in grid_markers):
        return None  # already backfilled

    base_hit = _RENDER_BASE_RE.search(doc)
    if base_hit is None:
        return None  # not a hosted report (e.g. base64-inlined local report)
    render_base = base_hit.group(1)
    editing = _editing_idxs(doc)

    # Cards keyed by "is this an editing sample?", in table order.
    buckets: dict[bool, list[str]] = {False: [], True: []}
    for row in _ROW_RE.finditer(doc):
        cells = row.group(2)
        name_hit = _NAME_RE.search(cells)
        if name_hit is None:
            continue
        idx = int(row.group(1))
        name = name_hit.group(1).strip()

        status_hit = _STATUS_RE.search(cells)
        status = "?" if status_hit is None else status_hit.group(1).strip()

        is_editing = idx in editing
        if is_editing:
            # Editing: starting-shape render in, edit-diff still out.
            thumbs = (
                f"{INPUT_PROXY_BASE}/{name}/renders/iso.png",
                f"{render_base}/{name}/{EDIT_DIFF_STILL}",
            )
        else:
            # Generation: task input image in, output iso render out.
            thumbs = (
                f"{INPUT_PROXY_BASE}/{name}/input.png",
                f"{render_base}/{name}/iso.png",
            )
        in_src, out_src = thumbs

        buckets[is_editing].append(
            grid_card_html(
                idx=idx,
                name=name,
                is_editing=is_editing,
                status=status,
                cad=_cad_score(cells),
                in_src=in_src,
                out_src=out_src,
            )
        )

    gen_cards, edit_cards = buckets[False], buckets[True]
    if not (gen_cards or edit_cards):
        return None

    new_inner = _GRID_HELP + _render_grid_controls() + render_grid_groups(
        gen_cards, edit_cards,
    )
    # A callable replacement keeps backslashes in the new markup literal.
    doc, n_replaced = _SUMMARY_VIEW_RE.subn(
        lambda hit: hit.group(1) + new_inner + "</div>", doc, count=1,
    )
    if n_replaced == 0:
        return None

    # Inject the shared grid styles + filtering behavior (first occurrence only).
    doc = doc.replace("</style>", _GRID_CSS + "</style>", 1)
    return doc.replace("</body>", f"<script>{_GRID_JS}</script></body>", 1)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def _run_files(files: list[Path], out: Path | None) -> int:
    """Rewrite local report files and write the result to disk.

    Files are read and written as UTF-8 explicitly rather than with the
    locale's default encoding: the dataset path encodes rewritten reports as
    UTF-8, and the injected grid help text contains non-ASCII characters.

    Args:
        files: Report HTML files to process.
        out: Destination path. When ``None`` each file is overwritten in
            place.

    Returns:
        Process exit code; always ``0``.
    """
    for f in files:
        doc = f.read_text(encoding="utf-8")
        new = rewrite_report_html(doc)
        if new is None:
            print(f"  SKIP {f} (already grid / not a hosted report)")
            continue
        dest = out or f
        dest.write_text(new, encoding="utf-8")
        print(f"  wrote {dest} ({len(new) // 1024} KB)")
    return 0
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def _run_dataset(api: HfApi, token: str | None, dry_run: bool, limit: int | None) -> int:
    """Rewrite every ``reports/*.html`` in the submissions dataset.

    Each report is downloaded and passed through ``rewrite_report_html``.
    All rewritten files are uploaded together in one ``create_commit`` call.

    Args:
        api: Hub client used to list files and create the commit.
        token: Access token used for downloads; required to commit.
        dry_run: When true, only report what would be rewritten.
        limit: If not ``None``, process only the first ``limit`` reports
            (after sorting by path).

    Returns:
        ``0`` on success (including dry runs and "nothing to rewrite"),
        ``2`` when there are rewrites to commit but no token was supplied.
    """
    files = sorted(
        f for f in api.list_repo_files(SUBMISSIONS_REPO, repo_type="dataset")
        if f.startswith("reports/") and f.endswith(".html")
    )
    if limit is not None:
        files = files[:limit]
    print(f"Found {len(files)} report(s) in {SUBMISSIONS_REPO}.")

    ops: list[CommitOperationAdd] = []
    # Counted separately from ``ops`` because a dry run never fills ``ops``.
    rewritable = 0
    for i, rel in enumerate(files, start=1):
        local = hf_hub_download(
            repo_id=SUBMISSIONS_REPO, filename=rel, repo_type="dataset", token=token,
        )
        # Read as UTF-8 to match the ``new.encode()`` used for the upload.
        new = rewrite_report_html(Path(local).read_text(encoding="utf-8"))
        if new is None:
            print(f"  [{i}/{len(files)}] SKIP {rel} (already grid / not hosted)")
            continue
        rewritable += 1
        print(f"  [{i}/{len(files)}] {rel} -> grid ({len(new) // 1024} KB)")
        if not dry_run:
            ops.append(CommitOperationAdd(path_in_repo=rel, path_or_fileobj=new.encode()))

    if dry_run:
        # Skipped reports are excluded from the summary count.
        print(f"Dry run: would rewrite {rewritable} candidate(s).")
        return 0
    if not ops:
        print("Nothing to rewrite.")
        return 0
    if not token:
        print("HF_TOKEN required to commit.", file=sys.stderr)
        return 2
    api.create_commit(
        repo_id=SUBMISSIONS_REPO, repo_type="dataset", operations=ops,
        commit_message="reports: backfill thumbnail-grid summary view",
    )
    print(f"Committed {len(ops)} rewritten report(s) to {SUBMISSIONS_REPO}.")
    return 0
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def main() -> int:
    """Parse the command line and run the requested backfill mode.

    Exactly one of ``--files`` or ``--dataset`` must be given. In dataset
    mode the token is read from the ``HF_TOKEN`` environment variable.

    Returns:
        The exit code of the selected mode.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument(
        "--files", nargs="+", type=Path, help="Local report HTML files.",
    )
    source.add_argument(
        "--dataset",
        action="store_true",
        help="Rewrite every reports/*.html in the submissions dataset.",
    )

    cli.add_argument("-o", "--output", type=Path, help="Output path (single --files).")
    cli.add_argument("--dry-run", action="store_true", help="List only (dataset mode).")
    cli.add_argument("--limit", type=int, default=None)
    opts = cli.parse_args()

    if not opts.files:
        # Dataset mode: the same token feeds the client and the downloads.
        hf_token = os.environ.get("HF_TOKEN")
        return _run_dataset(HfApi(token=hf_token), hf_token, opts.dry_run, opts.limit)

    # One output path cannot receive several rewritten files.
    if opts.output and len(opts.files) != 1:
        cli.error("-o/--output only valid with a single --files argument.")
    return _run_files(opts.files, opts.output)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())
|