#!/usr/bin/env python3 """Backfill the thumbnail-grid summary view into already-published reports. The report generator (``cadgenbench.eval.report.single_run``) now renders the summary view as a grouped thumbnail grid instead of a flat table. Reports produced before that change are static HTML files in the submissions dataset (``reports/.html``); changing the generator does nothing to them. This one-time tool rewrites those published reports **in place, without re-evaluating or regenerating from run dirs**: - it reads each report's existing summary table (sample number, status, CAD score) and detail cards (which fixtures are editing) plus the render-bucket base URL already embedded in the file; - rebuilds the summary view as the grid using the *shared* builders from ``single_run`` (so a backfilled report is byte-identical to a freshly generated one), pointing editing cards at the ``edit_diff.png`` still and generation cards at the output ``iso.png`` — all assets that already exist; - injects the shared grid CSS/JS; the detail cards, header, score text and download button are left untouched. Run on local files (writes alongside, good for eyeballing):: python tools/backfill_report_grid.py --files /tmp/report.html -o /tmp/out.html Rewrite every published report in the submissions dataset (needs a write token):: HF_TOKEN= python tools/backfill_report_grid.py --dataset python tools/backfill_report_grid.py --dataset --dry-run # list only """ from __future__ import annotations import argparse import os import re import sys from pathlib import Path from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download # cadgenbench (for the shared grid builders) must be importable. _REPO_ROOT = Path(__file__).resolve().parents[2] _SRC = _REPO_ROOT / "cadgenbench" / "src" if _SRC.is_dir(): sys.path.insert(0, str(_SRC)) from cadgenbench.eval.report.single_run import ( # noqa: E402 _GRID_CSS, _GRID_JS, _render_grid_controls, grid_card_html, render_grid_groups, ) HF_ORG = os.getenv("HF_ORG", "HuggingAI4Engineering") SUBMISSIONS_REPO = os.getenv("HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions") INPUT_PROXY_BASE = "/task-input" EDIT_DIFF_STILL = "edit_diff.png" # --- parsing the old flat-table report ------------------------------------- _RENDER_BASE_RE = re.compile( r'(https?://[^\s"\']+?/resolve/renders/[^/"\']+)/[^/"\']+/[^"\']+\.(?:png|webp)' ) _ROW_RE = re.compile( r']*>(.*?)', re.S ) _NAME_RE = re.compile(r"([^<]+)") _STATUS_RE = re.compile(r'status-pill status-\w+">([^<]+)<') _CAD_RE = re.compile(r'') _SUMMARY_VIEW_RE = re.compile(r'(
).*?(
)', re.S) _GRID_HELP = ( '

Click a card to view details. ' 'j/k to navigate, ' 'Esc to return. Each card shows the input and the ' "candidate output. Score tint: " "≥0.90 " "≥0.60 " "<0.60 CAD score.

" ) _INPUT_SHAPE_RE = re.compile(re.escape(INPUT_PROXY_BASE) + r"/[^\"']+/renders/") def _editing_idxs(doc: str) -> set[int]: """Indices whose detail card is an editing task (has a STEP input). Detected by the Input column showing the *starting shape's* renders (``/task-input//renders/...``), which the report emits for every editing sample because it derives from the ``input.step`` input. This is deliberately not keyed on the edit-diff turntable / ``(edit diff)`` heading: the old generator rendered an *invalid* editing candidate with the generation layout (no diff), so those markers miss invalid edits, whereas the starting-shape renders are always present. Matches the new generator's ``wants_shape`` grouping so a backfilled report and a freshly generated one classify identically. """ out: set[int] = set() for block in doc.split('
str | None: """Return the report rewritten with the grid summary view, or ``None``. ``None`` means "leave unchanged": the report is already a grid, or it isn't a hosted report we can rebuild (no render-bucket URL to point the output thumbnails at).""" if 'class="ggrid"' in doc or 'id="groups"' in doc: return None # already backfilled base_m = _RENDER_BASE_RE.search(doc) if not base_m: return None # not a hosted report (e.g. base64-inlined local report) render_base = base_m.group(1) edit_idxs = _editing_idxs(doc) gen_cards: list[str] = [] edit_cards: list[str] = [] for m in _ROW_RE.finditer(doc): idx = int(m.group(1)) cells = m.group(2) name_m = _NAME_RE.search(cells) if not name_m: continue name = name_m.group(1).strip() status_m = _STATUS_RE.search(cells) status = status_m.group(1).strip() if status_m else "?" cad_m = _CAD_RE.search(cells) cad: float | None = None if cad_m: try: v = float(cad_m.group(1)) cad = v if v >= 0 else None except ValueError: cad = None is_editing = idx in edit_idxs if is_editing: in_src = f"{INPUT_PROXY_BASE}/{name}/renders/iso.png" out_src = f"{render_base}/{name}/{EDIT_DIFF_STILL}" else: in_src = f"{INPUT_PROXY_BASE}/{name}/input.png" out_src = f"{render_base}/{name}/iso.png" card = grid_card_html( idx=idx, name=name, is_editing=is_editing, status=status, cad=cad, in_src=in_src, out_src=out_src, ) (edit_cards if is_editing else gen_cards).append(card) if not gen_cards and not edit_cards: return None new_inner = _GRID_HELP + _render_grid_controls() + render_grid_groups( gen_cards, edit_cards, ) if not _SUMMARY_VIEW_RE.search(doc): return None doc = _SUMMARY_VIEW_RE.sub( lambda mm: mm.group(1) + new_inner + "
", doc, count=1, ) # Inject the shared grid styles + filtering behavior. doc = doc.replace("", _GRID_CSS + "", 1) doc = doc.replace("", f"", 1) return doc def _run_files(files: list[Path], out: Path | None) -> int: for f in files: doc = f.read_text() new = rewrite_report_html(doc) if new is None: print(f" SKIP {f} (already grid / not a hosted report)") continue dest = out or f dest.write_text(new) print(f" wrote {dest} ({len(new) // 1024} KB)") return 0 def _run_dataset(api: HfApi, token: str | None, dry_run: bool, limit: int | None) -> int: files = [ f for f in api.list_repo_files(SUBMISSIONS_REPO, repo_type="dataset") if f.startswith("reports/") and f.endswith(".html") ] files.sort() if limit is not None: files = files[:limit] print(f"Found {len(files)} report(s) in {SUBMISSIONS_REPO}.") ops: list[CommitOperationAdd] = [] for i, rel in enumerate(files, start=1): local = hf_hub_download( repo_id=SUBMISSIONS_REPO, filename=rel, repo_type="dataset", token=token, ) new = rewrite_report_html(Path(local).read_text()) if new is None: print(f" [{i}/{len(files)}] SKIP {rel} (already grid / not hosted)") continue print(f" [{i}/{len(files)}] {rel} -> grid ({len(new) // 1024} KB)") if not dry_run: ops.append(CommitOperationAdd(path_in_repo=rel, path_or_fileobj=new.encode())) if dry_run: print(f"Dry run: would rewrite {len([f for f in files])} candidate(s).") return 0 if not ops: print("Nothing to rewrite.") return 0 if not token: print("HF_TOKEN required to commit.", file=sys.stderr) return 2 api.create_commit( repo_id=SUBMISSIONS_REPO, repo_type="dataset", operations=ops, commit_message="reports: backfill thumbnail-grid summary view", ) print(f"Committed {len(ops)} rewritten report(s) to {SUBMISSIONS_REPO}.") return 0 def main() -> int: parser = argparse.ArgumentParser(description=__doc__) src = parser.add_mutually_exclusive_group(required=True) src.add_argument("--files", nargs="+", type=Path, help="Local report HTML files.") src.add_argument( "--dataset", action="store_true", help="Rewrite every reports/*.html in the submissions dataset.", ) parser.add_argument("-o", "--output", type=Path, help="Output path (single --files).") parser.add_argument("--dry-run", action="store_true", help="List only (dataset mode).") parser.add_argument("--limit", type=int, default=None) args = parser.parse_args() if args.files: if args.output and len(args.files) != 1: parser.error("-o/--output only valid with a single --files argument.") return _run_files(args.files, args.output) token = os.environ.get("HF_TOKEN") return _run_dataset(HfApi(token=token), token, args.dry_run, args.limit) if __name__ == "__main__": raise SystemExit(main())