#!/usr/bin/env python3
"""Backfill the "Download submission ZIP" button into existing HTML reports.

The report generator now renders a download button in the run header (see
``cadgenbench`` ``eval/report/single_run.py``). Reports published before that
change don't have it. Rather than re-running ``evaluate`` (expensive) or even
re-rendering from artifacts, this one-off tool patches the **already-stored**
``reports/<submission_id>.html`` files in place: it injects the button's CSS +
markup at a stable anchor in the header and re-uploads. Pure HTML string edit,
no eval, no image re-render.

Idempotent: a report that already carries the button
(``class="download-zip"``) is left untouched, so re-running is safe.

The button links the submission's ``submissions/<submission_id>.zip`` blob URL
-- the same artifact the gallery links and the freshly-generated reports point
at -- so a patched report is visually and behaviorally identical to a fresh
one.

Usage (dry-run lists what would change; nothing is written)::

    python tools/backfill_report_download_button.py

    # actually patch + re-upload (needs a write-scoped HF_TOKEN):
    python tools/backfill_report_download_button.py --apply
"""

from __future__ import annotations

import argparse
import html
import logging
import re
import sys
from pathlib import Path

# Import the Space's own read/identity helpers (this file lives in tools/, one
# level under the Space root).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download  # noqa: E402

from leaderboard import (  # noqa: E402
    HF_SUBMISSIONS_REPO,
    _load_rows_from_hub,
    _report_relative_url,
)
from submit import REPORTS_DIR, _submission_zip_url  # noqa: E402

logger = logging.getLogger(__name__)

# CSS injected before </style>. Mirrors the rules generate_html now emits so a
# patched report is byte-for-byte equivalent in the header to a fresh one.
_CSS_BLOCK = (
    "\n.run-header-top { display: flex; align-items: center; "
    "justify-content: space-between; gap: 16px; flex-wrap: wrap; }\n"
    ".run-header-top h1 { border-bottom: none; padding-bottom: 0; margin: 0; }\n"
    ".download-zip { background: #37474f; color: #fff; text-decoration: none; "
    "padding: 8px 16px; border-radius: 6px; font-size: 0.9em; "
    "font-weight: 600; white-space: nowrap; flex-shrink: 0; }\n"
    ".download-zip:hover { background: #455a64; }\n"
)

# Matches the pre-button header: the run-header div immediately followed by the
# <h1> title. Capture the title so we can re-wrap it in the flex top row.
# NOTE(review): the opening-tag text was reconstructed from this comment and
# the CSS selectors above -- confirm the exact class attribute against the
# markup single_run.py used to emit before the button was added.
_HEADER_RE = re.compile(
    r'(<div class="run-header">)\s*(<h1>.*?</h1>)',
    re.DOTALL,
)


def patch_html(doc: str, zip_url: str) -> str | None:
    """Return the patched HTML, or ``None`` if no change is needed/possible.

    Injects the download-button CSS before ``</style>`` and wraps the header
    title + button in a ``run-header-top`` flex row.

    Args:
        doc: Full HTML text of a stored report.
        zip_url: URL the button should link to; HTML-escaped before insertion.

    Returns:
        The patched document, or ``None`` when the button is already present
        (idempotence) or the header anchor is missing.
    """
    if 'class="download-zip"' in doc:
        return None  # already patched

    href = html.escape(str(zip_url), quote=True)
    # NOTE(review): only ``class`` and ``href`` are certain here; confirm any
    # further attributes (e.g. ``download``) against the report generator.
    button = (
        f'<a class="download-zip" href="{href}">'
        "⬇ Download submission ZIP</a>"
    )

    def _wrap_title(match: re.Match[str]) -> str:
        # Built with a callable rather than a replacement template: a template
        # would interpret backslashes / ``\g<...>`` inside the URL as regex
        # escapes and either raise ``re.error`` or corrupt the output.
        return (
            f"{match.group(1)}\n"
            '<div class="run-header-top">\n'
            f"{match.group(2)}\n"
            f"{button}\n"
            "</div>"
        )

    new_doc, n = _HEADER_RE.subn(_wrap_title, doc, count=1)
    if n == 0:
        return None  # header shape not recognized; skip rather than corrupt

    # Without a </style> anchor the button is still inserted, just unstyled.
    if "</style>" in new_doc:
        new_doc = new_doc.replace("</style>", _CSS_BLOCK + "</style>", 1)
    return new_doc


def _zip_url_for(row: dict) -> str:
    """Prefer the row's recorded blob URL; fall back to the canonical path."""
    url = row.get("submission_blob_url")
    if url:
        return str(url)
    return _submission_zip_url(row["submission_id"])


def main() -> int:
    """Patch stored reports and, with ``--apply``, upload them in one commit.

    Returns:
        Process exit code (always ``0``; fetch failures are logged and the
        affected report is skipped).
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Re-upload patched reports (default is a dry run).",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Patch at most N reports (for a cautious first pass).",
    )
    args = parser.parse_args()

    rows = _load_rows_from_hub()
    # Only completed rows from the modern pipeline have a
    # reports/<submission_id>.html.
    targets = [
        r
        for r in rows
        if r.get("submission_id")
        and _report_relative_url(
            r.get("submission_id"),
            r.get("status"),
            r.get("submission_sha256"),
        )
    ]
    logger.info("Found %d report(s) to consider.", len(targets))

    ops: list[CommitOperationAdd] = []
    skipped = 0
    for row in targets:
        # Checked before doing any work so that ``--limit 0`` patches nothing
        # ("at most N") instead of slipping one report through.
        if args.limit is not None and len(ops) >= args.limit:
            break

        sid = row["submission_id"]
        try:
            local = hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                filename=f"{REPORTS_DIR}/{sid}.html",
            )
        except Exception as e:  # noqa: BLE001
            # Deliberate best-effort: one unreadable report must not abort
            # the whole backfill.
            logger.warning("  skip %s: could not fetch report (%s)", sid, e)
            skipped += 1
            continue

        doc = Path(local).read_text(encoding="utf-8")
        patched = patch_html(doc, _zip_url_for(row))
        if patched is None:
            logger.info("  unchanged %s (already has button or no header)", sid)
            skipped += 1
            continue

        logger.info("  patched %s", sid)
        ops.append(
            CommitOperationAdd(
                path_in_repo=f"{REPORTS_DIR}/{sid}.html",
                path_or_fileobj=patched.encode("utf-8"),
            )
        )

    logger.info(
        "%d to patch, %d unchanged/skipped.",
        len(ops),
        skipped,
    )
    if not ops:
        logger.info("Nothing to do.")
        return 0
    if not args.apply:
        logger.info("Dry run -- re-run with --apply to upload.")
        return 0

    # Single commit for all patched reports.
    HfApi().create_commit(
        repo_id=HF_SUBMISSIONS_REPO,
        repo_type="dataset",
        operations=ops,
        commit_message="reports: backfill download-submission-zip button",
    )
    logger.info("Uploaded %d patched report(s).", len(ops))
    return 0


if __name__ == "__main__":
    sys.exit(main())