| |
| """Backfill the "Download submission ZIP" button into existing HTML reports. |
| |
| The report generator now renders a download button in the run header (see |
| ``cadgenbench`` ``eval/report/single_run.py``). Reports published before that |
| change don't have it. Rather than re-running ``evaluate`` (expensive) or even |
| re-rendering from artifacts, this one-off tool patches the **already-stored** |
| ``reports/<id>.html`` files in place: it injects the button's CSS + markup at a |
| stable anchor in the header and re-uploads. Pure HTML string edit, no eval, no |
| image re-render. |
| |
| Idempotent: a report that already carries the button (``class="download-zip"``) |
| is left untouched, so re-running is safe. |
| |
| The button links the submission's ``submissions/<id>.zip`` blob URL -- the same |
| artifact the gallery links and the freshly-generated reports point at -- so a |
| patched report is visually and behaviorally identical to a fresh one. |
| |
| Usage (dry-run lists what would change; nothing is written):: |
| |
| python tools/backfill_report_download_button.py |
| |
| # actually patch + re-upload (needs a write-scoped HF_TOKEN): |
| python tools/backfill_report_download_button.py --apply |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import html |
| import logging |
| import re |
| import sys |
| from pathlib import Path |
|
|
| |
| |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) |
|
|
| from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download |
|
|
| from leaderboard import ( |
| HF_SUBMISSIONS_REPO, |
| _load_rows_from_hub, |
| _report_relative_url, |
| ) |
| from submit import REPORTS_DIR, _submission_zip_url |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
| |
# CSS appended to the report's existing stylesheet: rules for the
# ``run-header-top`` flex row (title + button) and for the ``download-zip``
# button itself, including its hover state.
_CSS_BLOCK = (
    "\n.run-header-top { display: flex; align-items: center; "
    "justify-content: space-between; gap: 16px; flex-wrap: wrap; }\n"
    ".run-header-top h1 { border-bottom: none; padding-bottom: 0; margin: 0; }\n"
    ".download-zip { background: #37474f; color: #fff; text-decoration: none; "
    "padding: 8px 16px; border-radius: 6px; font-size: 0.9em; "
    "font-weight: 600; white-space: nowrap; flex-shrink: 0; }\n"
    ".download-zip:hover { background: #455a64; }\n"
)


# Injection anchor: the opening ``run-header`` div (group 1) followed,
# after optional whitespace, by its ``<h1>`` title (group 2). DOTALL lets the
# title span several lines.
_HEADER_RE = re.compile(
    r'(<div class="run-header">)\s*(<h1>.*?</h1>)',
    re.DOTALL,
)


def patch_html(doc: str, zip_url: str) -> str | None:
    """Return *doc* with the download button injected, or ``None``.

    The first header anchor matched by ``_HEADER_RE`` is rewritten so that the
    title and a new ``<a class="download-zip">`` link sit together inside a
    ``run-header-top`` div. When the document contains ``</style>``, the
    button's CSS is inserted right before its first occurrence; a document
    without ``</style>`` still receives the markup, just without the CSS.

    Args:
        doc: Full HTML text of a stored report.
        zip_url: Target of the download link. It is HTML-escaped (quotes
            included) before being placed in the ``href`` attribute.

    Returns:
        The patched HTML, or ``None`` when nothing was changed: either the
        document already contains ``class="download-zip"`` (which makes the
        operation idempotent) or the header anchor was not found.
    """
    if 'class="download-zip"' in doc:
        return None

    href = html.escape(str(zip_url), quote=True)
    button = (
        f'<a class="download-zip" href="{href}" download rel="noopener">'
        f"⬇ Download submission ZIP</a>"
    )

    def _wrap_header(match: re.Match[str]) -> str:
        # Built by a callable rather than a replacement template: in a
        # template string ``re`` would interpret backslashes coming from the
        # URL (``\1``, ``\g<...>``) as group references or raise on them.
        return (
            f'{match.group(1)}\n<div class="run-header-top">\n'
            f"{match.group(2)}\n{button}\n</div>"
        )

    new_doc, n = _HEADER_RE.subn(_wrap_header, doc, count=1)
    if n == 0:
        return None

    if "</style>" in new_doc:
        # Only the first stylesheet is extended.
        new_doc = new_doc.replace("</style>", _CSS_BLOCK + "</style>", 1)
    return new_doc
|
|
|
|
| def _zip_url_for(row: dict) -> str: |
| """Prefer the row's recorded blob URL; fall back to the canonical path.""" |
| url = row.get("submission_blob_url") |
| if url: |
| return str(url) |
| return _submission_zip_url(row["submission_id"]) |
|
|
|
|
def main() -> int:
    """Command-line entry point: patch stored reports and optionally upload.

    Loads the leaderboard rows, downloads each candidate report from the
    submissions dataset repo, runs it through ``patch_html`` and collects the
    changed files. Without ``--apply`` this is a dry run that only logs what
    would change; with ``--apply`` all patched reports are uploaded in a
    single commit.

    Returns:
        Always ``0``. Reports that cannot be fetched are logged and skipped
        rather than aborting the run. Invalid arguments exit through
        ``argparse``.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line breaks (usage examples) in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--apply", action="store_true",
        help="Re-upload patched reports (default is a dry run).",
    )
    parser.add_argument(
        "--limit", type=int, default=None,
        help="Patch at most N reports (for a cautious first pass).",
    )
    args = parser.parse_args()
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be non-negative")

    rows = _load_rows_from_hub()
    # Keep rows that have an id and for which _report_relative_url returns a
    # truthy value -- presumably meaning a report exists for that row.
    targets = [
        r for r in rows
        if r.get("submission_id")
        and _report_relative_url(
            r.get("submission_id"), r.get("status"), r.get("submission_sha256"),
        )
    ]
    logger.info("Found %d report(s) to consider.", len(targets))

    ops: list[CommitOperationAdd] = []
    skipped = 0
    for row in targets:
        # Checked before doing any work so that ``--limit 0`` patches nothing
        # and no report is downloaded once the limit has been reached.
        if args.limit is not None and len(ops) >= args.limit:
            break
        sid = row["submission_id"]
        try:
            local = hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                filename=f"{REPORTS_DIR}/{sid}.html",
            )
        except Exception as e:
            # Best effort: one unavailable report must not stop the backfill.
            logger.warning("  skip %s: could not fetch report (%s)", sid, e)
            skipped += 1
            continue
        doc = Path(local).read_text(encoding="utf-8")
        patched = patch_html(doc, _zip_url_for(row))
        if patched is None:
            logger.info("  unchanged %s (already has button or no header)", sid)
            skipped += 1
            continue
        logger.info("  patched %s", sid)
        ops.append(
            CommitOperationAdd(
                path_in_repo=f"{REPORTS_DIR}/{sid}.html",
                path_or_fileobj=patched.encode("utf-8"),
            )
        )

    logger.info(
        "%d to patch, %d unchanged/skipped.", len(ops), skipped,
    )
    if not ops:
        logger.info("Nothing to do.")
        return 0
    if not args.apply:
        logger.info("Dry run -- re-run with --apply to upload.")
        return 0

    # All patched reports go up in one commit.
    HfApi().create_commit(
        repo_id=HF_SUBMISSIONS_REPO,
        repo_type="dataset",
        operations=ops,
        commit_message="reports: backfill download-submission-zip button",
    )
    logger.info("Uploaded %d patched report(s).", len(ops))
    return 0
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|