#!/usr/bin/env python3
"""Backfill the "Download submission ZIP" button into existing HTML reports.
The report generator now renders a download button in the run header (see
``cadgenbench`` ``eval/report/single_run.py``). Reports published before that
change don't have it. Rather than re-running ``evaluate`` (expensive) or even
re-rendering from artifacts, this one-off tool patches the **already-stored**
``reports/<id>.html`` files in place: it injects the button's CSS + markup at a
stable anchor in the header and re-uploads. Pure HTML string edit, no eval, no
image re-render.
Idempotent: a report that already carries the button (``class="download-zip"``)
is left untouched, so re-running is safe.
The button links the submission's ``submissions/<id>.zip`` blob URL -- the same
artifact the gallery links and the freshly-generated reports point at -- so a
patched report is visually and behaviorally identical to a fresh one.
Usage (dry-run lists what would change; nothing is written)::
python tools/backfill_report_download_button.py
# actually patch + re-upload (needs a write-scoped HF_TOKEN):
python tools/backfill_report_download_button.py --apply
"""
from __future__ import annotations
import argparse
import html
import logging
import re
import sys
from pathlib import Path
# Import the Space's own read/identity helpers (this file lives in tools/, one
# level under the Space root).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download # noqa: E402
from leaderboard import ( # noqa: E402
HF_SUBMISSIONS_REPO,
_load_rows_from_hub,
_report_relative_url,
)
from submit import REPORTS_DIR, _submission_zip_url # noqa: E402
# Module-level logger; main() installs the handler/format via basicConfig.
logger = logging.getLogger(__name__)
# CSS injected before </style>. Intended to mirror the rules the report
# generator emits for the header row and the button, so that a patched header
# is styled like a freshly generated one.
_CSS_BLOCK = (
    "\n.run-header-top { display: flex; align-items: center; "
    "justify-content: space-between; gap: 16px; flex-wrap: wrap; }\n"
    ".run-header-top h1 { border-bottom: none; padding-bottom: 0; margin: 0; }\n"
    ".download-zip { background: #37474f; color: #fff; text-decoration: none; "
    "padding: 8px 16px; border-radius: 6px; font-size: 0.9em; "
    "font-weight: 600; white-space: nowrap; flex-shrink: 0; }\n"
    ".download-zip:hover { background: #455a64; }\n"
)

# Matches the pre-button header: the run-header div immediately followed by the
# <h1> title. Group 1 is the opening div, group 2 the whole <h1>...</h1>, so the
# title can be re-wrapped in the flex top row.
_HEADER_RE = re.compile(
    r'(<div class="run-header">)\s*(<h1>.*?</h1>)',
    re.DOTALL,
)


def patch_html(doc: str, zip_url: str) -> str | None:
    """Return the patched HTML, or ``None`` if no change is needed/possible.

    Wraps the header title and a download link in a ``run-header-top`` row
    and, when the document has a ``</style>`` tag, injects ``_CSS_BLOCK``
    before the first one.

    Args:
        doc: Full HTML text of a stored report.
        zip_url: Target of the download link. It is HTML-escaped before being
            placed in the ``href`` attribute.

    Returns:
        The patched document, or ``None`` when the document already contains
        ``class="download-zip"`` (idempotence) or when the
        ``<div class="run-header">`` + ``<h1>`` anchor is not found.
    """
    if 'class="download-zip"' in doc:
        return None  # already patched

    href = html.escape(str(zip_url), quote=True)
    button = (
        f'<a class="download-zip" href="{href}" download rel="noopener">'
        "⬇ Download submission ZIP</a>"
    )

    def _wrap_header(match: "re.Match[str]") -> str:
        # A callable replacement is inserted verbatim. A template string would
        # have its backslashes interpreted by ``re`` (``\1``, ``\g<0>``, or an
        # error on e.g. ``\d``), and html.escape leaves backslashes in the URL
        # untouched.
        return (
            f'{match.group(1)}\n<div class="run-header-top">\n'
            f"{match.group(2)}\n{button}\n</div>"
        )

    new_doc, replaced = _HEADER_RE.subn(_wrap_header, doc, count=1)
    if replaced == 0:
        return None  # header shape not recognized; skip rather than corrupt

    # Without a </style> tag the button is still added, just without its CSS.
    if "</style>" in new_doc:
        new_doc = new_doc.replace("</style>", _CSS_BLOCK + "</style>", 1)
    return new_doc
def _zip_url_for(row: dict) -> str:
"""Prefer the row's recorded blob URL; fall back to the canonical path."""
url = row.get("submission_blob_url")
if url:
return str(url)
return _submission_zip_url(row["submission_id"])
def main() -> int:
    """Patch stored reports that lack the download button.

    Loads the submission rows, keeps those with a ``submission_id`` for which
    ``_report_relative_url`` returns a truthy value, downloads each
    ``reports/<id>.html`` and runs ``patch_html`` on it. Without ``--apply``
    this is a dry run: reports are fetched and patched in memory and only a
    summary is logged. With ``--apply`` all patched reports are uploaded in a
    single commit.

    A report that cannot be fetched is logged and skipped. Exceptions from
    loading the rows or from the upload itself are not caught here.

    Returns:
        ``0`` (used as the process exit code).
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--apply", action="store_true",
        help="Re-upload patched reports (default is a dry run).",
    )
    parser.add_argument(
        "--limit", type=int, default=None,
        help="Patch at most N reports (for a cautious first pass).",
    )
    args = parser.parse_args()

    rows = _load_rows_from_hub()
    # Only completed rows from the modern pipeline have a reports/<id>.html.
    targets = [
        r for r in rows
        if r.get("submission_id")
        and _report_relative_url(
            r.get("submission_id"), r.get("status"), r.get("submission_sha256"),
        )
    ]
    logger.info("Found %d report(s) to consider.", len(targets))

    ops: list[CommitOperationAdd] = []
    skipped = 0
    for row in targets:
        # Check the cap before doing any work so that ``--limit 0`` (or a
        # negative value) patches nothing instead of one report.
        if args.limit is not None and len(ops) >= args.limit:
            break

        sid = row["submission_id"]
        report_path = f"{REPORTS_DIR}/{sid}.html"
        try:
            local = hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                filename=report_path,
            )
        except Exception as e:  # noqa: BLE001
            # Best effort: one unreachable report must not abort the backfill.
            logger.warning(" skip %s: could not fetch report (%s)", sid, e)
            skipped += 1
            continue

        doc = Path(local).read_text(encoding="utf-8")
        patched = patch_html(doc, _zip_url_for(row))
        if patched is None:
            logger.info(" unchanged %s (already has button or no header)", sid)
            skipped += 1
            continue

        logger.info(" patched %s", sid)
        ops.append(
            CommitOperationAdd(
                path_in_repo=report_path,
                path_or_fileobj=patched.encode("utf-8"),
            )
        )

    logger.info(
        "%d to patch, %d unchanged/skipped.", len(ops), skipped,
    )
    if not ops:
        logger.info("Nothing to do.")
        return 0
    if not args.apply:
        logger.info("Dry run -- re-run with --apply to upload.")
        return 0

    # One commit for all patched reports.
    HfApi().create_commit(
        repo_id=HF_SUBMISSIONS_REPO,
        repo_type="dataset",
        operations=ops,
        commit_message="reports: backfill download-submission-zip button",
    )
    logger.info("Uploaded %d patched report(s).", len(ops))
    return 0
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())