File size: 6,426 Bytes
c48c18f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/env python3
"""Backfill the "Download submission ZIP" button into existing HTML reports.

The report generator now renders a download button in the run header (see
``cadgenbench`` ``eval/report/single_run.py``). Reports published before that
change don't have it. Rather than re-running ``evaluate`` (expensive) or even
re-rendering from artifacts, this one-off tool patches the **already-stored**
``reports/<id>.html`` files in place: it injects the button's CSS + markup at a
stable anchor in the header and re-uploads. Pure HTML string edit, no eval, no
image re-render.

Idempotent: a report that already carries the button (``class="download-zip"``)
is left untouched, so re-running is safe.

The button links the submission's ``submissions/<id>.zip`` blob URL -- the same
artifact the gallery links and the freshly-generated reports point at -- so a
patched report is visually and behaviorally identical to a fresh one.

Usage (dry-run lists what would change; nothing is written)::

    python tools/backfill_report_download_button.py

    # actually patch + re-upload (needs a write-scoped HF_TOKEN):
    python tools/backfill_report_download_button.py --apply
"""
from __future__ import annotations

import argparse
import html
import logging
import re
import sys
from pathlib import Path

# Import the Space's own read/identity helpers (this file lives in tools/, one
# level under the Space root).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download  # noqa: E402

from leaderboard import (  # noqa: E402
    HF_SUBMISSIONS_REPO,
    _load_rows_from_hub,
    _report_relative_url,
)
from submit import REPORTS_DIR, _submission_zip_url  # noqa: E402

# Module-level logger; main() configures the root handler/format before use.
logger = logging.getLogger(__name__)

# CSS injected before </style>. Mirrors the rules generate_html now emits so a
# patched report is byte-for-byte equivalent in the header to a fresh one.
_CSS_BLOCK = (
    "\n.run-header-top { display: flex; align-items: center; "
    "justify-content: space-between; gap: 16px; flex-wrap: wrap; }\n"
    ".run-header-top h1 { border-bottom: none; padding-bottom: 0; margin: 0; }\n"
    ".download-zip { background: #37474f; color: #fff; text-decoration: none; "
    "padding: 8px 16px; border-radius: 6px; font-size: 0.9em; "
    "font-weight: 600; white-space: nowrap; flex-shrink: 0; }\n"
    ".download-zip:hover { background: #455a64; }\n"
)

# Matches the pre-button header: the run-header div immediately followed by the
# <h1> title. Capture the title so we can re-wrap it in the flex top row.
_HEADER_RE = re.compile(
    r'(<div class="run-header">)\s*(<h1>.*?</h1>)',
    re.DOTALL,
)


def patch_html(doc: str, zip_url: str) -> str | None:
    """Return ``doc`` with the download button injected, or ``None``.

    The first ``<div class="run-header">`` + ``<h1>`` pair (as matched by
    ``_HEADER_RE``) is rewritten so that the title and a new
    ``<a class="download-zip">`` link sit together inside a
    ``<div class="run-header-top">`` row. ``_CSS_BLOCK`` is then inserted
    before the first ``</style>``, if the document has one.

    Args:
        doc: Full HTML text of a stored report.
        zip_url: URL the button should link to. It is HTML-escaped before
            being placed in the ``href`` attribute.

    Returns:
        The patched HTML, or ``None`` when nothing should be written: either
        the document already contains ``class="download-zip"`` (idempotence)
        or the header anchor was not found.
    """
    if 'class="download-zip"' in doc:
        return None  # already patched

    href = html.escape(str(zip_url), quote=True)
    button = (
        f'<a class="download-zip" href="{href}" download rel="noopener">'
        "&#11015; Download submission ZIP</a>"
    )

    def _wrap_title(match: re.Match[str]) -> str:
        # Use a callable rather than a replacement template: the return value
        # is inserted verbatim, so a backslash in the URL can never be read as
        # a group reference (``\1``) or rejected as a bad escape (``\s``).
        return (
            f'{match.group(1)}\n<div class="run-header-top">\n'
            f"{match.group(2)}\n{button}\n</div>"
        )

    new_doc, n = _HEADER_RE.subn(_wrap_title, doc, count=1)
    if n == 0:
        return None  # header shape not recognized; skip rather than corrupt

    # No-op when the document has no </style>; the button is then unstyled.
    return new_doc.replace("</style>", _CSS_BLOCK + "</style>", 1)


def _zip_url_for(row: dict) -> str:
    """Prefer the row's recorded blob URL; fall back to the canonical path."""
    url = row.get("submission_blob_url")
    if url:
        return str(url)
    return _submission_zip_url(row["submission_id"])


def main() -> int:
    """Patch stored reports with the download button; upload when ``--apply``.

    Loads the submission rows, downloads each candidate
    ``<REPORTS_DIR>/<id>.html`` from the submissions dataset repo, runs
    ``patch_html`` on it and collects the changed documents. Without
    ``--apply`` this is a dry run that only logs what would change. With
    ``--apply`` all patched reports are uploaded in a single commit.

    ``--limit N`` caps the number of patched reports at N. The cap is checked
    before each report is processed, so ``--limit 0`` patches nothing.

    Returns:
        Process exit code; always ``0``. Reports that cannot be fetched are
        logged and counted as skipped rather than treated as fatal.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--apply", action="store_true",
        help="Re-upload patched reports (default is a dry run).",
    )
    parser.add_argument(
        "--limit", type=int, default=None,
        help="Patch at most N reports (for a cautious first pass).",
    )
    args = parser.parse_args()

    rows = _load_rows_from_hub()
    # Only completed rows from the modern pipeline have a reports/<id>.html.
    targets = [
        r for r in rows
        if r.get("submission_id")
        and _report_relative_url(
            r.get("submission_id"), r.get("status"), r.get("submission_sha256"),
        )
    ]
    logger.info("Found %d report(s) to consider.", len(targets))

    ops: list[CommitOperationAdd] = []
    skipped = 0
    for row in targets:
        # Enforce the cap *before* touching the next report so that
        # ``--limit 0`` really means "patch none" and no report is downloaded
        # needlessly once the cap has been reached.
        if args.limit is not None and len(ops) >= args.limit:
            break

        sid = row["submission_id"]
        report_path = f"{REPORTS_DIR}/{sid}.html"
        try:
            local = hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                filename=report_path,
            )
        except Exception as e:  # noqa: BLE001
            # Deliberate best-effort: one unreadable report must not abort the
            # whole backfill.
            logger.warning("  skip %s: could not fetch report (%s)", sid, e)
            skipped += 1
            continue

        doc = Path(local).read_text(encoding="utf-8")
        patched = patch_html(doc, _zip_url_for(row))
        if patched is None:
            logger.info("  unchanged %s (already has button or no header)", sid)
            skipped += 1
            continue

        logger.info("  patched   %s", sid)
        ops.append(
            CommitOperationAdd(
                path_in_repo=report_path,
                path_or_fileobj=patched.encode("utf-8"),
            )
        )

    logger.info(
        "%d to patch, %d unchanged/skipped.", len(ops), skipped,
    )
    if not ops:
        logger.info("Nothing to do.")
        return 0
    if not args.apply:
        logger.info("Dry run -- re-run with --apply to upload.")
        return 0

    # One commit for every patched report.
    HfApi().create_commit(
        repo_id=HF_SUBMISSIONS_REPO,
        repo_type="dataset",
        operations=ops,
        commit_message="reports: backfill download-submission-zip button",
    )
    logger.info("Uploaded %d patched report(s).", len(ops))
    return 0


# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())