#!/usr/bin/env python3
"""Backfill the corrected interface/edit-diff legends into existing reports.

The report generator now renders color-chip legends that match the render
palettes (see ``cadgenbench`` ``eval/report/single_run.py``):

- **Interface overlay**: the old legend omitted the dominant blue (the part
  itself) and used vague "free/filled sub-volumes" wording. The new legend is
  ``your part / keep-out (must stay empty) / keep-in (must be filled) /
  disagreement`` with matching chips.
- **Edit diff** (editing fixtures): previously had no legend; the new one is
  ``your output / extra material vs GT / missing material vs GT``.

Reports published before that change still carry the old/absent legends.
Rather than re-running ``evaluate``, this one-off tool patches the
already-stored ``reports/<submission_id>.html`` files in place: it swaps the
old interface legend, adds the edit-diff legend, and injects the chip CSS,
then re-uploads.

Idempotent: a report already carrying the new chips (``.legend-chip``) has its
interface/edit anchors absent, so re-running is a no-op.

Usage (dry-run lists what would change; nothing is written)::

    python tools/backfill_report_legends.py

    # actually patch + re-upload (needs a write-scoped HF_TOKEN):
    python tools/backfill_report_legends.py --apply
"""

from __future__ import annotations

import argparse
import logging
import re
import sys
from pathlib import Path

# Make the repository root importable so the sibling modules below resolve
# when this file is run directly as a script.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download  # noqa: E402

from leaderboard import (  # noqa: E402
    HF_SUBMISSIONS_REPO,
    _load_rows_from_hub,
    _report_relative_url,
)
from submit import REPORTS_DIR  # noqa: E402

logger = logging.getLogger(__name__)

# These three strings MUST stay byte-for-byte identical to what
# single_run.py emits (_legend_html(_IFACE_LEGEND) / _legend_html(
# _EDIT_DIFF_LEGEND) and the .legend/.legend-chip CSS) so a patched report is
# indistinguishable from a freshly-generated one.
#
# NOTE(review): an earlier version of this comment said a self-check in main()
# compares these strings against the live generator. No such comparison exists
# in this file; main() only runs the local sanity check in
# _degenerate_constants().
#
# NOTE(review): the literals below appear to have lost their HTML markup
# (empty fragments where chip/span tags would be expected). They are kept
# exactly as found; restore them from the generator before running this tool.
_IFACE_LEGEND_HTML = (
    ''
    'your part'
    ''
    "keep-out (must stay empty)"
    ''
    "keep-in (must be filled)"
    'disagreement'
    ""
)
_EDIT_LEGEND_HTML = (
    ''
    'your output'
    ''
    "extra material vs GT"
    ''
    "missing material vs GT"
    ""
)
_CSS_BLOCK = (
    "\n.legend { color: #6b7785; font-size: 0.78em; font-weight: 400; "
    "text-transform: none; letter-spacing: normal; line-height: 1.6; }\n"
    ".legend-chip { display: inline-block; width: 11px; height: 11px; "
    "border-radius: 3px; vertical-align: middle; "
    "margin: 0 5px 0 14px; border: 1px solid rgba(0,0,0,0.18); }\n"
)

# Old interface legend span (any per-fixture occurrence).
# NOTE(review): as written this pattern matches the empty string, so ``sub``
# would fire at every position of the document. The real pattern presumably
# wrapped the old legend element -- TODO restore.
_OLD_IFACE_RE = re.compile(
    r".*?",
    re.DOTALL,
)

# Bare edit-diff heading (no legend yet).
# NOTE(review): presumably an <h3> element whose tags were stripped, leaving
# only the surrounding line breaks -- TODO restore.
_BARE_EDIT_H3 = "\n\nOutput vs ground truth (edit diff)\n\n"
_NEW_EDIT_H3 = f"\n\nOutput vs ground truth (edit diff) {_EDIT_LEGEND_HTML}\n\n"

# Text in front of which the chip CSS is injected (first occurrence only).
# NOTE(review): empty as found; presumably the closing tag of the report's
# style block -- TODO restore. With an empty anchor the CSS would simply be
# prepended to the document.
_STYLE_ANCHOR = ""


def patch_html(doc: str) -> str | None:
    """Return the patched HTML, or ``None`` when nothing needs changing.

    Swaps the old interface legend, adds the edit-diff legend, and injects the
    chip CSS. Idempotent: each sub-edit's anchor disappears once applied.

    Args:
        doc: Full text of one stored report.

    Returns:
        The rewritten document, or ``None`` when neither the interface-legend
        substitution nor the heading replacement changed anything.
    """
    # A callable replacement keeps backslashes in the legend HTML literal
    # (a plain string replacement would be parsed as a regex template).
    new = _OLD_IFACE_RE.sub(lambda _m: _IFACE_LEGEND_HTML, doc)
    new = new.replace(_BARE_EDIT_H3, _NEW_EDIT_H3)
    if new == doc:
        return None
    # Inject the chip CSS once, and only when the report does not already
    # define it and there is an anchor to insert it in front of.
    if ".legend-chip" not in new and _STYLE_ANCHOR in new:
        new = new.replace(_STYLE_ANCHOR, _CSS_BLOCK + _STYLE_ANCHOR, 1)
    return new


def _degenerate_constants() -> list[str]:
    """Describe module constants that would make patching destructive."""
    problems: list[str] = []
    if _OLD_IFACE_RE.match("") is not None:
        problems.append(
            "_OLD_IFACE_RE matches the empty string "
            "(the legend would be inserted at every position)"
        )
    if not _STYLE_ANCHOR:
        problems.append(
            "_STYLE_ANCHOR is empty "
            "(the CSS would be prepended to the document)"
        )
    legends = (
        ("_IFACE_LEGEND_HTML", _IFACE_LEGEND_HTML),
        ("_EDIT_LEGEND_HTML", _EDIT_LEGEND_HTML),
    )
    for name, html in legends:
        # The injected CSS styles ``.legend-chip``; a legend without that
        # class carries no chips at all.
        if "legend-chip" not in html:
            problems.append(f"{name} contains no legend-chip markup")
    return problems


def _report_path(submission_id: str) -> str:
    """Return the in-repo path of the stored report for *submission_id*."""
    return f"{REPORTS_DIR}/{submission_id}.html"


def main() -> int:
    """Patch stored reports and, with ``--apply``, upload them in one commit.

    Without ``--apply`` this is a dry run: reports are downloaded and patched
    in memory, the outcome is logged, and nothing is uploaded.

    Returns:
        ``0`` on success (including "nothing to do" and dry runs), ``1`` when
        the module constants fail the sanity check and the run is aborted.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the usage example in the module docstring laid out as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Re-upload patched reports (default is a dry run).",
    )
    args = parser.parse_args()

    # Fail fast, before any network traffic: with degenerate anchors every
    # report would be "patched" into garbage.
    problems = _degenerate_constants()
    if problems:
        for problem in problems:
            logger.error("Refusing to run: %s.", problem)
        return 1

    rows = _load_rows_from_hub()
    # Only rows with a submission id for which _report_relative_url returns a
    # truthy value are considered.
    targets = [
        r
        for r in rows
        if r.get("submission_id")
        and _report_relative_url(
            r.get("submission_id"),
            r.get("status"),
            r.get("submission_sha256"),
        )
    ]
    logger.info("Found %d report(s) to consider.", len(targets))

    ops: list[CommitOperationAdd] = []
    skipped = 0  # counts both fetch failures and already-up-to-date reports
    for row in targets:
        sid = row["submission_id"]
        try:
            local = hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                filename=_report_path(sid),
            )
        except Exception as e:  # noqa: BLE001
            # Deliberate best effort: one unfetchable report must not abort
            # the whole backfill.
            logger.warning(" skip %s: could not fetch report (%s)", sid, e)
            skipped += 1
            continue
        doc = Path(local).read_text(encoding="utf-8")
        patched = patch_html(doc)
        if patched is None:
            logger.info(" unchanged %s", sid)
            skipped += 1
            continue
        logger.info(" patched %s", sid)
        ops.append(
            CommitOperationAdd(
                path_in_repo=_report_path(sid),
                path_or_fileobj=patched.encode("utf-8"),
            )
        )

    logger.info("%d to patch, %d unchanged/skipped.", len(ops), skipped)
    if not ops:
        logger.info("Nothing to do.")
        return 0
    if not args.apply:
        logger.info("Dry run -- re-run with --apply to upload.")
        return 0

    # All patched reports go up in a single commit.
    HfApi().create_commit(
        repo_id=HF_SUBMISSIONS_REPO,
        repo_type="dataset",
        operations=ops,
        commit_message="reports: backfill corrected interface + edit-diff legends",
    )
    logger.info("Uploaded %d patched report(s).", len(ops))
    return 0


if __name__ == "__main__":
    sys.exit(main())