Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

CADGenBench / tools /backfill_report_legends.py

Michael Rabinovich

leaderboard: bump cadgenbench to 1010043 + legend backfill tool

1a24d4a 5 days ago

6.29 kB

	#!/usr/bin/env python3
	"""Backfill the corrected interface/edit-diff legends into existing reports.

	The report generator now renders color-chip legends that match the render
	palettes (see ``cadgenbench`` ``eval/report/single_run.py``):

	- Interface overlay: the old legend omitted the dominant blue (the part
	  itself) and used vague "free/filled sub-volumes" wording. The new legend is
	  ``your part / keep-out (must stay empty) / keep-in (must be filled) /
	  disagreement`` with matching chips.
	- Edit diff (editing fixtures): previously had no legend; the new one is
	  ``your output / extra material vs GT / missing material vs GT``.

	Reports published before that change still carry the old/absent legends. Rather
	than re-running ``evaluate``, this one-off tool patches the already-stored
	``reports/<id>.html`` files in place: it swaps the old interface legend, adds
	the edit-diff legend, and injects the chip CSS, then re-uploads.

	Idempotent: a report already carrying the new chips (``.legend-chip``) has its
	interface/edit anchors absent, so re-running is a no-op.

	Usage (dry-run lists what would change; nothing is written)::

	    python tools/backfill_report_legends.py

	    # actually patch + re-upload (needs a write-scoped HF_TOKEN):
	    python tools/backfill_report_legends.py --apply
	"""
	from __future__ import annotations

	import argparse
	import logging
	import re
	import sys
	from pathlib import Path

	sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

	from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download # noqa: E402

	from leaderboard import ( # noqa: E402
	HF_SUBMISSIONS_REPO,
	_load_rows_from_hub,
	_report_relative_url,
	)
	from submit import REPORTS_DIR # noqa: E402

	logger = logging.getLogger(__name__)

	# These three strings MUST stay byte-for-byte identical to what
	# single_run.py emits (_legend_html(_IFACE_LEGEND) / _legend_html(
	# _EDIT_DIFF_LEGEND) and the .legend/.legend-chip CSS) so a patched report is
	# indistinguishable from a freshly-generated one. A self-check in main()
	# compares against the live generator when it is importable.
	_IFACE_LEGEND_HTML = (
	'<span class="legend">'
	'<span class="legend-chip" style="background:#2e73db"></span>your part'
	'<span class="legend-chip" style="background:#e64d4d"></span>'
	"keep-out (must stay empty)"
	'<span class="legend-chip" style="background:#33b34d"></span>'
	"keep-in (must be filled)"
	'<span class="legend-chip" style="background:#ffd900"></span>disagreement'
	"</span>"
	)
	_EDIT_LEGEND_HTML = (
	'<span class="legend">'
	'<span class="legend-chip" style="background:#bdc4d1"></span>your output'
	'<span class="legend-chip" style="background:#2173f5"></span>'
	"extra material vs GT"
	'<span class="legend-chip" style="background:#e62929"></span>'
	"missing material vs GT"
	"</span>"
	)
	_CSS_BLOCK = (
	"\n.legend { color: #6b7785; font-size: 0.78em; font-weight: 400; "
	"text-transform: none; letter-spacing: normal; line-height: 1.6; }\n"
	".legend-chip { display: inline-block; width: 11px; height: 11px; "
	"border-radius: 3px; vertical-align: middle; "
	"margin: 0 5px 0 14px; border: 1px solid rgba(0,0,0,0.18); }\n"
	)

	# Old interface legend span (any per-fixture occurrence).
	_OLD_IFACE_RE = re.compile(
	r"<span class='iface-overlay-legend'>.*?</span>",
	re.DOTALL,
	)
	# Bare edit-diff heading (no legend yet).
	_BARE_EDIT_H3 = "<h3>Output vs ground truth (edit diff)</h3>"
	_NEW_EDIT_H3 = f"<h3>Output vs ground truth (edit diff) {_EDIT_LEGEND_HTML}</h3>"


	def patch_html(doc: str) -> str \| None:
	"""Return the patched HTML, or ``None`` when nothing needs changing.

	Swaps the old interface legend, adds the edit-diff legend, and injects the
	chip CSS. Idempotent: each sub-edit's anchor disappears once applied.
	"""
	new = _OLD_IFACE_RE.sub(lambda _m: _IFACE_LEGEND_HTML, doc)
	new = new.replace(_BARE_EDIT_H3, _NEW_EDIT_H3)
	changed = new != doc
	if changed and ".legend-chip" not in new and "</style>" in new:
	new = new.replace("</style>", _CSS_BLOCK + "</style>", 1)
	return new if changed else None


	def main() -> int:
	    """Patch stored reports with the new legends and optionally re-upload them.

	    Loads the leaderboard rows and keeps each distinct ``submission_id`` for
	    which ``_report_relative_url`` returns a truthy value. For each one it
	    downloads ``{REPORTS_DIR}/<id>.html`` from ``HF_SUBMISSIONS_REPO`` and
	    runs ``patch_html`` on it. Changed reports are uploaded in a single
	    commit, but only when ``--apply`` is given; otherwise this is a dry run
	    that just logs what would change.

	    Returns:
	        Always ``0``. Reports that cannot be fetched are logged and skipped
	        (deliberate best-effort), and do not affect the exit status.
	    """
	    logging.basicConfig(level=logging.INFO, format="%(message)s")
	    parser = argparse.ArgumentParser(
	        description=__doc__,
	        # Keep the module docstring's paragraphs, bullets and usage example
	        # as written in --help instead of reflowing them into one blob.
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	    )
	    parser.add_argument(
	        "--apply", action="store_true",
	        help="Re-upload patched reports (default is a dry run).",
	    )
	    args = parser.parse_args()

	    # Collect each submission id once: the report path depends only on the
	    # id, so a repeated id would otherwise queue two operations for one file.
	    seen_ids = set()
	    targets = []
	    for row in _load_rows_from_hub():
	        sid = row.get("submission_id")
	        if not sid or sid in seen_ids:
	            continue
	        if _report_relative_url(
	            sid, row.get("status"), row.get("submission_sha256"),
	        ):
	            seen_ids.add(sid)
	            targets.append(sid)
	    logger.info("Found %d report(s) to consider.", len(targets))

	    ops: list[CommitOperationAdd] = []
	    skipped = 0
	    for sid in targets:
	        report_path = f"{REPORTS_DIR}/{sid}.html"
	        try:
	            local = hf_hub_download(
	                repo_id=HF_SUBMISSIONS_REPO,
	                repo_type="dataset",
	                filename=report_path,
	            )
	        except Exception as e:  # noqa: BLE001
	            # Best-effort: one unreachable report must not abort the run.
	            logger.warning(" skip %s: could not fetch report (%s)", sid, e)
	            skipped += 1
	            continue
	        patched = patch_html(Path(local).read_text(encoding="utf-8"))
	        if patched is None:
	            logger.info(" unchanged %s", sid)
	            skipped += 1
	            continue
	        logger.info(" patched %s", sid)
	        ops.append(
	            CommitOperationAdd(
	                path_in_repo=report_path,
	                path_or_fileobj=patched.encode("utf-8"),
	            )
	        )

	    logger.info("%d to patch, %d unchanged/skipped.", len(ops), skipped)
	    if not ops:
	        logger.info("Nothing to do.")
	        return 0
	    if not args.apply:
	        logger.info("Dry run -- re-run with --apply to upload.")
	        return 0

	    # One commit for all patched reports.
	    HfApi().create_commit(
	        repo_id=HF_SUBMISSIONS_REPO,
	        repo_type="dataset",
	        operations=ops,
	        commit_message="reports: backfill corrected interface + edit-diff legends",
	    )
	    logger.info("Uploaded %d patched report(s).", len(ops))
	    return 0


	if __name__ == "__main__":
	    # Run the tool and use main()'s return value as the process exit status.
	    raise SystemExit(main())