File size: 9,605 Bytes
ba3eefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/usr/bin/env python3
"""Backfill the thumbnail-grid summary view into already-published reports.

The report generator (``cadgenbench.eval.report.single_run``) now renders the
summary view as a grouped thumbnail grid instead of a flat table. Reports
produced before that change are static HTML files in the submissions dataset
(``reports/<id>.html``); changing the generator does nothing to them. This
one-time tool rewrites those published reports **in place, without re-evaluating
or regenerating from run dirs**:

- it reads each report's existing summary table (sample number, status, CAD
  score) and detail cards (which fixtures are editing) plus the render-bucket
  base URL already embedded in the file;
- rebuilds the summary view as the grid using the *shared* builders from
  ``single_run`` (so a backfilled report is byte-identical to a freshly
  generated one), pointing editing cards at the ``edit_diff.png`` still and
  generation cards at the output ``iso.png`` — all assets that already exist;
- injects the shared grid CSS/JS; the detail cards, header, score text and
  download button are left untouched.

Run on local files (each file is rewritten in place unless ``-o`` names an output; good for eyeballing)::

    python tools/backfill_report_grid.py --files /tmp/report.html -o /tmp/out.html

Rewrite every published report in the submissions dataset (needs a write token)::

    HF_TOKEN=<write-token> python tools/backfill_report_grid.py --dataset
    python tools/backfill_report_grid.py --dataset --dry-run    # list only
"""
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download

# cadgenbench (for the shared grid builders) must be importable.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT / "cadgenbench" / "src"
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))

from cadgenbench.eval.report.single_run import (  # noqa: E402
    _GRID_CSS,
    _GRID_JS,
    _render_grid_controls,
    grid_card_html,
    render_grid_groups,
)

# Hub organisation and submissions dataset repo id; both can be overridden
# through the environment (``HF_ORG`` / ``HF_SUBMISSIONS_REPO``).
HF_ORG = os.getenv("HF_ORG", "HuggingAI4Engineering")
SUBMISSIONS_REPO = os.getenv("HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions")
# URL path prefix used when building the *input* thumbnail of each card.
INPUT_PROXY_BASE = "/task-input"
# File name (under the render base) used as the output thumbnail of editing cards.
EDIT_DIFF_STILL = "edit_diff.png"

# --- parsing the old flat-table report -------------------------------------
# First render image URL in the document. Group 1 is the URL up to and
# including the single path segment that follows ``/resolve/renders/``; the
# remaining ``/<segment>/<file>.png|webp`` tail is matched but not captured.
_RENDER_BASE_RE = re.compile(
    r'(https?://[^\s"\']+?/resolve/renders/[^/"\']+)/[^/"\']+/[^"\']+\.(?:png|webp)'
)
# One row of the old summary table. Group 1 is the integer passed to
# ``showDetail(...)``; group 2 is the row's inner HTML (may span lines: re.S).
_ROW_RE = re.compile(
    r'<tr class="q-[a-z]+" onclick="showDetail\((\d+)\)"[^>]*>(.*?)</tr>', re.S
)
# First attribute-less ``<td>`` with plain text content; used as the sample name.
_NAME_RE = re.compile(r"<td>([^<]+)</td>")
# Text content of the status pill.
_STATUS_RE = re.compile(r'status-pill status-\w+">([^<]+)<')
# ``data-v`` attribute of a cell whose content starts with ``<b>``; parsed as
# the CAD score.
_CAD_RE = re.compile(r'<td data-v="([^"]+)"><b>')
# The summary view container: from its opening tag to the *first* following
# ``</div>`` (non-greedy), so this relies on the old summary view containing
# no nested ``<div>``.
_SUMMARY_VIEW_RE = re.compile(r'(<div id="summary-view">).*?(</div>)', re.S)
# Help paragraph placed at the top of the rebuilt summary view.
_GRID_HELP = (
    '<p class="grid-help">Click a card to view details. '
    '<span class="kbd">j</span>/<span class="kbd">k</span> to navigate, '
    '<span class="kbd">Esc</span> to return. Each card shows the input and the '
    "candidate output. Score tint: "
    "<span class='gtint q-high'>&ge;0.90</span> "
    "<span class='gtint q-mid'>&ge;0.60</span> "
    "<span class='gtint q-low'>&lt;0.60</span> CAD score.</p>"
)


# Matches a ``/task-input/<...>/renders/`` URL fragment; its presence in a
# detail card is what marks that card as an editing task.
_INPUT_SHAPE_RE = re.compile(re.escape(INPUT_PROXY_BASE) + r"/[^\"']+/renders/")


def _editing_idxs(doc: str) -> set[int]:
    """Collect the ``data-idx`` of every detail card that is an editing task.

    The document is cut at each ``<div class="fixture-card"`` opening; a card
    counts as editing when its text contains a starting-shape render URL
    (``/task-input/<fixture>/renders/...``, matched by ``_INPUT_SHAPE_RE``).

    The edit-diff markers are intentionally *not* used for detection: per the
    old generator's behaviour, an invalid editing candidate was rendered with
    the generation layout (no diff), so only the starting-shape renders are a
    reliable signal for every editing sample.

    Args:
        doc: Full HTML text of a published report.

    Returns:
        The set of integer card indices classified as editing tasks. Cards
        without a leading ``data-idx="<n>"`` attribute are ignored.
    """
    # Everything before the first card opening is page preamble, not a card.
    _, *cards = doc.split('<div class="fixture-card"')

    editing: set[int] = set()
    for card in cards:
        idx_match = re.match(r'\s*data-idx="(\d+)"', card)
        if not idx_match:
            continue
        if not _INPUT_SHAPE_RE.search(card):
            continue
        editing.add(int(idx_match.group(1)))
    return editing


def rewrite_report_html(doc: str) -> str | None:
    """Swap a report's flat summary table for the thumbnail grid.

    Args:
        doc: Full HTML text of a published report.

    Returns:
        The rewritten HTML, or ``None`` when the report must be left as is:
        it already contains the grid markup, no render-bucket URL can be
        found in it (so output thumbnails have nowhere to point), no summary
        rows could be parsed, or there is no ``summary-view`` container.
    """
    already_grid = 'class="ggrid"' in doc or 'id="groups"' in doc
    if already_grid:
        return None  # already backfilled

    base_match = _RENDER_BASE_RE.search(doc)
    if base_match is None:
        return None  # not a hosted report (e.g. base64-inlined local report)
    render_base = base_match.group(1)
    editing = _editing_idxs(doc)

    gen_cards: list[str] = []
    edit_cards: list[str] = []
    for row in _ROW_RE.finditer(doc):
        idx = int(row.group(1))
        cells = row.group(2)

        name_match = _NAME_RE.search(cells)
        if name_match is None:
            continue  # a row without a name cell cannot be turned into a card
        name = name_match.group(1).strip()

        status_match = _STATUS_RE.search(cells)
        status = "?" if status_match is None else status_match.group(1).strip()

        # A missing, unparsable or negative score is passed on as "no score".
        cad: float | None = None
        cad_match = _CAD_RE.search(cells)
        if cad_match is not None:
            try:
                score = float(cad_match.group(1))
            except ValueError:
                pass
            else:
                if score >= 0:
                    cad = score

        is_editing = idx in editing
        if is_editing:
            # Editing card: starting shape as input, edit-diff still as output.
            in_src = f"{INPUT_PROXY_BASE}/{name}/renders/iso.png"
            out_src = f"{render_base}/{name}/{EDIT_DIFF_STILL}"
            bucket = edit_cards
        else:
            # Generation card: task input image and the output iso render.
            in_src = f"{INPUT_PROXY_BASE}/{name}/input.png"
            out_src = f"{render_base}/{name}/iso.png"
            bucket = gen_cards
        bucket.append(
            grid_card_html(
                idx=idx,
                name=name,
                is_editing=is_editing,
                status=status,
                cad=cad,
                in_src=in_src,
                out_src=out_src,
            )
        )

    if not (gen_cards or edit_cards):
        return None

    controls = _render_grid_controls()
    groups = render_grid_groups(gen_cards, edit_cards)
    grid_html = _GRID_HELP + controls + groups

    if _SUMMARY_VIEW_RE.search(doc) is None:
        return None

    def _swap_inner(found):
        # A callable replacement keeps backslashes in the grid HTML literal.
        return found.group(1) + grid_html + "</div>"

    doc = _SUMMARY_VIEW_RE.sub(_swap_inner, doc, count=1)
    # Inject the shared grid styles + filtering behavior.
    styled = doc.replace("</style>", _GRID_CSS + "</style>", 1)
    return styled.replace("</body>", f"<script>{_GRID_JS}</script></body>", 1)


def _run_files(files: list[Path], out: Path | None) -> int:
    """Rewrite local report files and report what happened to each.

    Files are read and written as UTF-8 explicitly rather than with the
    locale's default encoding, matching the dataset path (which uploads
    ``new.encode()``, i.e. UTF-8) and keeping non-ASCII report content intact
    on platforms whose default encoding is not UTF-8.

    Args:
        files: Report HTML files to process.
        out: Destination for the rewritten HTML. When ``None`` each input
            file is overwritten in place.

    Returns:
        Always ``0`` (process exit status); skipped files are not an error.
    """
    for f in files:
        doc = f.read_text(encoding="utf-8")
        new = rewrite_report_html(doc)
        if new is None:
            print(f"  SKIP {f} (already grid / not a hosted report)")
            continue
        dest = out or f
        dest.write_text(new, encoding="utf-8")
        print(f"  wrote {dest} ({len(new) // 1024} KB)")
    return 0


def _run_dataset(api: HfApi, token: str | None, dry_run: bool, limit: int | None) -> int:
    """Rewrite every ``reports/*.html`` in the submissions dataset.

    Each report is downloaded, passed through ``rewrite_report_html`` and, if
    it changed, queued as an upload. All queued uploads go into one commit.

    Args:
        api: Hub client used to list the repo files and create the commit.
        token: Token used for downloads; also required to commit.
        dry_run: When true, nothing is committed; the reports that would be
            rewritten are only listed and counted.
        limit: If given, only the first ``limit`` reports (in sorted path
            order) are considered.

    Returns:
        ``0`` on success (including dry runs and "nothing to rewrite"),
        ``2`` when there is something to commit but no token was provided.
    """
    files = sorted(
        f for f in api.list_repo_files(SUBMISSIONS_REPO, repo_type="dataset")
        if f.startswith("reports/") and f.endswith(".html")
    )
    if limit is not None:
        files = files[:limit]
    print(f"Found {len(files)} report(s) in {SUBMISSIONS_REPO}.")
    ops: list[CommitOperationAdd] = []
    # Counted separately from ``ops`` because a dry run queues no operations.
    rewritable = 0
    for i, rel in enumerate(files, start=1):
        local = hf_hub_download(
            repo_id=SUBMISSIONS_REPO, filename=rel, repo_type="dataset", token=token,
        )
        # Decode as UTF-8 explicitly: the upload below re-encodes as UTF-8.
        new = rewrite_report_html(Path(local).read_text(encoding="utf-8"))
        if new is None:
            print(f"  [{i}/{len(files)}] SKIP {rel} (already grid / not hosted)")
            continue
        rewritable += 1
        print(f"  [{i}/{len(files)}] {rel} -> grid ({len(new) // 1024} KB)")
        if not dry_run:
            ops.append(CommitOperationAdd(path_in_repo=rel, path_or_fileobj=new.encode()))
    if dry_run:
        # Report only the reports that would actually change, not every file listed.
        print(f"Dry run: would rewrite {rewritable} candidate(s).")
        return 0
    if not ops:
        print("Nothing to rewrite.")
        return 0
    if not token:
        print("HF_TOKEN required to commit.", file=sys.stderr)
        return 2
    api.create_commit(
        repo_id=SUBMISSIONS_REPO, repo_type="dataset", operations=ops,
        commit_message="reports: backfill thumbnail-grid summary view",
    )
    print(f"Committed {len(ops)} rewritten report(s) to {SUBMISSIONS_REPO}.")
    return 0


def main() -> int:
    """Parse the command line and run the local-file or dataset backfill.

    Exactly one of ``--files`` / ``--dataset`` must be given. ``-o`` is only
    accepted together with a single ``--files`` path.

    Returns:
        The process exit status produced by the selected mode.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a bullet list and literal example
        # commands; the default formatter would re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    src = parser.add_mutually_exclusive_group(required=True)
    src.add_argument("--files", nargs="+", type=Path, help="Local report HTML files.")
    src.add_argument(
        "--dataset", action="store_true",
        help="Rewrite every reports/*.html in the submissions dataset.",
    )
    parser.add_argument("-o", "--output", type=Path, help="Output path (single --files).")
    parser.add_argument("--dry-run", action="store_true", help="List only (dataset mode).")
    parser.add_argument(
        "--limit", type=int, default=None,
        help="Only process the first N reports, in sorted path order (dataset mode).",
    )
    args = parser.parse_args()

    if args.files:
        if args.output and len(args.files) != 1:
            parser.error("-o/--output only valid with a single --files argument.")
        return _run_files(args.files, args.output)

    token = os.environ.get("HF_TOKEN")
    return _run_dataset(HfApi(token=token), token, args.dry_run, args.limit)


# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())