Michael Rabinovich Cursor committed on
Commit ·
e611f15
1
Parent(s): 31854f7
leaderboard: show edit-diff turntable in gallery grid for editing fixtures
Browse files
The gallery grid tile for an editing fixture now serves the ghost edit-diff
WebP (renders/<id>/<fixture>/edit_diff.webp) instead of the plain candidate
turntable, so a small edit is legible at a glance. Generation tiles, the GT
column, and the cell-click modal are unchanged (the modal keeps the plain
aligned output via img; the grid uses a new gridImg).
- gallery: per-cell gridImg + gridRenderFor hook; build/render take a
diff_resolver.
- app: _fetch_render_diff + /render-diff proxy route + resolver. A miss 404s
to the dashed cell (no fallback to rotating.webp).
Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +54 -1
- gallery.py +35 -12
app.py
CHANGED
|
@@ -555,6 +555,29 @@ def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
|
|
| 555 |
return None
|
| 556 |
|
| 557 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 559 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 560 |
|
|
@@ -606,6 +629,17 @@ def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
|
|
| 606 |
return f"/render/{submission_id}/{fixture}.webp"
|
| 607 |
|
| 608 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
def _gt_proxy_url(fixture: str) -> str | None:
|
| 610 |
"""Resolver returning the cached proxy URL for a fixture's GT WebP."""
|
| 611 |
return f"/gt-render/{fixture}.webp"
|
|
@@ -629,6 +663,18 @@ def serve_render(submission_id: str, fixture: str) -> Response:
|
|
| 629 |
)
|
| 630 |
|
| 631 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
def serve_gt_render(fixture: str) -> Response:
|
| 633 |
"""Stream a fixture's ground-truth render WebP with long-lived caching."""
|
| 634 |
webp = _fetch_gt_render(fixture)
|
|
@@ -656,7 +702,9 @@ def _gallery_iframe_html() -> str:
|
|
| 656 |
except LeaderboardDataError:
|
| 657 |
logger.exception("Gallery row load failed; rendering empty gallery")
|
| 658 |
rows = []
|
| 659 |
-
doc = render_gallery_page(
|
|
|
|
|
|
|
| 660 |
escaped = html.escape(doc, quote=True)
|
| 661 |
return (
|
| 662 |
f'<iframe srcdoc="{escaped}" '
|
|
@@ -1102,6 +1150,11 @@ app.add_api_route(
|
|
| 1102 |
serve_render,
|
| 1103 |
methods=["GET"],
|
| 1104 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1105 |
app.add_api_route(
|
| 1106 |
"/gt-render/{fixture}.webp",
|
| 1107 |
serve_gt_render,
|
|
|
|
| 555 |
return None
|
| 556 |
|
| 557 |
|
| 558 |
+
def _fetch_render_diff(submission_id: str, fixture: str) -> bytes | None:
|
| 559 |
+
"""Pull a submission's edit-diff turntable (``renders/<id>/<fixture>/edit_diff.webp``).
|
| 560 |
+
|
| 561 |
+
Editing fixtures only; the evaluator writes this alongside the plain
|
| 562 |
+
``rotating.webp``. Same no-memoization rationale as :func:`_fetch_render`.
|
| 563 |
+
Returns ``None`` on any failure, including the expected miss for
|
| 564 |
+
non-editing fixtures, so the gallery degrades to the dashed cell.
|
| 565 |
+
"""
|
| 566 |
+
try:
|
| 567 |
+
local_path = hf_hub_download(
|
| 568 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 569 |
+
filename=f"renders/{submission_id}/{fixture}/edit_diff.webp",
|
| 570 |
+
repo_type="dataset",
|
| 571 |
+
)
|
| 572 |
+
return Path(local_path).read_bytes()
|
| 573 |
+
except Exception as e: # noqa: BLE001 - any Hub failure -> 404
|
| 574 |
+
logger.warning(
|
| 575 |
+
"Failed to fetch edit-diff render %s/%s (%s: %s)",
|
| 576 |
+
submission_id, fixture, type(e).__name__, e,
|
| 577 |
+
)
|
| 578 |
+
return None
|
| 579 |
+
|
| 580 |
+
|
| 581 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 582 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 583 |
|
|
|
|
| 629 |
return f"/render/{submission_id}/{fixture}.webp"
|
| 630 |
|
| 631 |
|
| 632 |
+
def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
|
| 633 |
+
"""Resolver returning the cached proxy URL for a submission's edit-diff WebP.
|
| 634 |
+
|
| 635 |
+
Used by the gallery grid for editing fixtures (see
|
| 636 |
+
``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
|
| 637 |
+
that never rendered a diff) 404s and degrades to the dashed cell client-side
|
| 638 |
+
via the ``<img onerror>`` hook, no fallback to the plain turntable.
|
| 639 |
+
"""
|
| 640 |
+
return f"/render-diff/{submission_id}/{fixture}.webp"
|
| 641 |
+
|
| 642 |
+
|
| 643 |
def _gt_proxy_url(fixture: str) -> str | None:
|
| 644 |
"""Resolver returning the cached proxy URL for a fixture's GT WebP."""
|
| 645 |
return f"/gt-render/{fixture}.webp"
|
|
|
|
| 663 |
)
|
| 664 |
|
| 665 |
|
| 666 |
+
def serve_render_diff(submission_id: str, fixture: str) -> Response:
    """Serve a submission's edit-diff turntable WebP, or a bare 404 on a miss.

    A successful response carries the bytes as ``image/webp`` together with
    the shared ``RENDER_CACHE_CONTROL`` caching header.
    """
    diff_bytes = _fetch_render_diff(submission_id, fixture)
    if diff_bytes is not None:
        cache_headers = {"Cache-Control": RENDER_CACHE_CONTROL}
        return Response(
            content=diff_bytes,
            media_type="image/webp",
            headers=cache_headers,
        )
    # The fetch helper reports every failure as None; surface it as a 404.
    return Response(status_code=404)
|
| 676 |
+
|
| 677 |
+
|
| 678 |
def serve_gt_render(fixture: str) -> Response:
|
| 679 |
"""Stream a fixture's ground-truth render WebP with long-lived caching."""
|
| 680 |
webp = _fetch_gt_render(fixture)
|
|
|
|
| 702 |
except LeaderboardDataError:
|
| 703 |
logger.exception("Gallery row load failed; rendering empty gallery")
|
| 704 |
rows = []
|
| 705 |
+
doc = render_gallery_page(
|
| 706 |
+
rows, _render_proxy_url, _gt_proxy_url, _render_diff_proxy_url,
|
| 707 |
+
)
|
| 708 |
escaped = html.escape(doc, quote=True)
|
| 709 |
return (
|
| 710 |
f'<iframe srcdoc="{escaped}" '
|
|
|
|
| 1150 |
serve_render,
|
| 1151 |
methods=["GET"],
|
| 1152 |
)
|
| 1153 |
+
app.add_api_route(
|
| 1154 |
+
"/render-diff/{submission_id}/{fixture}.webp",
|
| 1155 |
+
serve_render_diff,
|
| 1156 |
+
methods=["GET"],
|
| 1157 |
+
)
|
| 1158 |
app.add_api_route(
|
| 1159 |
"/gt-render/{fixture}.webp",
|
| 1160 |
serve_gt_render,
|
gallery.py
CHANGED
|
@@ -97,7 +97,7 @@ def _fixture_universe(rows: list[dict]) -> list[dict]:
|
|
| 97 |
]
|
| 98 |
|
| 99 |
|
| 100 |
-
def _sub_payload(row: dict, render_resolver) -> dict:
|
| 101 |
"""Project one verified row into the compact shape the page JS needs.
|
| 102 |
|
| 103 |
``render_resolver(submission_id, fixture_id)`` returns the cached
|
|
@@ -105,6 +105,13 @@ def _sub_payload(row: dict, render_resolver) -> dict:
|
|
| 105 |
fixtures carry ``img: null`` so the page draws the dashed cell;
|
| 106 |
note validity is driven by the per-fixture ``status`` in the data,
|
| 107 |
not by whether an image fetch happened to succeed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
"""
|
| 109 |
by_task = row.get("score_by_task_type") or {}
|
| 110 |
pfs = row.get("per_fixture_scores") or {}
|
|
@@ -113,10 +120,16 @@ def _sub_payload(row: dict, render_resolver) -> dict:
|
|
| 113 |
for fid, fx in pfs.items():
|
| 114 |
fx = fx or {}
|
| 115 |
status = fx.get("status") or "missing"
|
|
|
|
|
|
|
| 116 |
cells[fid] = {
|
| 117 |
"status": status,
|
| 118 |
"cad": fx.get("cad_score"),
|
| 119 |
-
"img": render_resolver(sid, fid) if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
}
|
| 121 |
return {
|
| 122 |
"id": sid,
|
|
@@ -133,13 +146,16 @@ def _sub_payload(row: dict, render_resolver) -> dict:
|
|
| 133 |
}
|
| 134 |
|
| 135 |
|
| 136 |
-
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dict:
|
| 137 |
"""Shape live rows into the JSON the gallery page renders from.
|
| 138 |
|
| 139 |
-
Image sources are injected via
|
| 140 |
agnostic to how the cached render URLs are constructed:
|
| 141 |
|
| 142 |
-
- ``render_resolver(submission_id, fixture_id) -> str | None``
|
|
|
|
|
|
|
|
|
|
| 143 |
- ``gt_resolver(fixture_id) -> str | None``
|
| 144 |
|
| 145 |
Returns ``{"fixtures", "subs", "selected", "gtImg"}`` where
|
|
@@ -152,24 +168,24 @@ def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dic
|
|
| 152 |
gt_img = {f["id"]: gt_resolver(f["id"]) for f in fixtures}
|
| 153 |
return {
|
| 154 |
"fixtures": fixtures,
|
| 155 |
-
"subs": [_sub_payload(r, render_resolver) for r in verified],
|
| 156 |
"selected": selected,
|
| 157 |
"gtImg": gt_img,
|
| 158 |
}
|
| 159 |
|
| 160 |
|
| 161 |
-
def render_gallery_page(rows: list[dict], render_resolver, gt_resolver) -> str:
|
| 162 |
"""Build the full standalone gallery HTML document from live rows.
|
| 163 |
|
| 164 |
-
``render_resolver`` / ``gt_resolver``
|
| 165 |
-
URLs (see :func:`build_gallery_payload`); the
|
| 166 |
-
the on-screen turntables.
|
| 167 |
|
| 168 |
The document is self-contained and uses **system font stacks only**
|
| 169 |
(no external font CDN fetch) so it never errors inside a sandboxed
|
| 170 |
iframe.
|
| 171 |
"""
|
| 172 |
-
payload = build_gallery_payload(rows, render_resolver, gt_resolver)
|
| 173 |
data_json = json.dumps(payload, ensure_ascii=False)
|
| 174 |
return (
|
| 175 |
"<!DOCTYPE html><html lang='en'><head>"
|
|
@@ -456,6 +472,13 @@ function renderFor(sub, fxId) {
|
|
| 456 |
const c = sub.cells[fxId];
|
| 457 |
return c ? c.img : null;
|
| 458 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
function gtRenderFor(fxId) {
|
| 460 |
return (DATA.gtImg || {})[fxId] || null;
|
| 461 |
}
|
|
@@ -660,7 +683,7 @@ function buildGallery() {
|
|
| 660 |
+ '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
|
| 661 |
+ '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
|
| 662 |
selected.forEach(id => {
|
| 663 |
-
cells += '<div class="thumb-cell">' + thumbHTML(
|
| 664 |
});
|
| 665 |
cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">▾</span></div>';
|
| 666 |
cells += '<div class="detail" id="detail-' + esc(s.id) + '">'
|
|
|
|
| 97 |
]
|
| 98 |
|
| 99 |
|
| 100 |
+
def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
|
| 101 |
"""Project one verified row into the compact shape the page JS needs.
|
| 102 |
|
| 103 |
``render_resolver(submission_id, fixture_id)`` returns the cached
|
|
|
|
| 105 |
fixtures carry ``img: null`` so the page draws the dashed cell;
|
| 106 |
note validity is driven by the per-fixture ``status`` in the data,
|
| 107 |
not by whether an image fetch happened to succeed.
|
| 108 |
+
|
| 109 |
+
Each cell also carries ``gridImg``, the source the gallery grid tile
|
| 110 |
+
uses: for ``editing`` fixtures this is the ghost edit-diff turntable
|
| 111 |
+
(``diff_resolver``) so the grid shows what actually changed; for every
|
| 112 |
+
other task it is the same plain candidate turntable as ``img``. The
|
| 113 |
+
modal keeps using ``img`` (the plain aligned output), so swapping the
|
| 114 |
+
grid never changes the modal.
|
| 115 |
"""
|
| 116 |
by_task = row.get("score_by_task_type") or {}
|
| 117 |
pfs = row.get("per_fixture_scores") or {}
|
|
|
|
| 120 |
for fid, fx in pfs.items():
|
| 121 |
fx = fx or {}
|
| 122 |
status = fx.get("status") or "missing"
|
| 123 |
+
valid = status == "valid"
|
| 124 |
+
is_editing = (fx.get("task_type") or "") == "editing"
|
| 125 |
cells[fid] = {
|
| 126 |
"status": status,
|
| 127 |
"cad": fx.get("cad_score"),
|
| 128 |
+
"img": render_resolver(sid, fid) if valid else None,
|
| 129 |
+
"gridImg": (
|
| 130 |
+
(diff_resolver(sid, fid) if is_editing else render_resolver(sid, fid))
|
| 131 |
+
if valid else None
|
| 132 |
+
),
|
| 133 |
}
|
| 134 |
return {
|
| 135 |
"id": sid,
|
|
|
|
| 146 |
}
|
| 147 |
|
| 148 |
|
| 149 |
+
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> dict:
|
| 150 |
"""Shape live rows into the JSON the gallery page renders from.
|
| 151 |
|
| 152 |
+
Image sources are injected via resolvers so this module stays
|
| 153 |
agnostic to how the cached render URLs are constructed:
|
| 154 |
|
| 155 |
+
- ``render_resolver(submission_id, fixture_id) -> str | None`` (plain
|
| 156 |
+
candidate turntable; backs the modal and non-editing grid tiles)
|
| 157 |
+
- ``diff_resolver(submission_id, fixture_id) -> str | None`` (edit-diff
|
| 158 |
+
turntable; backs the grid tile for editing fixtures)
|
| 159 |
- ``gt_resolver(fixture_id) -> str | None``
|
| 160 |
|
| 161 |
Returns ``{"fixtures", "subs", "selected", "gtImg"}`` where
|
|
|
|
| 168 |
gt_img = {f["id"]: gt_resolver(f["id"]) for f in fixtures}
|
| 169 |
return {
|
| 170 |
"fixtures": fixtures,
|
| 171 |
+
"subs": [_sub_payload(r, render_resolver, diff_resolver) for r in verified],
|
| 172 |
"selected": selected,
|
| 173 |
"gtImg": gt_img,
|
| 174 |
}
|
| 175 |
|
| 176 |
|
| 177 |
+
def render_gallery_page(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> str:
|
| 178 |
"""Build the full standalone gallery HTML document from live rows.
|
| 179 |
|
| 180 |
+
``render_resolver`` / ``gt_resolver`` / ``diff_resolver`` supply the
|
| 181 |
+
cached render-proxy URLs (see :func:`build_gallery_payload`); the
|
| 182 |
+
browser lazy-loads only the on-screen turntables.
|
| 183 |
|
| 184 |
The document is self-contained and uses **system font stacks only**
|
| 185 |
(no external font CDN fetch) so it never errors inside a sandboxed
|
| 186 |
iframe.
|
| 187 |
"""
|
| 188 |
+
payload = build_gallery_payload(rows, render_resolver, gt_resolver, diff_resolver)
|
| 189 |
data_json = json.dumps(payload, ensure_ascii=False)
|
| 190 |
return (
|
| 191 |
"<!DOCTYPE html><html lang='en'><head>"
|
|
|
|
| 472 |
const c = sub.cells[fxId];
|
| 473 |
return c ? c.img : null;
|
| 474 |
}
|
| 475 |
+
// Image source for a grid tile: the cell gridImg (edit-diff turntable for
// editing fixtures, plain candidate turntable otherwise), with img as the
// fallback when gridImg is absent. The modal keeps using renderFor (img),
// so swapping the grid source never changes the modal.
function gridRenderFor(sub, fxId) {
  const cell = sub.cells[fxId];
  if (!cell) {
    return null;
  }
  return cell.gridImg || cell.img;
}
|
| 482 |
function gtRenderFor(fxId) {
|
| 483 |
return (DATA.gtImg || {})[fxId] || null;
|
| 484 |
}
|
|
|
|
| 683 |
+ '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
|
| 684 |
+ '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
|
| 685 |
selected.forEach(id => {
|
| 686 |
+
cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
|
| 687 |
});
|
| 688 |
cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">▾</span></div>';
|
| 689 |
cells += '<div class="detail" id="detail-' + esc(s.id) + '">'
|