Michael Rabinovich Cursor committed on
Commit
e611f15
·
1 Parent(s): 31854f7

leaderboard: show edit-diff turntable in gallery grid for editing fixtures

Browse files

The gallery grid tile for an editing fixture now serves the ghost edit-diff
WebP (renders/<id>/<fixture>/edit_diff.webp) instead of the plain candidate
turntable, so a small edit is legible at a glance. Generation tiles, the GT
column, and the cell-click modal are unchanged (the modal keeps the plain
aligned output via img; the grid uses a new gridImg).

- gallery: per-cell gridImg + gridRenderFor hook; build/render take a
diff_resolver.
- app: _fetch_render_diff + /render-diff proxy route + resolver. A miss 404s
to the dashed cell (no fallback to rotating.webp).

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (2) hide show
  1. app.py +54 -1
  2. gallery.py +35 -12
app.py CHANGED
@@ -555,6 +555,29 @@ def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
555
  return None
556
 
557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  def _fetch_gt_render(fixture: str) -> bytes | None:
559
  """Pull a fixture's ground-truth GIF from the private GT dataset.
560
 
@@ -606,6 +629,17 @@ def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
606
  return f"/render/{submission_id}/{fixture}.webp"
607
 
608
 
 
 
 
 
 
 
 
 
 
 
 
609
  def _gt_proxy_url(fixture: str) -> str | None:
610
  """Resolver returning the cached proxy URL for a fixture's GT WebP."""
611
  return f"/gt-render/{fixture}.webp"
@@ -629,6 +663,18 @@ def serve_render(submission_id: str, fixture: str) -> Response:
629
  )
630
 
631
 
 
 
 
 
 
 
 
 
 
 
 
 
632
  def serve_gt_render(fixture: str) -> Response:
633
  """Stream a fixture's ground-truth render WebP with long-lived caching."""
634
  webp = _fetch_gt_render(fixture)
@@ -656,7 +702,9 @@ def _gallery_iframe_html() -> str:
656
  except LeaderboardDataError:
657
  logger.exception("Gallery row load failed; rendering empty gallery")
658
  rows = []
659
- doc = render_gallery_page(rows, _render_proxy_url, _gt_proxy_url)
 
 
660
  escaped = html.escape(doc, quote=True)
661
  return (
662
  f'<iframe srcdoc="{escaped}" '
@@ -1102,6 +1150,11 @@ app.add_api_route(
1102
  serve_render,
1103
  methods=["GET"],
1104
  )
 
 
 
 
 
1105
  app.add_api_route(
1106
  "/gt-render/{fixture}.webp",
1107
  serve_gt_render,
 
555
  return None
556
 
557
 
558
def _fetch_render_diff(submission_id: str, fixture: str) -> bytes | None:
    """Download a submission's edit-diff turntable and return its raw bytes.

    The file is requested from the submissions dataset repo at
    ``renders/<id>/<fixture>/edit_diff.webp``. Only editing fixtures are
    expected to have one; the evaluator writes it next to the plain
    ``rotating.webp``. The result is deliberately not memoized, for the same
    reason as :func:`_fetch_render`.

    Args:
        submission_id: Submission whose render directory is queried.
        fixture: Fixture id inside that submission's render directory.

    Returns:
        The WebP bytes, or ``None`` on any failure. A miss for a non-editing
        fixture is an expected failure, and lets the gallery fall back to the
        dashed cell.
    """
    try:
        remote_name = f"renders/{submission_id}/{fixture}/edit_diff.webp"
        cached_file = hf_hub_download(
            repo_id=HF_SUBMISSIONS_REPO,
            filename=remote_name,
            repo_type="dataset",
        )
        payload = Path(cached_file).read_bytes()
    except Exception as exc:  # noqa: BLE001 - any Hub failure -> 404
        # Best-effort fetch: log the cause and let the caller answer 404.
        logger.warning(
            "Failed to fetch edit-diff render %s/%s (%s: %s)",
            submission_id, fixture, type(exc).__name__, exc,
        )
        return None
    return payload
579
+
580
+
581
  def _fetch_gt_render(fixture: str) -> bytes | None:
582
  """Pull a fixture's ground-truth GIF from the private GT dataset.
583
 
 
629
  return f"/render/{submission_id}/{fixture}.webp"
630
 
631
 
632
+ def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
633
+ """Resolver returning the cached proxy URL for a submission's edit-diff WebP.
634
+
635
+ Used by the gallery grid for editing fixtures (see
636
+ ``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
637
+ that never rendered a diff) 404s and degrades to the dashed cell client-side
638
+ via the ``<img onerror>`` hook, no fallback to the plain turntable.
639
+ """
640
+ return f"/render-diff/{submission_id}/{fixture}.webp"
641
+
642
+
643
  def _gt_proxy_url(fixture: str) -> str | None:
644
  """Resolver returning the cached proxy URL for a fixture's GT WebP."""
645
  return f"/gt-render/{fixture}.webp"
 
663
  )
664
 
665
 
666
def serve_render_diff(submission_id: str, fixture: str) -> Response:
    """Serve a submission's edit-diff turntable WebP.

    Args:
        submission_id: Submission whose edit-diff render is requested.
        fixture: Fixture id of the requested render.

    Returns:
        An ``image/webp`` response carrying the bytes and the shared
        ``RENDER_CACHE_CONTROL`` caching header, or an empty 404 response when
        :func:`_fetch_render_diff` yields ``None``.
    """
    payload = _fetch_render_diff(submission_id, fixture)
    if payload is not None:
        cache_headers = {"Cache-Control": RENDER_CACHE_CONTROL}
        return Response(
            content=payload,
            media_type="image/webp",
            headers=cache_headers,
        )
    # Fetch failed or the diff does not exist.
    return Response(status_code=404)
676
+
677
+
678
  def serve_gt_render(fixture: str) -> Response:
679
  """Stream a fixture's ground-truth render WebP with long-lived caching."""
680
  webp = _fetch_gt_render(fixture)
 
702
  except LeaderboardDataError:
703
  logger.exception("Gallery row load failed; rendering empty gallery")
704
  rows = []
705
+ doc = render_gallery_page(
706
+ rows, _render_proxy_url, _gt_proxy_url, _render_diff_proxy_url,
707
+ )
708
  escaped = html.escape(doc, quote=True)
709
  return (
710
  f'<iframe srcdoc="{escaped}" '
 
1150
  serve_render,
1151
  methods=["GET"],
1152
  )
1153
+ app.add_api_route(
1154
+ "/render-diff/{submission_id}/{fixture}.webp",
1155
+ serve_render_diff,
1156
+ methods=["GET"],
1157
+ )
1158
  app.add_api_route(
1159
  "/gt-render/{fixture}.webp",
1160
  serve_gt_render,
gallery.py CHANGED
@@ -97,7 +97,7 @@ def _fixture_universe(rows: list[dict]) -> list[dict]:
97
  ]
98
 
99
 
100
- def _sub_payload(row: dict, render_resolver) -> dict:
101
  """Project one verified row into the compact shape the page JS needs.
102
 
103
  ``render_resolver(submission_id, fixture_id)`` returns the cached
@@ -105,6 +105,13 @@ def _sub_payload(row: dict, render_resolver) -> dict:
105
  fixtures carry ``img: null`` so the page draws the dashed cell;
106
  note validity is driven by the per-fixture ``status`` in the data,
107
  not by whether an image fetch happened to succeed.
 
 
 
 
 
 
 
108
  """
109
  by_task = row.get("score_by_task_type") or {}
110
  pfs = row.get("per_fixture_scores") or {}
@@ -113,10 +120,16 @@ def _sub_payload(row: dict, render_resolver) -> dict:
113
  for fid, fx in pfs.items():
114
  fx = fx or {}
115
  status = fx.get("status") or "missing"
 
 
116
  cells[fid] = {
117
  "status": status,
118
  "cad": fx.get("cad_score"),
119
- "img": render_resolver(sid, fid) if status == "valid" else None,
 
 
 
 
120
  }
121
  return {
122
  "id": sid,
@@ -133,13 +146,16 @@ def _sub_payload(row: dict, render_resolver) -> dict:
133
  }
134
 
135
 
136
- def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dict:
137
  """Shape live rows into the JSON the gallery page renders from.
138
 
139
- Image sources are injected via two resolvers so this module stays
140
  agnostic to how the cached render URLs are constructed:
141
 
142
- - ``render_resolver(submission_id, fixture_id) -> str | None``
 
 
 
143
  - ``gt_resolver(fixture_id) -> str | None``
144
 
145
  Returns ``{"fixtures", "subs", "selected", "gtImg"}`` where
@@ -152,24 +168,24 @@ def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dic
152
  gt_img = {f["id"]: gt_resolver(f["id"]) for f in fixtures}
153
  return {
154
  "fixtures": fixtures,
155
- "subs": [_sub_payload(r, render_resolver) for r in verified],
156
  "selected": selected,
157
  "gtImg": gt_img,
158
  }
159
 
160
 
161
- def render_gallery_page(rows: list[dict], render_resolver, gt_resolver) -> str:
162
  """Build the full standalone gallery HTML document from live rows.
163
 
164
- ``render_resolver`` / ``gt_resolver`` supply the cached render-proxy
165
- URLs (see :func:`build_gallery_payload`); the browser lazy-loads only
166
- the on-screen turntables.
167
 
168
  The document is self-contained and uses **system font stacks only**
169
  (no external font CDN fetch) so it never errors inside a sandboxed
170
  iframe.
171
  """
172
- payload = build_gallery_payload(rows, render_resolver, gt_resolver)
173
  data_json = json.dumps(payload, ensure_ascii=False)
174
  return (
175
  "<!DOCTYPE html><html lang='en'><head>"
@@ -456,6 +472,13 @@ function renderFor(sub, fxId) {
456
  const c = sub.cells[fxId];
457
  return c ? c.img : null;
458
  }
 
 
 
 
 
 
 
459
  function gtRenderFor(fxId) {
460
  return (DATA.gtImg || {})[fxId] || null;
461
  }
@@ -660,7 +683,7 @@ function buildGallery() {
660
  + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
661
  + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
662
  selected.forEach(id => {
663
- cells += '<div class="thumb-cell">' + thumbHTML(renderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
664
  });
665
  cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">&#9662;</span></div>';
666
  cells += '<div class="detail" id="detail-' + esc(s.id) + '">'
 
97
  ]
98
 
99
 
100
+ def _sub_payload(row: dict, render_resolver, diff_resolver) -> dict:
101
  """Project one verified row into the compact shape the page JS needs.
102
 
103
  ``render_resolver(submission_id, fixture_id)`` returns the cached
 
105
  fixtures carry ``img: null`` so the page draws the dashed cell;
106
  note validity is driven by the per-fixture ``status`` in the data,
107
  not by whether an image fetch happened to succeed.
108
+
109
+ Each cell also carries ``gridImg``, the source the gallery grid tile
110
+ uses: for ``editing`` fixtures this is the ghost edit-diff turntable
111
+ (``diff_resolver``) so the grid shows what actually changed; for every
112
+ other task it is the same plain candidate turntable as ``img``. The
113
+ modal keeps using ``img`` (the plain aligned output), so swapping the
114
+ grid never changes the modal.
115
  """
116
  by_task = row.get("score_by_task_type") or {}
117
  pfs = row.get("per_fixture_scores") or {}
 
120
  for fid, fx in pfs.items():
121
  fx = fx or {}
122
  status = fx.get("status") or "missing"
123
+ valid = status == "valid"
124
+ is_editing = (fx.get("task_type") or "") == "editing"
125
  cells[fid] = {
126
  "status": status,
127
  "cad": fx.get("cad_score"),
128
+ "img": render_resolver(sid, fid) if valid else None,
129
+ "gridImg": (
130
+ (diff_resolver(sid, fid) if is_editing else render_resolver(sid, fid))
131
+ if valid else None
132
+ ),
133
  }
134
  return {
135
  "id": sid,
 
146
  }
147
 
148
 
149
+ def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> dict:
150
  """Shape live rows into the JSON the gallery page renders from.
151
 
152
+ Image sources are injected via resolvers so this module stays
153
  agnostic to how the cached render URLs are constructed:
154
 
155
+ - ``render_resolver(submission_id, fixture_id) -> str | None`` (plain
156
+ candidate turntable; backs the modal and non-editing grid tiles)
157
+ - ``diff_resolver(submission_id, fixture_id) -> str | None`` (edit-diff
158
+ turntable; backs the grid tile for editing fixtures)
159
  - ``gt_resolver(fixture_id) -> str | None``
160
 
161
  Returns ``{"fixtures", "subs", "selected", "gtImg"}`` where
 
168
  gt_img = {f["id"]: gt_resolver(f["id"]) for f in fixtures}
169
  return {
170
  "fixtures": fixtures,
171
+ "subs": [_sub_payload(r, render_resolver, diff_resolver) for r in verified],
172
  "selected": selected,
173
  "gtImg": gt_img,
174
  }
175
 
176
 
177
+ def render_gallery_page(rows: list[dict], render_resolver, gt_resolver, diff_resolver) -> str:
178
  """Build the full standalone gallery HTML document from live rows.
179
 
180
+ ``render_resolver`` / ``gt_resolver`` / ``diff_resolver`` supply the
181
+ cached render-proxy URLs (see :func:`build_gallery_payload`); the
182
+ browser lazy-loads only the on-screen turntables.
183
 
184
  The document is self-contained and uses **system font stacks only**
185
  (no external font CDN fetch) so it never errors inside a sandboxed
186
  iframe.
187
  """
188
+ payload = build_gallery_payload(rows, render_resolver, gt_resolver, diff_resolver)
189
  data_json = json.dumps(payload, ensure_ascii=False)
190
  return (
191
  "<!DOCTYPE html><html lang='en'><head>"
 
472
  const c = sub.cells[fxId];
473
  return c ? c.img : null;
474
  }
475
+ // Grid tiles use gridImg (the edit-diff turntable for editing fixtures, the
476
+ // plain candidate turntable otherwise); the modal keeps renderFor (img), so
477
+ // the grid swap never changes the modal.
478
+ function gridRenderFor(sub, fxId) {
479
+ const c = sub.cells[fxId];
480
+ return c ? (c.gridImg || c.img) : null;
481
+ }
482
  function gtRenderFor(fxId) {
483
  return (DATA.gtImg || {})[fxId] || null;
484
  }
 
683
  + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
684
  + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
685
  selected.forEach(id => {
686
+ cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
687
  });
688
  cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">&#9662;</span></div>';
689
  cells += '<div class="detail" id="detail-' + esc(s.id) + '">'