File size: 37,826 Bytes
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08eae45
 
 
 
 
 
 
 
 
 
 
 
 
01d67e9
c1cb5e4
4a9408a
 
c1cb5e4
01d67e9
08eae45
 
 
 
4a9408a
c1cb5e4
4a9408a
01d67e9
 
 
 
 
461547b
 
01d67e9
 
 
 
 
08eae45
 
 
 
 
 
 
 
 
 
 
 
 
85bb21f
08eae45
6cdb387
 
08eae45
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08eae45
01d67e9
 
08eae45
 
 
 
4a9408a
 
 
 
 
e611f15
 
 
 
 
 
 
01d67e9
 
be6fa3d
01d67e9
 
08eae45
 
01d67e9
e611f15
 
01d67e9
 
 
e611f15
 
 
 
 
01d67e9
 
 
 
461547b
 
 
01d67e9
 
 
 
 
 
 
 
 
 
 
 
49e27be
 
 
01d67e9
 
08eae45
 
 
 
01d67e9
e611f15
 
 
 
49e27be
 
 
 
 
01d67e9
08eae45
 
49e27be
 
01d67e9
 
08eae45
 
 
 
 
49e27be
 
 
 
 
 
01d67e9
 
08eae45
 
 
 
01d67e9
 
 
 
49e27be
 
 
01d67e9
 
49e27be
 
 
 
4a9408a
 
 
 
01d67e9
49e27be
 
 
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a9408a
 
01d67e9
 
 
 
 
 
 
 
 
 
 
4a9408a
01d67e9
 
 
 
4a9408a
882faa0
01d67e9
 
 
 
08eae45
01d67e9
 
 
 
4a9408a
01d67e9
 
 
 
08eae45
 
01d67e9
a435f45
 
 
 
 
 
 
 
 
 
 
0eccd77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a435f45
01d67e9
 
08eae45
01d67e9
 
 
 
 
 
 
 
 
08eae45
 
 
 
 
 
 
 
 
 
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a9408a
01d67e9
 
 
 
 
 
461547b
 
4a9408a
01d67e9
 
 
4a9408a
01d67e9
 
 
 
 
 
 
 
 
 
 
4a9408a
c1cb5e4
01d67e9
 
 
 
 
 
 
 
 
 
4a9408a
01d67e9
461547b
 
 
 
 
 
 
 
 
 
 
 
01d67e9
 
 
 
 
 
 
 
4a9408a
01d67e9
 
 
 
 
 
4a9408a
01d67e9
4a9408a
0c0ddd8
 
 
 
 
01d67e9
 
2ec97dd
0c0ddd8
 
 
 
 
 
 
2ec97dd
0c0ddd8
a636039
0c0ddd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ec97dd
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
0eccd77
 
 
 
 
01d67e9
 
 
 
 
 
 
49e27be
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08eae45
 
 
4a9408a
01d67e9
 
4a9408a
 
 
01d67e9
 
 
 
e611f15
 
 
 
 
 
 
01d67e9
 
 
 
 
 
 
 
 
4a9408a
 
 
01d67e9
 
2ec97dd
08eae45
 
 
 
 
 
01d67e9
 
 
 
4a9408a
 
 
 
 
08eae45
4a9408a
 
01d67e9
 
 
 
 
4a9408a
08eae45
4a9408a
01d67e9
 
 
 
08eae45
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08eae45
 
01d67e9
 
 
 
 
 
 
 
 
461547b
 
 
01d67e9
461547b
 
 
 
 
01d67e9
461547b
 
 
 
01d67e9
08eae45
 
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a636039
01d67e9
08eae45
0c0ddd8
08eae45
 
 
 
01d67e9
 
0c0ddd8
 
 
 
 
01d67e9
 
 
49e27be
 
 
 
0c0ddd8
 
01d67e9
 
 
0c0ddd8
01d67e9
 
 
 
 
0c0ddd8
 
49e27be
0c0ddd8
 
 
 
 
01d67e9
0c0ddd8
 
49e27be
 
 
 
0c0ddd8
 
 
a0462d9
01d67e9
 
 
 
 
 
 
 
 
 
 
 
 
 
0eccd77
 
 
 
 
 
 
 
 
a435f45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ec97dd
a435f45
 
 
0c0ddd8
 
 
 
 
 
4090e8a
 
0c0ddd8
 
a435f45
0eccd77
a435f45
 
 
 
 
 
 
08eae45
 
 
a435f45
08eae45
 
 
01d67e9
a435f45
08eae45
0eccd77
 
 
 
 
 
5dd0c03
08eae45
5dd0c03
01d67e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
# Copyright 2026 Hugging Face
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Visual gallery leaderboard page.

Builds a self-contained HTML document (its own CSS + JS) from the live
submission rows. The Space serves it at ``/gallery`` and embeds it in
the Gradio "Gallery" tab via an iframe, so the bespoke visual surface
(sticky ground-truth row, turntable grid, report modal) lives in plain
HTML/JS isolated from Gradio's styles rather than being forced into
Gradio components.

The page is intentionally simple: it shows a **fixed** set of four
sample columns (see :data:`FIXED_FIXTURES`) for the top-10 verified
rows. There is no fixture picker -- the columns are the same on every
visit so the page reads like a printed comparison sheet rather than an
interactive matrix. :func:`build_gallery_payload` shapes the rows +
the fixed fixtures into a small JSON blob the page's JS renders. Render
lookups are isolated behind the ``renderFor`` / ``gtRenderFor`` JS
hooks (mirroring the design brief), pointed at the cached render-proxy
URLs the caller injects via the two resolvers:

- ``renderFor(sub, fixtureId)``  -> ``/render/<id>/<fixture>.webp`` (or
  ``null`` when the per-fixture status is invalid/missing, which draws
  the dashed "invalid generation" cell).
- ``gtRenderFor(fixtureId)``     -> ``/gt-render/<fixture>.webp``.

Turntables are lazy-loaded by the browser, so only the on-screen tiles are
fetched and CDN/browser caching makes repeat visits essentially free.
This requires the Space to be **public** (HF's edge 404s in-browser
fetches to our custom routes while private).

Turntable clicks open a GT-vs-output compare modal that points at the
existing per-submission detail/report view.
"""
from __future__ import annotations

import json
import math

from leaderboard import _report_relative_url

# The gallery is the visual shop window, so it is capped at the N best
# verified submissions. The full numeric ranking (the long tail) is on
# the Full results / Leaderboard tab instead.
GALLERY_TOP_N = 10

# The gallery columns never change: for each of the two tasks
# (generation, editing) there is one "Medium" and one "Hard" sample.
# Difficulty is deliberately **not** computed at runtime. It was picked
# once from the Claude Opus 4.8 baseline (submission
# ``huggingface_claude-opus-4-8-hf-baseline-with-build12...``): within
# each task type the *valid* per-fixture CAD scores were sorted
# ascending, the 50th-percentile fixture became "Medium" and the
# 20th-percentile fixture (80% of fixtures score higher, so it is
# harder) became "Hard". ``tools/pick_gallery_fixtures.py`` recomputes
# the selection.
#
# The GUI does NOT refresh this choice as the leaderboard evolves. What
# counts as medium/hard may drift as new models arrive, but a stable,
# simple comparison sheet is considered more valuable here than a moving
# ground truth.
FIXED_FIXTURES = [
    {"id": fixture_id, "task": task, "difficulty": difficulty}
    for fixture_id, task, difficulty in (
        ("101", "generation", "Medium"),
        ("111", "generation", "Hard"),
        ("229", "editing", "Medium"),
        ("207", "editing", "Hard"),
    )
]


def _is_real_score(value) -> bool:
    """Return True when ``value`` is a finite, non-boolean number.

    ``bool`` is a subclass of ``int`` and ``float('nan')`` / ``inf`` are
    ``float`` instances, so a plain ``isinstance(value, (int, float))``
    check lets all of them through. None of them is a usable aggregate
    score, and NaN in particular makes comparison-based sorting
    unreliable.
    """
    if isinstance(value, bool):
        return False
    if isinstance(value, int):
        # Plain ints are always finite; skipping ``math.isfinite`` also
        # avoids an OverflowError on ints too large for a float.
        return True
    return isinstance(value, float) and math.isfinite(value)


def _verified_rows(rows: list[dict]) -> list[dict]:
    """Completed + validated rows, score-sorted desc, capped at the top N.

    Mirrors the leaderboard's notion of "verified": ``validation_status
    == 'validated'`` and a terminal ``status == 'completed'`` with a
    real aggregate score. Pending / failed / unvalidated rows never
    reach the visual gallery.

    "Real" means a finite ``int`` or ``float``. Booleans, ``NaN`` and
    infinities are rejected, because a NaN sort key would leave the
    ranking (and therefore the top-N cut) in an arbitrary order.

    Args:
        rows: Submission rows as dictionaries. The input list is not
            modified.

    Returns:
        A new list holding at most ``GALLERY_TOP_N`` rows, highest
        ``aggregate_score`` first. Rows with equal scores keep their
        input order.
    """
    verified = [
        r
        for r in rows
        if r.get("validation_status") == "validated"
        and r.get("status") == "completed"
        and _is_real_score(r.get("aggregate_score"))
    ]
    # Every surviving row has a finite numeric score, so it can be used
    # directly as the sort key.
    verified.sort(key=lambda r: r["aggregate_score"], reverse=True)
    return verified[:GALLERY_TOP_N]


def _sub_payload(row: dict, fixture_ids: list[str], render_resolver, diff_resolver) -> dict:
    """Reduce one verified row to the compact record the page JS consumes.

    Exactly the fixed gallery columns (``fixture_ids``) are projected. A
    fixture without an entry in the row's ``per_sample_scores`` is kept
    as a ``missing`` cell (drawn as the dashed tile) instead of being
    omitted.

    Per-cell image sources:

    - ``img`` is ``render_resolver(submission_id, fixture_id)`` when the
      fixture's ``status`` is ``"valid"``, otherwise ``None`` (the page
      then draws the dashed cell). Validity comes from the per-fixture
      ``status`` in the data, not from whether an image fetch succeeds.
    - ``gridImg`` is what the grid tile shows. For a valid fixture whose
      ``task_type`` is ``"editing"`` it is the ghost edit-diff turntable
      from ``diff_resolver``, so the grid shows what actually changed.
      For any other valid fixture it is the same plain candidate
      turntable as ``img``. It is ``None`` when the fixture is not valid.

    The modal keeps using ``img`` (the plain aligned output), so the grid
    swap never changes the modal.
    """
    submission_id = row.get("submission_id") or ""
    sample_scores = row.get("per_sample_scores") or {}
    task_scores = row.get("score_by_task_type") or {}

    def _cell(fixture_id: str) -> dict:
        """Build the cell record for a single fixture column."""
        sample = sample_scores.get(fixture_id) or {}
        state = sample.get("status") or "missing"
        cell = {
            "status": state,
            "cad": sample.get("cad_score"),
            "img": None,
            "gridImg": None,
        }
        if state == "valid":
            cell["img"] = render_resolver(submission_id, fixture_id)
            if (sample.get("task_type") or "") == "editing":
                cell["gridImg"] = diff_resolver(submission_id, fixture_id)
            else:
                cell["gridImg"] = render_resolver(submission_id, fixture_id)
        return cell

    cells: dict[str, dict] = {fixture_id: _cell(fixture_id) for fixture_id in fixture_ids}

    report_url = _report_relative_url(
        submission_id, row.get("status"), row.get("submission_sha256"),
    )
    return {
        "id": submission_id,
        "name": row.get("submission_name") or "(unnamed submission)",
        "reportUrl": report_url,
        "who": row.get("submitter_name") or "",
        "score": row.get("aggregate_score"),
        "validity": row.get("validity_rate"),
        "gen": task_scores.get("generation"),
        "edit": task_scores.get("editing"),
        "date": row.get("submitted_at") or "",
        "version": row.get("cadgenbench_version") or "",
        "blobUrl": row.get("submission_blob_url") or "",
        "cells": cells,
    }


def build_gallery_payload(
    rows: list[dict], render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
) -> dict:
    """Turn the live submission rows into the JSON the gallery page renders.

    Columns always come from :data:`FIXED_FIXTURES` (there is no picker),
    so every visit shows the same page. How the cached render URLs are
    built is left to the caller, which passes in four resolvers:

    - ``render_resolver(submission_id, fixture_id) -> str | None``: the
      plain candidate turntable (modal and non-editing grid tiles).
    - ``diff_resolver(submission_id, fixture_id) -> str | None``: the
      edit-diff turntable (grid tile of editing fixtures).
    - ``gt_resolver(fixture_id) -> str | None``: the plain GT turntable.
    - ``gt_diff_resolver(fixture_id) -> str | None``: the GT "answer key"
      edit-diff turntable. It is used for the ground-truth row of
      **editing** fixtures so the reference shows the correct change in
      blue next to the candidate's red/amber diff in the same column.

    Returns:
        ``{"fixtures", "subs", "gtImg"}``. ``fixtures`` lists the fixed
        columns (id, name, task, difficulty), ``subs`` holds one
        projected record per verified row, and ``gtImg`` maps each
        fixture id to its ground-truth image source (answer-key diff for
        editing fixtures, plain turntable otherwise).
    """
    top_rows = _verified_rows(rows)

    # Copy the fixed column specs; the fixture id doubles as display name.
    columns = []
    for spec in FIXED_FIXTURES:
        columns.append(
            {
                "id": spec["id"],
                "name": spec["id"],
                "task": spec["task"],
                "difficulty": spec["difficulty"],
            }
        )
    column_ids = [column["id"] for column in columns]

    # Ground-truth source per column: editing columns get the answer-key diff.
    gt_sources = {}
    for column in columns:
        if column["task"] == "editing":
            gt_sources[column["id"]] = gt_diff_resolver(column["id"])
        else:
            gt_sources[column["id"]] = gt_resolver(column["id"])

    submissions = []
    for row in top_rows:
        submissions.append(_sub_payload(row, column_ids, render_resolver, diff_resolver))

    return {
        "fixtures": columns,
        "subs": submissions,
        "gtImg": gt_sources,
    }


def _json_for_script(payload: dict) -> str:
    """Serialize ``payload`` as JSON that is safe inside an inline ``<script>``.

    The payload carries user-supplied text (submission and submitter
    names, URLs). Raw ``json.dumps`` output would let a value containing
    ``</script>`` or ``<!--`` end the script element early and inject
    markup. ``<``, ``>`` and ``&`` are therefore emitted as ``\\uXXXX``
    escapes, which JavaScript decodes back to the original characters.
    U+2028 / U+2029 are escaped too because older JavaScript engines
    treat them as line terminators inside string literals.
    """
    text = json.dumps(payload, ensure_ascii=False)
    # In serialized JSON these characters can only occur inside string
    # literals, so a plain textual replace cannot damage the structure.
    for char, escaped in (
        ("<", "\\u003c"),
        (">", "\\u003e"),
        ("&", "\\u0026"),
        ("\u2028", "\\u2028"),
        ("\u2029", "\\u2029"),
    ):
        text = text.replace(char, escaped)
    return text


def render_gallery_page(
    rows: list[dict], render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
) -> str:
    """Build the full standalone gallery HTML document from live rows.

    ``render_resolver`` / ``gt_resolver`` / ``diff_resolver`` /
    ``gt_diff_resolver`` supply the cached render-proxy URLs (see
    :func:`build_gallery_payload`); the browser lazy-loads only the
    on-screen turntables.

    The document is self-contained and uses **system font stacks only**
    (no external font CDN fetch) so it never errors inside a sandboxed
    iframe.

    The payload is embedded as ``window.GALLERY_DATA`` in an inline
    script. It is serialized with HTML-significant characters escaped so
    that user-controlled fields cannot break out of the ``<script>``
    element.

    Returns:
        The complete HTML document as a single string.
    """
    payload = build_gallery_payload(
        rows, render_resolver, gt_resolver, diff_resolver, gt_diff_resolver,
    )
    data_json = _json_for_script(payload)
    return (
        "<!DOCTYPE html><html lang='en'><head>"
        "<meta charset='UTF-8'>"
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
        "<title>CADGenBench Gallery</title>"
        f"<style>{_CSS}</style>"
        "</head><body>"
        f"{_BODY}"
        f"<script>window.GALLERY_DATA = {data_json};</script>"
        f"<script>{_JS}</script>"
        "</body></html>"
    )


# ---------------------------------------------------------------------------
# CSS (ported from the reference prototype, trimmed to the gallery surface).
# Self-contained: system font stacks only, no external font CDN fetch.
# ---------------------------------------------------------------------------

# Stylesheet for the gallery page; ``render_gallery_page`` inlines it verbatim
# inside the document's ``<style>`` element.
#
# Three custom properties are read with fallbacks because they are meant to be
# supplied at runtime rather than here:
#   --ncol         fallback 4; the page script sets it to the fixture count.
#   --gallery-max  fallback 560px; per the in-string comment it is set from JS.
#   --head-h       fallback 46px; presumably the header height, set from JS --
#                  TODO confirm against the page script.
_CSS = """
:root {
  --bg: #f4f5f7; --panel: #ffffff; --ink: #14161c; --ink-soft: #5b6170;
  --ink-faint: #9aa0ad; --line: #e3e5ea; --line-strong: #d2d5dd;
  --accent: #4338ca; --accent-soft: #eef0ff; --good: #15803d;
  --good-soft: #e9f7ee; --bad: #b42318; --bad-soft: #fdeceb;
  --gt: #0f766e; --gt-soft: #e6f4f2; --thumb-bg: #eceef2;
  --shadow: 0 1px 2px rgba(20,22,28,.04), 0 8px 24px rgba(20,22,28,.06);
  --radius: 14px;
  --mono: ui-monospace, 'SF Mono', Menlo, Consolas, monospace;
}
* { box-sizing: border-box; }
body {
  margin: 0; background: var(--bg); color: var(--ink);
  font-family: system-ui, -apple-system, 'Segoe UI', Roboto, sans-serif;
  -webkit-font-smoothing: antialiased; padding: 8px 0 10px;
}
.wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }

.section-label {
  display: flex; align-items: center; gap: 10px; margin: 4px 0 6px;
  font-size: 14px; font-weight: 700; color: var(--accent);
  text-transform: uppercase; letter-spacing: .05em;
}
.section-label .verified-pill {
  font-family: var(--mono); font-size: 10px; color: var(--good);
  background: var(--good-soft); padding: 3px 8px; border-radius: 999px;
  letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
}
.dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
.section-caption { margin: 0 0 16px; font-size: 12.5px; color: var(--ink-soft); line-height: 1.5; }
.section-caption b { color: var(--ink); font-weight: 600; }

/* The gallery is its own scroll container so the column header + ground-truth
   row (both `position: sticky`) stay locked at the top while the submission
   rows scroll inside it. This is the ONLY scroller for the rows -- it must not
   leak out to the host page. Height is set in JS from `screen.availHeight`
   (the one viewport-ish measure that survives HF's nested iframes, which
   inflate `innerHeight`/`vh` to the full content height); the px value here is
   the pre-script fallback. */
.gallery {
  background: var(--panel); border: 1px solid var(--line);
  border-radius: var(--radius); box-shadow: var(--shadow); position: relative;
  max-height: var(--gallery-max, 560px); overflow-y: auto; overflow-x: hidden;
  scrollbar-width: thin; scrollbar-color: var(--line-strong) transparent;
}
/* Keep the scrollbar visible (macOS overlay scrollbars otherwise hide it, so
   it isn't obvious the rows scroll). */
.gallery::-webkit-scrollbar { width: 11px; }
.gallery::-webkit-scrollbar-track { background: transparent; }
.gallery::-webkit-scrollbar-thumb {
  background: var(--line-strong); border-radius: 8px; border: 2px solid var(--panel);
}
.gallery::-webkit-scrollbar-thumb:hover { background: var(--ink-faint); }

/* Affordance that there are more rows below: a fade + label pinned to the
   bottom of the box, hidden by JS once scrolled to the end. */
.gallery-shell { position: relative; }
.scroll-cue {
  position: absolute; left: 1px; right: 12px; bottom: 1px; height: 56px;
  pointer-events: none; display: flex; align-items: flex-end; justify-content: center;
  padding-bottom: 9px; border-radius: 0 0 var(--radius) var(--radius);
  background: linear-gradient(to bottom, rgba(255,255,255,0), var(--panel) 82%);
}
.scroll-cue[hidden] { display: none; }
.scroll-cue span {
  font-size: 11px; font-weight: 700; text-transform: uppercase; letter-spacing: .05em;
  color: var(--accent); background: var(--accent-soft); padding: 4px 12px; border-radius: 999px;
  box-shadow: 0 1px 3px rgba(20,22,28,.12);
}
.grid-head, .grow {
  display: grid;
  grid-template-columns: 52px minmax(200px, 1.3fr) 160px repeat(var(--ncol, 4), minmax(140px, 1fr));
  align-items: stretch;
}
.grid-head {
  background: #fbfbfd; border-bottom: 1px solid var(--line); font-size: 11px;
  text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint);
  font-weight: 700; position: sticky; top: 0; z-index: 20;
  border-radius: var(--radius) var(--radius) 0 0;
}
.grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
.grid-head .fix-h { flex-direction: column; align-items: flex-start; gap: 3px; }
.grid-head .fix-h .ftask { font-size: 11px; color: var(--ink-soft); text-transform: none; letter-spacing: 0; font-weight: 700; }
.grid-head .fix-h .ftop { display: flex; align-items: center; gap: 6px; }
.grid-head .fix-h .fname { font-size: 9.5px; color: var(--ink-faint); text-transform: none; letter-spacing: 0; font-family: var(--mono); font-weight: 600; }
.grid-head .fix-h .fdiff {
  font-size: 9px; font-weight: 700; text-transform: uppercase; letter-spacing: .05em;
  padding: 2px 7px; border-radius: 999px;
}
.fdiff.diff-medium { color: #b45309; background: #fdf3e7; }
.fdiff.diff-hard { color: var(--bad); background: var(--bad-soft); }

.grow.gt-row {
  background: var(--gt-soft); border-bottom: 2px solid var(--gt);
  position: sticky; top: var(--head-h, 46px); z-index: 15;
  box-shadow: 0 6px 14px -8px rgba(15,118,110,.45);
}
.grow.gt-row .rank, .grow.gt-row .ident { display: flex; align-items: center; }
.grow.gt-row .ident { font-weight: 700; color: var(--gt); flex-direction: column; align-items: flex-start; justify-content: center; gap: 2px; }
.grow.gt-row .ident .gt-sub { font-weight: 500; font-size: 11.5px; color: var(--gt); opacity: .8; }
.grow.gt-row .score-cell { color: var(--gt); }

.grow.sub-row { border-bottom: 1px solid var(--line); transition: background .12s ease; }
.grow.sub-row:last-child { border-bottom: none; }
.grow.sub-row:hover { background: #fafbff; }

.rank {
  padding: 16px 14px; font-family: var(--mono); font-weight: 700;
  font-size: 15px; color: var(--ink-faint); display: flex; align-items: center;
  justify-content: center;
}
.rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }

.ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
.ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; color: var(--ink); text-decoration: none; }
a.sub-name:hover { color: var(--accent); text-decoration: underline; }
.ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: var(--mono); }

.score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
.score-cell .agg { font-size: 22px; font-weight: 800; letter-spacing: -.01em; }
.score-cell .validity { font-size: 11.5px; font-family: var(--mono); color: var(--good); font-weight: 700; display: flex; align-items: baseline; gap: 5px; }
.score-cell .validity .vlabel { font-weight: 400; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; font-size: 10px; }
.score-cell .validity.imperfect { color: #b45309; }
.score-cell .validity.imperfect .vlabel { color: #c98a3a; }

.thumb-cell { padding: 8px; display: flex; align-items: center; justify-content: center; position: relative; }
.thumb {
  width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg);
  border: 1px solid var(--line); overflow: hidden; cursor: pointer; position: relative;
  transition: transform .14s ease, box-shadow .14s ease, border-color .14s ease;
}
.thumb:hover { transform: translateY(-2px); box-shadow: 0 6px 18px rgba(20,22,28,.14); border-color: var(--accent); }
/* Display width is CSS-constrained so the browser downscales the existing
   render artifact: no resize step, no new assets. */
.thumb img { width: 100%; height: 100%; display: block; object-fit: contain; }
.thumb .open-hint {
  position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;
  opacity: 0; background: rgba(67,56,202,.08); transition: opacity .14s ease;
  font-size: 11px; font-weight: 700; color: var(--accent); text-transform: uppercase; letter-spacing: .04em;
}
.thumb:hover .open-hint { opacity: 1; }

.thumb.failed { cursor: default; background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
.thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
.thumb.failed .ftag { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }

/* Inline Gen/Edit breakdown beneath the aggregate score (replaces the old
   "more numbers" expander). Validity stays as its own line below. */
.score-cell .score-breakdown { display: flex; gap: 14px; margin: 1px 0; }
.score-cell .sb { display: flex; flex-direction: column; line-height: 1.15; }
.score-cell .sb-l { font-size: 9px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
.score-cell .sb-v { font-size: 13px; font-weight: 700; font-family: var(--mono); color: var(--ink-soft); }

/* Download link + submission date, tucked under the submitter name. */
.ident .ident-foot { display: flex; align-items: center; gap: 10px; margin-top: 5px; flex-wrap: wrap; }
.ident .dl { font-size: 11.5px; font-weight: 600; color: var(--accent); text-decoration: none; display: inline-flex; align-items: center; gap: 4px; }
.ident .dl .dl-ic { font-size: 13px; line-height: 1; }
.ident .dl:hover { text-decoration: underline; }

.empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }

/* compare modal (GT vs output) */
.modal-back { position: fixed; inset: 0; background: rgba(20,22,28,.5); backdrop-filter: blur(3px); display: none; align-items: center; justify-content: center; z-index: 50; padding: 24px; }
.modal-back.show { display: flex; }
.modal { background: var(--panel); border-radius: 16px; width: 100%; max-width: 620px; padding: 26px; box-shadow: 0 24px 60px rgba(0,0,0,.3); }
.modal h4 { margin: 0 0 4px; font-size: 18px; }
.modal .msub { color: var(--ink-faint); font-size: 13px; font-family: var(--mono); margin-bottom: 18px; }
.modal-compare { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; }
.modal-compare figure { margin: 0; }
.modal-compare figcaption { font-size: 11px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; margin-bottom: 6px; }
.modal-compare .mthumb { width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg); border: 1px solid var(--line); overflow: hidden; }
.modal-compare .mthumb img { width: 100%; height: 100%; object-fit: contain; display: block; }
.modal-compare .mthumb.failed { background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
.modal-compare .mthumb.failed span { font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; }
.modal-note { margin-top: 18px; font-size: 12.5px; color: var(--ink-soft); background: var(--accent-soft); padding: 12px 14px; border-radius: 10px; }
.modal-note a { color: var(--accent); font-weight: 600; }
/* Edit-diff color key (editing fixtures only), mirrors the report legend. */
.modal-legend { margin-top: 9px; font-size: 11.5px; color: var(--ink-soft); line-height: 1.7; }
.modal-legend .mlc { display: inline-block; width: 11px; height: 11px; border-radius: 3px;
                     vertical-align: middle; margin: 0 5px 0 14px; border: 1px solid rgba(0,0,0,0.18); }
.modal-legend .mlc:first-child { margin-left: 0; }
.modal-close { margin-top: 20px; width: 100%; padding: 11px; border: 1px solid var(--line-strong); background: #fafbfc; border-radius: 10px; font-family: inherit; font-weight: 600; cursor: pointer; font-size: 14px; }
.modal-close:hover { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); }

/* --- Mobile / narrow screens ------------------------------------------------
   Same comparison as desktop -- the four sample columns with the ground-truth
   row pinned on top so each model's render sits directly under the GT it should
   match -- but adapted to phone width: there is no room for a left identity
   column AND four renders, so the model name + score become a slim bar above
   that model's four renders. The four columns stay full-width and aligned
   across the GT row and every model, and the header + GT row stay pinned. */
@media (max-width: 760px) {
  .wrap { padding: 0 7px; }
  .section-label { margin: 2px 0 4px; font-size: 12px; }
  .grid-head, .grow { grid-template-columns: repeat(var(--ncol, 4), 1fr); }

  /* Header: keep only the four sample-column labels, aligned over the renders. */
  .grid-head .h-rank, .grid-head .h-sub, .grid-head .h-score { display: none !important; }
  .grid-head .fix-h { padding: 8px 4px; }
  .grid-head .fix-h .fname { display: none; }        /* drop the #id, keep task+difficulty */
  .grid-head .fix-h .ftop { flex-direction: column; align-items: flex-start; gap: 3px; }
  .grid-head .fix-h .ftask { font-size: 10px; }
  .grid-head .fix-h .fdiff { font-size: 8px; padding: 1px 5px; }

  /* Rows: name (3 cols) + score (last col) form a bar above the renders. */
  .rank { display: none !important; }
  .ident { grid-column: 1 / span 3; padding: 9px 8px 5px; }
  .ident .sub-name { font-size: 13.5px; }
  .ident .ident-foot { margin-top: 3px; }
  .score-cell { grid-column: 4 / span 1; padding: 9px 8px 5px; gap: 1px; align-items: flex-end; }
  .score-cell .agg { font-size: 17px; }
  .score-cell .score-breakdown { display: none; }
  .score-cell .validity { font-size: 10px; }
  .grow.gt-row .score-cell { align-items: flex-end; }
  .thumb-cell { padding: 4px; }
}
"""


# ---------------------------------------------------------------------------
# Body
# ---------------------------------------------------------------------------

# Static page skeleton; ``render_gallery_page`` places it at the start of
# ``<body>``, ahead of the data and script tags. The page script looks up
# ``#gallery`` and ``#gridHead`` by id to build the grid. The ``modal*`` ids
# are presumably the slots the compare modal fills in -- confirm against the
# page script.
# NOTE(review): the "Top 10" label below is hard-coded text, so it has to be
# kept in sync with ``GALLERY_TOP_N`` by hand.
_BODY = """
<div class="wrap">
  <div class="section-label">
    Validated leaderboard - Top 10
    <span class="verified-pill"><span class="dot"></span>verified only</span>
  </div>
  <div class="gallery-shell">
    <div class="gallery" id="gallery">
      <div class="grid-head" id="gridHead"></div>
    </div>
    <div class="scroll-cue" id="scrollCue" hidden><span>&#9662; scroll for more models</span></div>
  </div>
</div>
<div class="modal-back" id="modalBack">
  <div class="modal">
    <h4 id="modalTitle"></h4>
    <div class="msub" id="modalSub"></div>
    <div class="modal-compare">
      <figure><figcaption id="modalGtCap">Ground truth</figcaption><div class="mthumb" id="modalGt"></div></figure>
      <figure><figcaption id="modalOutCap">Output (aligned)</figcaption><div class="mthumb" id="modalOut"></div></figure>
    </div>
    <div class="modal-note" id="modalNote"></div>
    <button class="modal-close" id="modalClose">Close</button>
  </div>
</div>
"""


# ---------------------------------------------------------------------------
# JS (data-driven render of the gallery; render lookups isolated behind
# renderFor / gtRenderFor as in the design brief)
# ---------------------------------------------------------------------------

_JS = """
const DATA = window.GALLERY_DATA || {fixtures: [], subs: [], gtImg: {}};
// Fixed columns: the server hands us exactly the gallery's sample set, in
// order, so there is no picker and no client-side selection state.
const FIXTURES = DATA.fixtures || [];

// --- Render hooks. ---------------------------------------------------------
// The image sources are cached render-proxy URLs injected by the server, so
// these just read the payload (the browser lazy-loads only the on-screen
// tiles). renderFor returns null for an invalid/missing fixture -> dashed cell.
function renderFor(sub, fxId) {
  const c = sub.cells[fxId];
  return c ? c.img : null;
}
// Grid tiles use gridImg (the edit-diff turntable for editing fixtures, the
// plain candidate turntable otherwise); the modal keeps renderFor (img), so
// the grid swap never changes the modal.
function gridRenderFor(sub, fxId) {
  const c = sub.cells[fxId];
  return c ? (c.gridImg || c.img) : null;
}
function gtRenderFor(fxId) {
  return (DATA.gtImg || {})[fxId] || null;
}
function cellOf(sub, fxId) { return sub.cells[fxId] || {}; }

function fmt(x, d) { return (x === null || x === undefined) ? '-' : Number(x).toFixed(d); }
function pct(x) { return (x === null || x === undefined) ? '-' : Math.round(Number(x) * 100) + '%'; }
function esc(s) {
  // HTML-escape a value for use in element text or a quoted attribute.
  // null/undefined become the empty string; everything else is stringified.
  const text = String(s == null ? '' : s);
  const entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
  return text.replace(/[&<>"']/g, ch => entities[ch]);
}
function fixtureMeta(id) {
  // Callers pass ids read back from element.dataset, which are always
  // strings, while the payload may carry numeric ids. Compare the string
  // forms so both kinds resolve to the same fixture. Returns undefined
  // when no fixture matches.
  const wanted = String(id);
  return FIXTURES.find(f => String(f.id) === wanted);
}
function groupLabel(task) {
  // Capitalise the task name for display; a missing/empty task reads 'Other'.
  if (!task) return 'Other';
  const first = task.charAt(0).toUpperCase();
  return first + task.slice(1);
}

// --- Gallery render -------------------------------------------------------
function buildHead() {
  // Column header: three fixed columns followed by one header cell per fixture.
  const head = document.getElementById('gridHead');
  const fixedCols = '<div class="h-rank">#</div><div class="h-sub">Submission</div><div class="h-score">Score</div>';
  const fixtureCols = FIXTURES.map(fx => {
    // The difficulty badge is only emitted when the fixture declares one.
    let badge = '';
    if (fx.difficulty) {
      const cls = esc(fx.difficulty.toLowerCase());
      badge = `<span class="fdiff diff-${cls}">${esc(fx.difficulty)}</span>`;
    }
    const top = `<div class="ftop"><span class="ftask">${esc(groupLabel(fx.task))}</span>${badge}</div>`;
    return `<div class="fix-h">${top}<span class="fname">#${esc(fx.name)}</span></div>`;
  });
  head.innerHTML = fixedCols + fixtureCols.join('');
}

// Fall back to the dashed cell if a render URL 404s (a fixture marked valid
// whose render upload is missing) instead of showing a broken image.
function imgFail(img) {
  // Swap the whole tile for the "invalid generation" placeholder, then let
  // the iframe re-measure because the tile content changed.
  const holder = img.closest('.thumb-cell');
  if (holder) {
    holder.innerHTML = '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
  }
  fitIframe();
}

/**
 * Build the markup for one grid tile.
 *
 * @param {?string} url - image source; a falsy value yields the dashed
 *   "invalid generation" placeholder instead of an <img>.
 * @param {string} attrs - raw attribute markup placed on the wrapper div;
 *   the caller is responsible for escaping the values it contains.
 * @param {boolean} clickable - when truthy an "open" hint is appended.
 * @returns {string} HTML for the tile.
 */
function thumbHTML(url, attrs, clickable) {
  if (!url) {
    return '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
  }
  const hint = clickable ? '<span class="open-hint">open</span>' : '';
  // The URL lands inside a double-quoted attribute, so it is escaped like
  // every other interpolated value; a stray quote or ampersand must not be
  // able to terminate the attribute or be read as markup.
  return '<div class="thumb" ' + attrs + '>'
    + '<img loading="lazy" decoding="async" src="' + esc(url) + '" alt="" onload="fitIframe()" onerror="imgFail(this)">'
    + hint + '</div>';
}

/**
 * Render the gallery grid into #gallery: the column header, a pinned
 * ground-truth row, then one row per submission in payload order.
 *
 * Safe to call again: previously rendered rows are removed first. When the
 * payload has no submissions a single explanatory note is shown instead of
 * rows, and no click handlers are wired.
 */
function buildGallery() {
  const g = document.getElementById('gallery');
  g.style.setProperty('--ncol', Math.max(FIXTURES.length, 1));
  buildHead();
  g.querySelectorAll('.grow').forEach(n => n.remove());

  // A payload that omits `subs` is treated as "no submissions" rather than
  // throwing, matching how missing `fixtures` / `gtImg` are tolerated.
  const subs = Array.isArray(DATA.subs) ? DATA.subs : [];
  let note = g.querySelector('.empty-note');

  if (!subs.length) {
    if (!note) {
      note = document.createElement('div');
      note.className = 'empty-note';
      note.textContent = 'No verified submissions yet. Once a submission is promoted to the validated tier it appears here.';
      g.appendChild(note);
    }
    return;
  }
  // Rows are about to be drawn, so a note left by an earlier empty render
  // would be stale.
  if (note) note.remove();

  // Ground-truth pinned row.
  const gt = document.createElement('div');
  gt.className = 'grow gt-row';
  let gtCells = '<div class="rank">&#9733;</div>'
    + '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>'
    + '<div class="score-cell"><span class="agg">1.000</span></div>';
  FIXTURES.forEach(f => {
    gtCells += '<div class="thumb-cell">' + thumbHTML(gtRenderFor(f.id), 'data-gt="' + esc(f.id) + '"', false) + '</div>';
  });
  gt.innerHTML = gtCells;
  g.appendChild(gt);

  subs.forEach((s, i) => {
    const row = document.createElement('div');
    row.className = 'grow sub-row';
    // Only the first three rows get a medal class.
    const medal = i < 3 ? 'medal-' + (i + 1) : '';
    // Flag rows whose validity is known and below 100%.
    const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
    // The name links to the report only when a report URL is present.
    const nameHTML = s.reportUrl
      ? '<a class="sub-name" href="' + esc(s.reportUrl) + '" target="_blank" rel="noopener">' + esc(s.name) + '</a>'
      : '<span class="sub-name">' + esc(s.name) + '</span>';
    let cells = '<div class="rank ' + medal + '">' + (i + 1) + '</div>'
      + '<div class="ident">' + nameHTML
      + '<span class="submitter">' + esc(s.who) + '</span>'
      + '<div class="ident-foot">'
      + (s.blobUrl ? '<a class="dl" href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener"><span class="dl-ic">&#8675;</span>Download ZIP</a>' : '')
      + '</div></div>'
      + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
      + '<div class="score-breakdown">'
      + '<span class="sb"><span class="sb-l">Gen</span><span class="sb-v">' + fmt(s.gen, 3) + '</span></span>'
      + '<span class="sb"><span class="sb-l">Edit</span><span class="sb-v">' + fmt(s.edit, 3) + '</span></span>'
      + '</div>'
      + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
    FIXTURES.forEach(f => {
      cells += '<div class="thumb-cell">' + thumbHTML(gridRenderFor(s, f.id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(f.id) + '"', true) + '</div>';
    });
    row.innerHTML = cells;
    g.appendChild(row);
  });

  wireGallery();
  syncHeadHeight();
}

/**
 * Attach click handlers to every submission tile so a click opens the
 * comparison modal for that (fixture, submission) pair.
 */
function wireGallery() {
  document.querySelectorAll('.thumb[data-sub]').forEach(th => {
    th.onclick = () => {
      // dataset values are always strings, while payload ids may be numbers;
      // compare string forms so numeric ids still resolve.
      const sub = (DATA.subs || []).find(x => String(x.id) === th.dataset.sub);
      // Ignore the click if the submission cannot be found, rather than
      // handing openModal an undefined submission.
      if (sub) openModal(th.dataset.fix, sub);
    };
  });
}


/**
 * Fill and show the side-by-side comparison modal for one sample.
 *
 * @param {string} fxId - fixture id of the clicked tile.
 * @param {object} sub - submission record from the payload; when it is
 *   missing the call is a no-op.
 */
function openModal(fxId, sub) {
  // Nothing to show without a submission; bail out before touching the DOM
  // instead of throwing on sub.name below.
  if (!sub) return;
  const f = fixtureMeta(fxId);
  const isEditing = !!(f && f.task === 'editing');
  const title = f
    ? groupLabel(f.task) + (f.difficulty ? ' \\u00b7 ' + f.difficulty : '') + ' (#' + fxId + ')'
    : fxId;
  document.getElementById('modalTitle').textContent = title;
  document.getElementById('modalSub').textContent = sub.name;
  const gt = gtRenderFor(fxId);
  // Editing fixtures: the meaningful output is the edit-diff turntable (the
  // material that actually changed vs GT), mirroring the per-submission report
  // -- the plain aligned candidate looks identical to GT for a small edit.
  // Generation shows the plain aligned candidate.
  const out = isEditing ? gridRenderFor(sub, fxId) : renderFor(sub, fxId);
  const cell = cellOf(sub, fxId);
  // Image URLs are written through innerHTML into a quoted attribute, so
  // they are escaped like the other interpolated values in this file.
  document.getElementById('modalGt').innerHTML = gt
    ? '<img src="' + esc(gt) + '" alt="ground truth">' : '<span>no GT render</span>';
  // For editing fixtures the GT side is the "answer key" edit diff (blue = the
  // correct change), so it pairs with the output's red/amber diff; label both.
  document.getElementById('modalGtCap').textContent =
    isEditing ? 'Ground truth (correct change)' : 'Ground truth';
  document.getElementById('modalOutCap').textContent =
    isEditing ? 'Output vs ground truth (edit diff)' : 'Output (aligned)';
  const outEl = document.getElementById('modalOut');
  if (out) {
    outEl.className = 'mthumb';
    outEl.innerHTML = '<img src="' + esc(out) + '" alt="' + (isEditing ? 'edit diff' : 'output') + '">';
  } else {
    // No render for this sample: show the same placeholder wording as the grid.
    outEl.className = 'mthumb failed';
    outEl.innerHTML = '<span>invalid<br>generation</span>';
  }
  const cad = (cell.cad === null || cell.cad === undefined) ? '-' : Number(cell.cad).toFixed(3);
  // The colour legend only applies to the edit-diff view.
  const legend = isEditing
    ? '<div class="modal-legend">'
      + '<span class="mlc" style="background:#2173f5"></span>correct change (ground truth)'
      + '<span class="mlc" style="background:#bdc4d1"></span>your output'
      + '<span class="mlc" style="background:#e62929"></span>extra material (too much)'
      + '<span class="mlc" style="background:#f5991a"></span>missing material (too little)'
      + '</div>'
    : '';
  document.getElementById('modalNote').innerHTML =
    'CAD score for this sample: <b>' + cad + '</b>. '
    + (isEditing
        ? 'Left is the ground-truth answer key: blue is the change the edit should '
          + 'make (vs the starting shape). Right is your output\\u2019s edit diff: red '
          + 'is material your output added that the GT lacks, amber is GT material your '
          + 'output is missing. '
        : '')
    + 'The full per-sample report (shape similarity, interface, topology + 3D view) '
    + 'opens from the report viewer.' + legend;
  document.getElementById('modalBack').classList.add('show');
}
function closeModal() {
  // Hiding is purely a class toggle on the backdrop element.
  const backdrop = document.getElementById('modalBack');
  backdrop.classList.remove('show');
}
// Three ways to dismiss the modal: the Close button, a click that lands on
// the backdrop itself (not on the dialog inside it), and the Escape key.
document.getElementById('modalClose').onclick = closeModal;
document.getElementById('modalBack').onclick = (evt) => {
  if (evt.target.id !== 'modalBack') return;
  closeModal();
};
document.addEventListener('keydown', (evt) => {
  if (evt.key !== 'Escape') return;
  closeModal();
});

// Pin the GT row exactly beneath the sticky column header.
function syncHeadHeight() {
  // Publish the header's rendered height as the --head-h custom property on
  // the root element; does nothing when the header element is absent.
  const head = document.getElementById('gridHead');
  if (!head) return;
  document.documentElement.style.setProperty('--head-h', `${head.offsetHeight}px`);
}

// Show the "scroll for more" cue only while there are rows below the fold.
function updateScrollCue() {
  const box = document.getElementById('gallery');
  const cue = document.getElementById('scrollCue');
  if (!(box && cue)) return;
  // Pixels of content still hidden below the visible part of the box; the
  // cue is shown only while more than 8px remain.
  const remaining = box.scrollHeight - box.clientHeight - box.scrollTop;
  const hasMore = remaining > 8;
  cue.hidden = !hasMore;
}

// Height of the gallery scroll box. HF auto-resizes the Space iframe to its
// full content height, so `window.innerHeight` / `vh` inside these nested
// iframes report the inflated content height, not the real viewport -- they
// can't be used to size a one-screen box. `screen.availHeight` is the screen
// work-area and is NOT affected by the iframe nesting, so we derive the box
// height from it (minus a fixed reserve, then clamped) and the rows scroll
// inside the box while the sticky header + ground-truth row stay locked.
//
// Reserve for everything that is NOT the scroll box but still has to fit on
// screen: the browser/OS chrome between the screen work-area and the window
// viewport, plus the HF page header + Gradio title/tabs + this page's caption
// and Refresh button. Subtracting it from the screen height keeps the whole
// gallery within one viewport, so there is a single scrollbar (the box's own)
// rather than the box plus an outer page scroll. Deliberately generous: a box
// that is a little short (a touch more in-box scrolling) is far better than a
// confusing second scrollbar.
var CHROME_RESERVE = 450;
function sizeGalleryBox() {
  try {
    // Fall back to 900 when the screen work-area height is unavailable.
    const screenH = (window.screen && window.screen.availHeight) || 900;
    // Phones have much taller browser + HF chrome, so the narrow profile uses
    // its own reserve and a smaller clamp range. innerWidth is used for the
    // narrow check; a missing width is treated as wide.
    const isNarrow = (window.innerWidth || 1000) < 760;
    const limits = isNarrow
      ? { reserve: 410, min: 280, max: 520 }
      : { reserve: CHROME_RESERVE, min: 320, max: 1200 };
    const wanted = Math.round(screenH - limits.reserve);
    const clamped = Math.max(limits.min, Math.min(limits.max, wanted));
    document.documentElement.style.setProperty('--gallery-max', clamped + 'px');
    // The box height changed, so whether rows overflow may have changed too.
    updateScrollCue();
  } catch (e) { /* keep CSS fallback */ }
}

// With the gallery box capped, the page content (caption + box) is bounded, so
// sizing the iframe to it keeps the iframe from adding a second scrollbar: the
// gallery's own box is the single scroller for the rows. No-ops if frameElement
// is unreadable.
function fitIframe() {
  try {
    // When this document is not framed (or the frame is not exposed) there
    // is nothing to resize.
    const frame = window.frameElement;
    if (!frame) return;
    frame.style.height = `${Math.ceil(document.body.scrollHeight)}px`;
  } catch (e) { /* sandboxed -> keep fallback height */ }
}

// Initial paint: draw the rows, then size the scroll box and the iframe.
buildGallery();
sizeGalleryBox();
fitIframe();
{
  // Keep the "scroll for more" cue in step with scrolling inside the box.
  const galleryBox = document.getElementById('gallery');
  if (galleryBox) galleryBox.addEventListener('scroll', updateScrollCue, { passive: true });
}
updateScrollCue();

// Re-run every measurement that depends on layout.
function relayout() {
  syncHeadHeight();
  sizeGalleryBox();
  fitIframe();
  updateScrollCue();
}
window.addEventListener('resize', relayout);
// Body size changes only need the iframe height refreshed.
if (window.ResizeObserver) {
  const bodyWatcher = new ResizeObserver(fitIframe);
  bodyWatcher.observe(document.body);
}
// Re-measure once fonts have finished loading.
if (document.fonts && document.fonts.ready) {
  document.fonts.ready.then(relayout);
}
"""