File size: 23,157 Bytes
b048bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
b5ad973
 
a662bfa
 
 
 
 
 
 
b5ad973
 
 
5fb3ebc
b5ad973
9fcb684
b5ad973
c4e21b3
f585077
9fcb684
f585077
b5ad973
 
 
9fcb684
 
 
 
b5ad973
 
 
 
 
 
01d67e9
 
 
 
b5ad973
d2161b1
 
 
 
 
 
 
 
 
 
 
9fcb684
 
b5ad973
a662bfa
d2161b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a662bfa
 
 
 
 
 
 
 
 
3112173
 
 
 
1a8f331
77edebf
 
 
 
 
 
 
b5ad973
1748c76
b5ad973
 
 
 
 
 
77edebf
3112173
 
 
 
77edebf
 
b5ad973
 
0dd7215
 
 
 
 
 
 
 
 
 
 
 
77edebf
3112173
 
 
77edebf
 
0dd7215
 
3112173
 
 
77edebf
 
 
 
 
 
 
3112173
77edebf
 
 
 
 
 
 
 
53de73a
 
 
 
 
 
 
 
 
5aee3e5
 
 
c4e21b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5ad973
a662bfa
9fcb684
 
 
 
 
 
 
 
 
b5ad973
a662bfa
 
 
 
 
 
b5ad973
9fcb684
 
 
 
 
 
 
 
b5ad973
9fcb684
 
 
 
 
b5ad973
9fcb684
a662bfa
 
 
 
 
 
 
 
9fcb684
a662bfa
 
 
 
 
 
 
b5ad973
 
5aee3e5
00d091d
 
5aee3e5
 
 
00d091d
5aee3e5
 
 
 
00d091d
b5ad973
 
 
 
 
5aee3e5
 
 
 
 
 
00d091d
 
 
 
 
53de73a
 
 
 
 
 
 
 
 
 
 
1a8f331
 
 
 
 
 
 
 
 
 
 
53de73a
77edebf
1a8f331
 
 
77edebf
 
 
1a8f331
 
53de73a
1a8f331
 
53de73a
 
77edebf
 
53de73a
77edebf
 
 
 
 
 
1a8f331
77edebf
 
 
 
 
53de73a
 
 
1a8f331
 
77edebf
 
 
 
5fb3ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77edebf
 
 
5fb3ebc
 
 
 
 
53de73a
 
0dd7215
 
 
 
 
 
 
 
046548a
0dd7215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
046548a
 
53de73a
0dd7215
 
 
53de73a
77edebf
 
 
 
53de73a
77edebf
 
1a8f331
 
 
 
 
53de73a
77edebf
 
 
 
 
3112173
 
 
 
 
 
 
0dd7215
3112173
0dd7215
 
 
 
 
 
 
 
 
 
 
77edebf
 
 
 
 
 
 
 
 
 
 
c4e21b3
 
0dd7215
f585077
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0957a56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
# Copyright 2026 Hugging Face
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Leaderboard read path.

Loads `results.jsonl` from the submissions dataset on the Hub and
shapes the rows into the dataframe shown on the Leaderboard tab. The
live file is the single source of truth: there is **no fallback** to
bundled/stale data, so any read failure raises
:class:`LeaderboardDataError` rather than silently serving wrong rows.
Module-level constants describe the env-var-driven repo identities
that the submit path also consumes.
"""
from __future__ import annotations

import html
import json
import logging
import os
import re
import tempfile
import time
import uuid
from pathlib import Path

import pandas as pd
import requests
from huggingface_hub import get_token

logger = logging.getLogger(__name__)

HF_ORG = os.getenv("HF_ORG", "michaelr27")
HF_SUBMISSIONS_REPO = os.getenv(
    "HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
)
HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
# Private ground-truth dataset. The gallery's GT render proxy reads
# `<fixture>/renders/iso.png` from here (needs the Space HF_TOKEN's
# read scope, same token the eval already uses for GT).
HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")

# Public HF Storage Bucket holding the per-submission gallery/report renders
# (candidate turntables + edit-diff WebP). Public so the browser can fetch a
# render straight from object storage with no token and no Space proxy; the
# eval job is the only writer. Submission renders are public anyway, the GT
# renders stay in the private GT dataset and are never published here.
HF_RENDER_BUCKET = os.getenv("HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging")
HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
# Permanent renders live under this prefix; transient shard staging lives under
# its own prefix and is wiped after merge, so the two never collide.
RENDER_BUCKET_PREFIX = "renders"

RESULTS_FILENAME = "results.jsonl"
HUB_FETCH_TIMEOUT_SECONDS = 30


def render_object_path(submission_id: str, fixture: str, filename: str) -> str:
    """Bucket-relative path of one render: ``renders/<id>/<fixture>/<file>``."""
    return f"{RENDER_BUCKET_PREFIX}/{submission_id}/{fixture}/{filename}"


def render_submission_prefix(submission_id: str) -> str:
    """Bucket-relative prefix holding every render for *submission_id*."""
    return f"{RENDER_BUCKET_PREFIX}/{submission_id}"


def render_public_url(submission_id: str, fixture: str, filename: str) -> str:
    """Stable anonymous URL for one render (browser follows the 302 to the CDN)."""
    return (
        f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
        f"{render_object_path(submission_id, fixture, filename)}"
    )


def render_submission_base_url(submission_id: str) -> str:
    """Public base URL for *submission_id*'s renders, ``.../resolve/renders/<id>``.

    The report generator appends ``/<fixture>/<file>`` to this; passed to
    ``cadgenbench``'s ``generate_html`` as the display-only ``render_base_url``.
    """
    return (
        f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
        f"{render_submission_prefix(submission_id)}"
    )


class LeaderboardDataError(RuntimeError):
    """Raised when the live ``results.jsonl`` cannot be read from the Hub.

    The leaderboard has **no fallback**: rather than silently serving
    stale or bundled data (which can make a broken Hub read, e.g. an
    under-scoped Space ``HF_TOKEN``, look like an up-to-date but wrong
    leaderboard), every read failure surfaces loudly here.
    """

# Columns visible in the rendered table, in left-to-right order, followed
# by hidden-but-data-present columns the row-click detail panel pulls from.
# Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
# can read them out without a separate state-cache or re-fetch.
# Display column names (these are what the rendered table headers
# read). The visible set is intentionally compact; the detail-panel
# columns at the end of the list ride along in the DataFrame so the
# row-click handler can populate from them without a separate state
# cache. `submission_name` is the primary link (markdown-wrapped at
# projection time to point at the report when one exists), so we
# don't need a separate `report` column. `submission_blob_url` lives
# in the detail panel only.
LEADERBOARD_COLS = [
    "status",
    "submission_name",
    "submitter_name",
    "aggregate_score",
    "validity_rate",
    "submitted_at",
    "cadgenbench_version",
    "model details (optional)",
    # Detail-panel-only (hidden via `hide_columns` on the widget):
    "submission_id",
    "notes",
    "failure_reason",
    "submission_blob_url",
    "report_url",
]

# Validated table additionally exposes `validation_method`; on the
# unvalidated table the field is always null so the column is omitted
# rather than rendered. See cadgenbench-submissions/schema.md.
VALIDATED_LEADERBOARD_COLS = [
    "status",
    "submission_name",
    "submitter_name",
    "aggregate_score",
    "validity_rate",
    "validation_method",
    "submitted_at",
    "cadgenbench_version",
    "model details (optional)",
    "submission_id",
    "notes",
    "failure_reason",
    "submission_blob_url",
    "report_url",
]

# Columns to hide from rendering on both tables. These ride in the
# DataFrame so the row-click detail panel can populate from them; the
# widget hides them from view.
LEADERBOARD_HIDE_COLUMNS = [
    "submission_id",
    "notes",
    "failure_reason",
    "submission_blob_url",
    "report_url",
]

# Per-column gradio_leaderboard datatypes. `submission_name` and
# `model details (optional)` render their pre-formatted markdown
# (the submission_name cell links to the report when one exists; the
# model-details cell is the agent URL or _None_); everything else is
# plain string (numeric cells get pending / failed status tags
# applied by _fmt_pct / _fmt_score so they're string-shaped by the
# time the widget sees them).
_LINK_COLUMNS = frozenset({"submission_name", "model details (optional)"})


def _datatypes_for(columns: list[str]) -> list[str]:
    return ["markdown" if c in _LINK_COLUMNS else "str" for c in columns]


LEADERBOARD_DATATYPES = _datatypes_for(LEADERBOARD_COLS)
VALIDATED_LEADERBOARD_DATATYPES = _datatypes_for(VALIDATED_LEADERBOARD_COLS)

PENDING_CELL_TAG = "⏳ evaluating..."
FAILED_CELL_TAG = "✗ failed"

_ISO_TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}):\d{2}Z$")


def _fmt_timestamp(ts) -> str:
    """Render an ISO-8601 ``submitted_at`` as ``YYYY-MM-DD HH:MM UTC``.

    The schema writes timestamps as ``YYYY-MM-DDTHH:MM:SSZ``; the
    minute-level UTC form is plenty for the table + detail panel,
    drops the ``T``/``Z`` punctuation, and renders the timezone
    explicitly so a reader doesn't have to know that "Z" means UTC.
    """
    if ts is None or (isinstance(ts, float) and pd.isna(ts)):
        return ""
    s = str(ts).strip()
    if not s:
        return ""
    m = _ISO_TS_RE.match(s)
    if m:
        return f"{m.group(1)} {m.group(2)} UTC"
    return s


def _load_rows_from_hub() -> list[dict]:
    """Pull results.jsonl from the submissions dataset via raw HTTPS.

    Avoids :func:`huggingface_hub.hf_hub_download` because its layered
    caching (local disk cache + revision pinning + the Hub's own
    CDN-fronted resolve endpoint) can hand back stale bytes for a few
    minutes even with ``force_download=True``, which makes pending
    rows look like they never landed. A direct GET with a cache-bust
    query param and ``Cache-Control: no-cache`` consistently sees the
    latest commit on the dataset's ``main`` branch within seconds.

    The live ``results.jsonl`` is the single source of truth. Any
    failure (network, auth, malformed JSON) raises
    :class:`LeaderboardDataError`: there is deliberately **no fallback**
    to bundled/stale data, so a broken read fails loudly instead of
    silently serving wrong rows. An empty file is a valid result (an
    empty leaderboard), not a failure.
    """
    url = (
        f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
        f"/resolve/main/{RESULTS_FILENAME}"
    )
    headers = {"Cache-Control": "no-cache"}
    token = get_token()
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        r = requests.get(
            url,
            headers=headers,
            params={"_cb": str(int(time.time() * 1000))},
            timeout=HUB_FETCH_TIMEOUT_SECONDS,
        )
        r.raise_for_status()
    except Exception as e:
        raise LeaderboardDataError(
            f"Could not fetch {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
            f"{type(e).__name__}: {e}. Verify the Space's HF_TOKEN has read "
            f"access to the (private) submissions dataset. The leaderboard "
            f"serves no fallback data."
        ) from e
    try:
        rows = [json.loads(line) for line in r.text.splitlines() if line.strip()]
    except json.JSONDecodeError as e:
        raise LeaderboardDataError(
            f"Malformed {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
            f"{type(e).__name__}: {e}."
        ) from e
    logger.info("Loaded %d rows from Hub", len(rows))
    return rows


def _fmt_pct(x: float | None, status: str) -> str:
    """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values).

    Status-aware: pending / failed rows render a tag in place of the
    number (the row's eventual score is not yet known or never will
    be). ``pd.isna`` covers both ``None`` and pandas-coerced ``NaN``.
    """
    if status == "pending":
        return PENDING_CELL_TAG
    if status == "failed":
        return FAILED_CELL_TAG
    if pd.isna(x):
        return ""
    pct = float(x) * 100
    return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"


def _fmt_score(x: float | None, status: str) -> str:
    """Render an aggregate CAD score, status-aware tag on pending / failed."""
    if status == "pending":
        return PENDING_CELL_TAG
    if status == "failed":
        return FAILED_CELL_TAG
    if pd.isna(x):
        return ""
    return f"{float(x):.4f}"


def _is_empty(v) -> bool:
    """True for None, NaN, or empty/whitespace-only strings."""
    if v is None:
        return True
    if isinstance(v, float) and pd.isna(v):
        return True
    if isinstance(v, str) and not v.strip():
        return True
    return False


_AGENT_URL_MAX_LINK_TEXT = 40


def _shorten_url_for_display(url: str) -> str:
    """Strip scheme + trailing slash; truncate to keep the table cell tidy."""
    s = url.replace("https://", "").replace("http://", "").rstrip("/")
    if len(s) > _AGENT_URL_MAX_LINK_TEXT:
        s = s[: _AGENT_URL_MAX_LINK_TEXT - 1] + "…"
    return s


def _agent_url_md(url) -> str:
    """Render the `model details (optional)` cell as a markdown link.

    Uses a shortened version of the URL itself as the link text:
    `agent_url` is a free-form "URL pointing at the agent code or
    paper" per the schema, so the URL itself carries the only honest
    hint about what's behind the click. Missing cells render as
    italic ``_None_`` so a reader sees the field is optional and
    just wasn't filled, rather than a blank.
    """
    if _is_empty(url):
        return "_None_"
    return f"[{_shorten_url_for_display(str(url))}]({url})"


def _report_relative_url(submission_id, status, submission_sha256) -> str:
    """Relative URL to the Space's report-proxy route, or empty.

    The Space exposes ``/reports/<id>.html`` which fetches the file
    from the submissions dataset and re-serves it as ``text/html``
    (the dataset's ``/resolve/`` path returns it as ``text/plain``,
    which renders as source). Returning a relative URL means the
    same string works whether the Space is running locally on a
    random port or on huggingface.co.

    `reports/<id>.html` only exists for completed rows from the
    modern submit pipeline; legacy seed rows (pre-pipeline,
    ``submission_sha256`` null) never had a report uploaded, so this
    function returns empty for them and the caller leaves the
    submission_name cell as plain text rather than a broken link.
    """
    if status != "completed" or _is_empty(submission_id):
        return ""
    if _is_empty(submission_sha256):
        return ""
    return f"/reports/{submission_id}.html"


def _submission_name_md(name, report_url) -> str:
    """Render `submission_name`, linking to the report in a new tab.

    Now that the Space is public, HF's edge serves the FastAPI
    ``/reports/<id>.html`` route to in-browser users (it 404'd
    same-origin pathname navigations while the Space was private,
    which is why an earlier iteration kept this cell plain text and
    inlined the report via an iframe-srcdoc viewer instead). So the
    name cell becomes a deep-link that opens the self-contained
    per-submission report in a **new tab** — the typical HF
    leaderboard pattern, and far lighter than shipping the
    (tens-to-hundreds-of-MB) report through the page on every click.

    ``report_url`` is the relative ``/reports/<id>.html`` route the
    reader computes only for completed modern-pipeline rows; rows
    without one (pending / failed / legacy) render as plain text. The
    name column is a ``markdown`` datatype, which renders inline HTML,
    so a raw anchor with ``target="_blank"`` works; the name is
    HTML-escaped so an odd submission name can't break the cell.
    """
    if _is_empty(name):
        return "(unnamed submission)"
    label = html.escape(str(name))
    if _is_empty(report_url):
        return label
    href = html.escape(str(report_url), quote=True)
    return f'<a href="{href}" target="_blank" rel="noopener">{label}</a>'


def load_leaderboard_split() -> tuple[pd.DataFrame, pd.DataFrame]:
    """Two-tier reader: returns ``(validated_df, unvalidated_df)``.

    Splits incoming rows on ``validation_status`` (defaulting to
    ``"unvalidated"`` for legacy rows that pre-date the schema bump).
    Both DataFrames sort by ``aggregate_score`` descending with null
    last; the validated DataFrame additionally exposes the
    ``validation_method`` column. Same status-aware cell formatting
    on both tiers via :func:`_project_and_format`.
    """
    rows = _load_rows_from_hub()
    if not rows:
        return (
            pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
            pd.DataFrame(columns=LEADERBOARD_COLS),
        )
    # Backfill defaults for legacy rows that pre-date the relevant
    # schema bumps. `status` retrofits to "completed" (the legacy
    # baseline rows all have populated score fields).
    # `validation_status` retrofits to "unvalidated" per the validation
    # policy doc; defaulting in the reader avoids a results.jsonl
    # rewrite.
    for row in rows:
        if row.get("status") is None:
            row["status"] = "completed"
        if row.get("validation_status") is None:
            row["validation_status"] = "unvalidated"
    df = pd.DataFrame(rows)
    # Defensive split: anything not literally "validated" lands in the
    # unvalidated table (legacy rows, null, future-unknown values).
    validated_mask = df["validation_status"] == "validated"
    validated = _project_and_format(df[validated_mask], VALIDATED_LEADERBOARD_COLS)
    unvalidated = _project_and_format(df[~validated_mask], LEADERBOARD_COLS)
    return validated, unvalidated


def _project_and_format(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Project to display columns, sort by score, apply status-aware formatting.

    Pulled into a helper because :func:`load_leaderboard_split` runs
    it twice (once per tier), and both tiers need identically-shaped
    pending / failed cell tagging and link rendering.
    """
    if df.empty:
        return pd.DataFrame(columns=columns)
    df = df.copy()
    # Compute the relative report URL before projection drops the
    # source columns it depends on. Lives as a hidden column so the
    # detail panel can show "Report: link" and `submission_name` can
    # use it as the link target.
    if "submission_id" in df.columns and "status" in df.columns:
        df["report_url"] = df.apply(
            lambda r: _report_relative_url(
                r["submission_id"],
                r["status"],
                r.get("submission_sha256"),
            ),
            axis=1,
        )
    # Schema field name -> display column header. Keep the rename
    # narrow: only the agent URL gets a friendlier header; the schema
    # field stays `agent_url` in results.jsonl. `submission_blob_url`
    # stays under that name (hidden, detail-panel-only).
    df = df.rename(columns={"agent_url": "model details (optional)"})
    # Make sure every declared column exists (legacy rows can be
    # missing optional fields). Detail-panel reads expect the
    # column-set to be stable regardless of which source rows had
    # which keys.
    for c in columns:
        if c not in df.columns:
            df[c] = None
    out = (
        df[columns]
        .sort_values("aggregate_score", ascending=False, na_position="last")
        .reset_index(drop=True)
    )
    if "validity_rate" in out.columns:
        out["validity_rate"] = out.apply(
            lambda r: _fmt_pct(r["validity_rate"], r["status"]), axis=1,
        )
    if "aggregate_score" in out.columns:
        out["aggregate_score"] = out.apply(
            lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
        )
    if "submission_name" in out.columns:
        out["submission_name"] = out.apply(
            lambda r: _submission_name_md(
                r["submission_name"], r.get("report_url"),
            ),
            axis=1,
        )
    if "model details (optional)" in out.columns:
        out["model details (optional)"] = out["model details (optional)"].apply(
            _agent_url_md
        )
    if "submitted_at" in out.columns:
        out["submitted_at"] = out["submitted_at"].apply(_fmt_timestamp)
    return out


# CSV-export columns. Wider than the on-screen table (raw values
# instead of the display-formatted strings, plus identity / artifact
# fields useful for offline analysis). `validation_status` is the
# discriminator between the two on-screen tables when readers grep
# the file. Order matches roughly: identity -> state -> headline
# scores -> provenance / artifact links -> long-form fields.
CSV_COLUMNS = [
    "submission_id",
    "status",
    "validation_status",
    "validation_method",
    "submitter_name",
    "submission_name",
    "hf_username",
    "aggregate_score",
    "validity_rate",
    "agent_url",
    "submitted_at",
    "cadgenbench_version",
    "cadgenbench_data_revision",
    "submission_blob_url",
    "submission_sha256",
    "notes",
    "failure_reason",
]


def build_combined_csv() -> str:
    """Write the full leaderboard (both tiers) to a temp CSV and return its path.

    One file, both tables, ``validation_status`` discriminator
    column. Used by ``gr.DownloadButton`` on the Leaderboard tab.

    Each call writes a uniquely-named file under the OS tmp dir;
    Gradio caches the file at serve time so we don't need to delete
    it eagerly (the OS tmp cleaner reaps it eventually). Generating
    fresh on every click keeps the export current with whatever the
    next refresh of the table would show.

    Sort order: validated rows first (highest score top), then
    unvalidated, then any rows whose validation_status is some
    unexpected value (defensive). Mirrors the on-screen layout so
    readers diffing the CSV against the UI see the same ordering.
    """
    rows = _load_rows_from_hub()
    for row in rows:
        if row.get("status") is None:
            row["status"] = "completed"
        if row.get("validation_status") is None:
            row["validation_status"] = "unvalidated"
    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=CSV_COLUMNS)
    for c in CSV_COLUMNS:
        if c not in df.columns:
            df[c] = None
    df = df[CSV_COLUMNS]
    if not df.empty:
        # "validated" > "unvalidated" alphabetically (v > u), so
        # descending puts the validated tier first.
        df = df.sort_values(
            ["validation_status", "aggregate_score"],
            ascending=[False, False],
            na_position="last",
        )
    out_dir = Path(tempfile.gettempdir())
    path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
    df.to_csv(path, index=False)
    return str(path)


# Admin-tab table. A single flat view of every row (both tiers) with a
# leading editable ``select`` checkbox column; the rest is read-only
# context the maintainer scans before acting. Raw values (not the
# display-formatted leaderboard strings) so the maintainer sees exactly
# what is on the row. `submission_id` rides last as the action key.
ADMIN_SELECT_COL = "select"
ADMIN_COLUMNS = [
    ADMIN_SELECT_COL,
    "validation_status",
    "validation_method",
    "submission_name",
    "submitter_name",
    "submitted_at",
    "status",
    "aggregate_score",
    "submission_id",
]


def load_admin_table() -> pd.DataFrame:
    """Build the Admin tab's editable table: one row per submission.

    Both tiers in a single frame, validated first then by score, with a
    fresh (all-unchecked) ``select`` column the maintainer ticks to
    choose action targets. Legacy rows get the same ``status`` /
    ``validation_status`` defaults the leaderboard reader applies, so
    pre-schema-bump rows still show up and are actionable.
    """
    rows = _load_rows_from_hub()
    for row in rows:
        if row.get("status") is None:
            row["status"] = "completed"
        if row.get("validation_status") is None:
            row["validation_status"] = "unvalidated"
    if not rows:
        return pd.DataFrame(columns=ADMIN_COLUMNS)
    df = pd.DataFrame(rows)
    for c in ADMIN_COLUMNS:
        if c not in df.columns:
            df[c] = None
    if "submitted_at" in df.columns:
        df["submitted_at"] = df["submitted_at"].apply(_fmt_timestamp)
    df = (
        df[ADMIN_COLUMNS]
        .sort_values(
            ["validation_status", "aggregate_score"],
            ascending=[False, False],
            na_position="last",
        )
        .reset_index(drop=True)
    )
    # Set after projection so the column is a clean all-False boolean
    # regardless of what (if anything) a stray source key held.
    df[ADMIN_SELECT_COL] = False
    return df