Michael Rabinovich Cursor committed on
Commit
d2161b1
·
1 Parent(s): e611f15

leaderboard: serve renders from the public bucket, not the dataset proxy

Browse files

Candidate renders (turntables + edit-diff WebP) now live in the public HF
Storage Bucket under renders/<id>/, uploaded once by the eval job. The gallery
grid and the hosted report reference them by anonymous bucket URL, so the Space
is no longer in the render read path, and the binary renders stop bloating the
submissions dataset and its commit queue.

- leaderboard: HF_RENDER_BUCKET + render URL/path/prefix helpers.
- app: render resolvers return public bucket URLs; the two render proxy routes
and their fetchers are retired. Only the private GT render keeps its token-holding proxy.
- submit: merge passes render_base_url to generate_html and commits only the report;
the dispatched job gets the CADGENBENCH_RENDER_BUCKET and HF_ENDPOINT env vars.
- admin: delete also purges the bucket renders/<id>/ prefix (previously orphaned).
- Dockerfile: bump cadgenbench pin to 3d49822 (render_base_url support).

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (7) hide show
  1. Dockerfile +1 -1
  2. admin.py +32 -0
  3. app.py +20 -105
  4. leaderboard.py +41 -0
  5. submit.py +29 -51
  6. tests/test_admin.py +14 -0
  7. tests/test_proxy.py +15 -24
Dockerfile CHANGED
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
- ARG CADGENBENCH_SHA=b3ff8f0
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
 
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
+ ARG CADGENBENCH_SHA=3d49822
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
admin.py CHANGED
@@ -34,6 +34,7 @@ import gradio as gr
34
  from huggingface_hub import cancel_job, list_jobs
35
  from huggingface_hub.errors import EntryNotFoundError
36
 
 
37
  from submit import (
38
  EVAL_JOB_NAMESPACE,
39
  HF_SUBMISSIONS_REPO,
@@ -194,6 +195,7 @@ def delete_rows(submission_ids: Iterable[str]) -> None:
194
  "Failed to delete artifact %s (%s: %s)",
195
  path, type(e).__name__, e,
196
  )
 
197
 
198
  def mutate(rows: list[dict[str, Any]]) -> None:
199
  rows[:] = [r for r in rows if r.get("submission_id") not in ids]
@@ -203,6 +205,36 @@ def delete_rows(submission_ids: Iterable[str]) -> None:
203
  )
204
 
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  def _cancel_jobs_for_submissions(ids: set[str]) -> int:
207
  """Best-effort cancel every non-terminal eval Job for one of *ids*.
208
 
 
34
  from huggingface_hub import cancel_job, list_jobs
35
  from huggingface_hub.errors import EntryNotFoundError
36
 
37
+ from leaderboard import HF_RENDER_BUCKET, render_submission_prefix
38
  from submit import (
39
  EVAL_JOB_NAMESPACE,
40
  HF_SUBMISSIONS_REPO,
 
195
  "Failed to delete artifact %s (%s: %s)",
196
  path, type(e).__name__, e,
197
  )
198
+ _delete_bucket_renders(sid)
199
 
200
  def mutate(rows: list[dict[str, Any]]) -> None:
201
  rows[:] = [r for r in rows if r.get("submission_id") not in ids]
 
205
  )
206
 
207
 
208
+ def _delete_bucket_renders(submission_id: str) -> None:
209
+ """Delete every render for *submission_id* from the public render bucket.
210
+
211
+ The renders live under ``renders/<id>/`` in the bucket (uploaded by the eval
212
+ job). ``batch_bucket_files`` has no recursive prefix delete, so we list the
213
+ prefix and delete the files in one batch. Best-effort: a bucket failure is
214
+ logged, never blocks the row deletion (mirrors the dataset-artifact path).
215
+ """
216
+ prefix = render_submission_prefix(submission_id)
217
+ try:
218
+ paths = [
219
+ entry.path
220
+ for entry in _HF_API.list_bucket_tree(
221
+ HF_RENDER_BUCKET, prefix=prefix, recursive=True,
222
+ )
223
+ if getattr(entry, "path", None) and not entry.path.endswith("/")
224
+ ]
225
+ if paths:
226
+ _HF_API.batch_bucket_files(HF_RENDER_BUCKET, delete=paths)
227
+ logger.info(
228
+ "Deleted %d render(s) under %s from bucket %s",
229
+ len(paths), prefix, HF_RENDER_BUCKET,
230
+ )
231
+ except Exception as e: # noqa: BLE001 - bucket failure must not block delete
232
+ logger.warning(
233
+ "Failed to delete bucket renders under %s (%s: %s)",
234
+ prefix, type(e).__name__, e,
235
+ )
236
+
237
+
238
  def _cancel_jobs_for_submissions(ids: set[str]) -> int:
239
  """Best-effort cancel every non-terminal eval Job for one of *ids*.
240
 
app.py CHANGED
@@ -56,6 +56,7 @@ from leaderboard import (
56
  build_combined_csv,
57
  load_admin_table,
58
  load_leaderboard_split,
 
59
  )
60
  from gallery import render_gallery_page
61
  from tasks import load_tasks_from_dir, render_tasks_page
@@ -530,54 +531,6 @@ def serve_report(submission_id: str) -> Response:
530
  return Response(content=content, media_type="text/html; charset=utf-8")
531
 
532
 
533
- def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
534
- """Pull a submission's gallery WebP (``renders/<id>/<fixture>/rotating.webp``).
535
-
536
- Deliberately **not** memoized: renders land over time (a submission
537
- completes, or an existing row is backfilled) after the Space process
538
- booted, so negative-caching a boot-time miss would keep a turntable
539
- dashed until the next restart. ``hf_hub_download`` does its own disk
540
- caching per revision, so a re-fetch of an unchanged file stays cheap.
541
- Returns ``None`` on any failure (the gallery draws the dashed cell).
542
- """
543
- try:
544
- local_path = hf_hub_download(
545
- repo_id=HF_SUBMISSIONS_REPO,
546
- filename=f"renders/{submission_id}/{fixture}/rotating.webp",
547
- repo_type="dataset",
548
- )
549
- return Path(local_path).read_bytes()
550
- except Exception as e: # noqa: BLE001 - any Hub failure -> 404
551
- logger.warning(
552
- "Failed to fetch render %s/%s (%s: %s)",
553
- submission_id, fixture, type(e).__name__, e,
554
- )
555
- return None
556
-
557
-
558
- def _fetch_render_diff(submission_id: str, fixture: str) -> bytes | None:
559
- """Pull a submission's edit-diff turntable (``renders/<id>/<fixture>/edit_diff.webp``).
560
-
561
- Editing fixtures only; the evaluator writes this alongside the plain
562
- ``rotating.webp``. Same no-memoization rationale as :func:`_fetch_render`.
563
- Returns ``None`` on any failure, including the expected miss for
564
- non-editing fixtures, so the gallery degrades to the dashed cell.
565
- """
566
- try:
567
- local_path = hf_hub_download(
568
- repo_id=HF_SUBMISSIONS_REPO,
569
- filename=f"renders/{submission_id}/{fixture}/edit_diff.webp",
570
- repo_type="dataset",
571
- )
572
- return Path(local_path).read_bytes()
573
- except Exception as e: # noqa: BLE001 - any Hub failure -> 404
574
- logger.warning(
575
- "Failed to fetch edit-diff render %s/%s (%s: %s)",
576
- submission_id, fixture, type(e).__name__, e,
577
- )
578
- return None
579
-
580
-
581
  def _fetch_gt_render(fixture: str) -> bytes | None:
582
  """Pull a fixture's ground-truth GIF from the private GT dataset.
583
 
@@ -613,66 +566,35 @@ RENDER_CACHE_CONTROL = "public, max-age=31536000, immutable"
613
 
614
 
615
  def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
616
- """Resolver returning the cached proxy URL for a submission GIF.
617
-
618
- Returns the route string **without** fetching the bytes (that's the
619
- whole point: the browser lazy-fetches on demand, so only the visible
620
- tiles load). The gallery only calls this for fixtures whose
621
- per-fixture status is ``valid``; an absolute path resolves against
622
- the Space origin even inside the iframe ``srcdoc``. A render that
623
- 404s (valid status but a missing upload) degrades to the dashed cell
624
- client-side via the ``<img onerror>`` hook.
625
-
626
- Requires the Space to be **public**: while private, HF's edge 404s
627
- in-browser fetches to these custom routes.
628
  """
629
- return f"/render/{submission_id}/{fixture}.webp"
630
 
631
 
632
  def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
633
- """Resolver returning the cached proxy URL for a submission's edit-diff WebP.
634
 
635
  Used by the gallery grid for editing fixtures (see
636
  ``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
637
- that never rendered a diff) 404s and degrades to the dashed cell client-side
638
- via the ``<img onerror>`` hook, no fallback to the plain turntable.
639
  """
640
- return f"/render-diff/{submission_id}/{fixture}.webp"
641
 
642
 
643
  def _gt_proxy_url(fixture: str) -> str | None:
644
- """Resolver returning the cached proxy URL for a fixture's GT WebP."""
645
- return f"/gt-render/{fixture}.webp"
646
-
647
-
648
- def serve_render(submission_id: str, fixture: str) -> Response:
649
- """Stream a submission's per-fixture render WebP with long-lived caching.
650
 
651
- The gallery references ``/render/<id>/<fixture>.webp`` and the browser
652
- fetches it lazily. Re-streams the dataset bytes (the Space holds the
653
- read token) with an immutable ``Cache-Control`` so the CDN/browser
654
- cache it hard.
655
  """
656
- webp = _fetch_render(submission_id, fixture)
657
- if webp is None:
658
- return Response(status_code=404)
659
- return Response(
660
- content=webp,
661
- media_type="image/webp",
662
- headers={"Cache-Control": RENDER_CACHE_CONTROL},
663
- )
664
-
665
-
666
- def serve_render_diff(submission_id: str, fixture: str) -> Response:
667
- """Stream a submission's edit-diff turntable WebP with long-lived caching."""
668
- webp = _fetch_render_diff(submission_id, fixture)
669
- if webp is None:
670
- return Response(status_code=404)
671
- return Response(
672
- content=webp,
673
- media_type="image/webp",
674
- headers={"Cache-Control": RENDER_CACHE_CONTROL},
675
- )
676
 
677
 
678
  def serve_gt_render(fixture: str) -> Response:
@@ -1145,16 +1067,9 @@ app.add_api_route(
1145
  # Cached render proxies the gallery's lazy-loaded turntables point at.
1146
  # Registered before the Gradio mount so they're not shadowed by the
1147
  # catch-all sub-app.
1148
- app.add_api_route(
1149
- "/render/{submission_id}/{fixture}.webp",
1150
- serve_render,
1151
- methods=["GET"],
1152
- )
1153
- app.add_api_route(
1154
- "/render-diff/{submission_id}/{fixture}.webp",
1155
- serve_render_diff,
1156
- methods=["GET"],
1157
- )
1158
  app.add_api_route(
1159
  "/gt-render/{fixture}.webp",
1160
  serve_gt_render,
 
56
  build_combined_csv,
57
  load_admin_table,
58
  load_leaderboard_split,
59
+ render_public_url,
60
  )
61
  from gallery import render_gallery_page
62
  from tasks import load_tasks_from_dir, render_tasks_page
 
531
  return Response(content=content, media_type="text/html; charset=utf-8")
532
 
533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  def _fetch_gt_render(fixture: str) -> bytes | None:
535
  """Pull a fixture's ground-truth GIF from the private GT dataset.
536
 
 
566
 
567
 
568
  def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
569
+ """Resolver for a submission's plain turntable: a public render-bucket URL.
570
+
571
+ The eval job uploads ``renders/<id>/<fixture>/rotating.webp`` to the public
572
+ bucket, so the browser fetches it straight from object storage (anonymous,
573
+ no Space proxy hop). The gallery only calls this for ``valid`` fixtures; a
574
+ missing upload 404s and degrades to the dashed cell via ``<img onerror>``.
 
 
 
 
 
 
575
  """
576
+ return render_public_url(submission_id, fixture, "rotating.webp")
577
 
578
 
579
  def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
580
+ """Resolver for an editing fixture's edit-diff turntable (public bucket URL).
581
 
582
  Used by the gallery grid for editing fixtures (see
583
  ``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
584
+ that never rendered a diff) 404s and degrades to the dashed cell, no
585
+ fallback to the plain turntable.
586
  """
587
+ return render_public_url(submission_id, fixture, "edit_diff.webp")
588
 
589
 
590
  def _gt_proxy_url(fixture: str) -> str | None:
591
+ """Resolver returning the cached proxy URL for a fixture's GT WebP.
 
 
 
 
 
592
 
593
+ GT renders stay in the **private** GT dataset, so they cannot be public
594
+ bucket URLs; they are still re-streamed through the Space proxy (which
595
+ holds the read token).
 
596
  """
597
+ return f"/gt-render/{fixture}.webp"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
 
600
  def serve_gt_render(fixture: str) -> Response:
 
1067
  # Cached render proxies the gallery's lazy-loaded turntables point at.
1068
  # Registered before the Gradio mount so they're not shadowed by the
1069
  # catch-all sub-app.
1070
+ # Candidate renders are served directly from the public render bucket (URLs
1071
+ # come from the gallery resolvers), so only the private GT render still needs a
1072
+ # token-holding Space proxy route.
 
 
 
 
 
 
 
1073
  app.add_api_route(
1074
  "/gt-render/{fixture}.webp",
1075
  serve_gt_render,
leaderboard.py CHANGED
@@ -50,10 +50,51 @@ HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
50
  # read scope, same token the eval already uses for GT).
51
  HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
52
 
 
 
 
 
 
 
 
 
 
 
 
53
  RESULTS_FILENAME = "results.jsonl"
54
  HUB_FETCH_TIMEOUT_SECONDS = 30
55
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  class LeaderboardDataError(RuntimeError):
58
  """Raised when the live ``results.jsonl`` cannot be read from the Hub.
59
 
 
50
  # read scope, same token the eval already uses for GT).
51
  HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
52
 
53
+ # Public HF Storage Bucket holding the per-submission gallery/report renders
54
+ # (candidate turntables + edit-diff WebP). Public so the browser can fetch a
55
+ # render straight from object storage with no token and no Space proxy; the
56
+ # eval job is the only writer. Submission renders are public anyway, the GT
57
+ # renders stay in the private GT dataset and are never published here.
58
+ HF_RENDER_BUCKET = os.getenv("HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging")
59
+ HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
60
+ # Permanent renders live under this prefix; transient shard staging lives under
61
+ # its own prefix and is wiped after merge, so the two never collide.
62
+ RENDER_BUCKET_PREFIX = "renders"
63
+
64
  RESULTS_FILENAME = "results.jsonl"
65
  HUB_FETCH_TIMEOUT_SECONDS = 30
66
 
67
 
68
+ def render_object_path(submission_id: str, fixture: str, filename: str) -> str:
69
+ """Bucket-relative path of one render: ``renders/<id>/<fixture>/<file>``."""
70
+ return f"{RENDER_BUCKET_PREFIX}/{submission_id}/{fixture}/{filename}"
71
+
72
+
73
+ def render_submission_prefix(submission_id: str) -> str:
74
+ """Bucket-relative prefix holding every render for *submission_id*."""
75
+ return f"{RENDER_BUCKET_PREFIX}/{submission_id}"
76
+
77
+
78
+ def render_public_url(submission_id: str, fixture: str, filename: str) -> str:
79
+ """Stable anonymous URL for one render (browser follows the 302 to the CDN)."""
80
+ return (
81
+ f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
82
+ f"{render_object_path(submission_id, fixture, filename)}"
83
+ )
84
+
85
+
86
+ def render_submission_base_url(submission_id: str) -> str:
87
+ """Public base URL for *submission_id*'s renders, ``.../resolve/renders/<id>``.
88
+
89
+ The report generator appends ``/<fixture>/<file>`` to this; passed to
90
+ ``cadgenbench``'s ``generate_html`` as the display-only ``render_base_url``.
91
+ """
92
+ return (
93
+ f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
94
+ f"{render_submission_prefix(submission_id)}"
95
+ )
96
+
97
+
98
  class LeaderboardDataError(RuntimeError):
99
  """Raised when the live ``results.jsonl`` cannot be read from the Hub.
100
 
submit.py CHANGED
@@ -116,7 +116,14 @@ from huggingface_hub import (
116
  from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
117
 
118
  import progress
119
- from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
 
 
 
 
 
 
 
120
 
121
  logger = logging.getLogger(__name__)
122
 
@@ -132,7 +139,6 @@ SUBMISSION_ID_SLUG_MAX = 40
132
  RESULTS_FILENAME = "results.jsonl"
133
  SUBMISSIONS_DIR = "submissions"
134
  REPORTS_DIR = "reports"
135
- RENDERS_DIR = "renders"
136
  DATA_REV_SHORT_LEN = 12
137
  FAILURE_REASON_MAX_CHARS = 200
138
  SHA256_BLOCK_SIZE = 64 * 1024
@@ -1170,6 +1176,9 @@ def _dispatch_eval_command(
1170
  env: dict[str, str] = {
1171
  "HF_SUBMISSIONS_REPO": HF_SUBMISSIONS_REPO,
1172
  "EVAL_WORKER_COUNT": EVAL_JOB_WORKER_COUNT,
 
 
 
1173
  }
1174
  for key in ("CADGENBENCH_DATA_REPO", "CADGENBENCH_DATA_GT_REPO"):
1175
  value = os.environ.get(key)
@@ -1527,13 +1536,16 @@ def _merge_shards_and_publish(
1527
  report_json = _build_report_json(merged_run)
1528
 
1529
  run_data = discover_run(merged_run)
1530
- html = generate_html(run_data)
 
 
 
 
 
1531
  html_path = tmp / f"{submission_id}.html"
1532
  html_path.write_text(html, encoding="utf-8")
1533
 
1534
- _publish_reports_and_gallery(
1535
- submission_id, html_path, report_json, merged_run,
1536
- )
1537
  return report_json["run_summary"]
1538
  finally:
1539
  shutil.rmtree(tmp, ignore_errors=True)
@@ -1567,24 +1579,15 @@ def _publish_reports_and_gallery(
1567
  submission_id: str,
1568
  html_path: Path,
1569
  report_json: dict[str, Any],
1570
- run_dir: Path,
1571
  ) -> None:
1572
- """Publish report HTML/JSON + every per-fixture gallery render in one commit.
1573
-
1574
- The merged artifacts land at the exact paths the leaderboard + the
1575
- row-flip expect: ``reports/<id>.{html,json}`` plus one
1576
- ``renders/<id>/<fixture>/<filename>`` entry for every PNG/GIF in each
1577
- fixture's render folder. All of it goes in a single ``create_commit`` rather than
1578
- one ``upload_file`` per file: a fan-out can stage ~80+ thumbnails,
1579
- and a commit-per-file both serialises the publish (slow) and hammers
1580
- the dataset's commit endpoint (the 429 "concurrency queue" failures
1581
- that stranded earlier runs). One commit is atomic, fast, and
1582
- rate-limit friendly.
1583
-
1584
- A fixture with no render folder (missing output / render that never
1585
- ran) is skipped, matching the single-job behaviour; the gallery
1586
- draws the dashed "invalid" cell from the row, so an absent thumbnail
1587
- is not an error.
1588
  """
1589
  operations: list[CommitOperationAdd] = [
1590
  CommitOperationAdd(
@@ -1598,41 +1601,16 @@ def _publish_reports_and_gallery(
1598
  ).encode("utf-8"),
1599
  ),
1600
  ]
1601
- render_count = 0
1602
- for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
1603
- renders_dir = fixture_dir / "renders"
1604
- if not renders_dir.is_dir():
1605
- continue
1606
- for render_path in sorted(renders_dir.iterdir()):
1607
- if render_path.suffix.lower() not in {".png", ".webp"}:
1608
- continue
1609
- operations.append(
1610
- CommitOperationAdd(
1611
- path_in_repo=(
1612
- f"{RENDERS_DIR}/{submission_id}/"
1613
- f"{fixture_dir.name}/{render_path.name}"
1614
- ),
1615
- path_or_fileobj=str(render_path),
1616
- )
1617
- )
1618
- render_count += 1
1619
-
1620
  _with_hub_retries(
1621
  lambda: _HF_API.create_commit(
1622
  repo_id=HF_SUBMISSIONS_REPO,
1623
  repo_type="dataset",
1624
  operations=operations,
1625
- commit_message=(
1626
- f"publish merged report + {render_count} gallery render(s) "
1627
- f"for {submission_id}"
1628
- ),
1629
  ),
1630
- what="merged report+gallery publish",
1631
- )
1632
- logger.info(
1633
- "Published reports/%s.{html,json} + %d gallery render(s) in one commit",
1634
- submission_id, render_count,
1635
  )
 
1636
 
1637
 
1638
  def _cleanup_shard_artifacts(submission_id: str) -> None:
 
116
  from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
117
 
118
  import progress
119
+ from leaderboard import (
120
+ HF_DATA_REPO,
121
+ HF_ENDPOINT,
122
+ HF_ORG,
123
+ HF_RENDER_BUCKET,
124
+ HF_SUBMISSIONS_REPO,
125
+ render_submission_base_url,
126
+ )
127
 
128
  logger = logging.getLogger(__name__)
129
 
 
139
  RESULTS_FILENAME = "results.jsonl"
140
  SUBMISSIONS_DIR = "submissions"
141
  REPORTS_DIR = "reports"
 
142
  DATA_REV_SHORT_LEN = 12
143
  FAILURE_REASON_MAX_CHARS = 200
144
  SHA256_BLOCK_SIZE = 64 * 1024
 
1176
  env: dict[str, str] = {
1177
  "HF_SUBMISSIONS_REPO": HF_SUBMISSIONS_REPO,
1178
  "EVAL_WORKER_COUNT": EVAL_JOB_WORKER_COUNT,
1179
+ # The job is the sole render uploader; tell it which public bucket.
1180
+ "CADGENBENCH_RENDER_BUCKET": HF_RENDER_BUCKET,
1181
+ "HF_ENDPOINT": HF_ENDPOINT,
1182
  }
1183
  for key in ("CADGENBENCH_DATA_REPO", "CADGENBENCH_DATA_GT_REPO"):
1184
  value = os.environ.get(key)
 
1536
  report_json = _build_report_json(merged_run)
1537
 
1538
  run_data = discover_run(merged_run)
1539
+ # Hosted report references the candidate renders from the public bucket
1540
+ # (uploaded by the shard jobs), keeping the heavy WebP/PNG bytes out of
1541
+ # the committed HTML. GT/input stay inlined (GT is private).
1542
+ html = generate_html(
1543
+ run_data, render_base_url=render_submission_base_url(submission_id),
1544
+ )
1545
  html_path = tmp / f"{submission_id}.html"
1546
  html_path.write_text(html, encoding="utf-8")
1547
 
1548
+ _publish_reports_and_gallery(submission_id, html_path, report_json)
 
 
1549
  return report_json["run_summary"]
1550
  finally:
1551
  shutil.rmtree(tmp, ignore_errors=True)
 
1579
  submission_id: str,
1580
  html_path: Path,
1581
  report_json: dict[str, Any],
 
1582
  ) -> None:
1583
+ """Publish the merged report HTML + JSON to the submissions dataset.
1584
+
1585
+ Commits ``reports/<id>.{html,json}`` in one ``create_commit``. The gallery
1586
+ renders are **not** committed here: each shard job already uploaded its
1587
+ fixtures' renders to the public render bucket under ``renders/<id>/``, and
1588
+ the report HTML references them by bucket URL. Keeping the binary renders
1589
+ out of the dataset repo is what avoids bloating its git history and the
1590
+ commit-queue 429s the per-file fan-out used to cause.
 
 
 
 
 
 
 
 
1591
  """
1592
  operations: list[CommitOperationAdd] = [
1593
  CommitOperationAdd(
 
1601
  ).encode("utf-8"),
1602
  ),
1603
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1604
  _with_hub_retries(
1605
  lambda: _HF_API.create_commit(
1606
  repo_id=HF_SUBMISSIONS_REPO,
1607
  repo_type="dataset",
1608
  operations=operations,
1609
+ commit_message=f"publish merged report for {submission_id}",
 
 
 
1610
  ),
1611
+ what="merged report publish",
 
 
 
 
1612
  )
1613
+ logger.info("Published reports/%s.{html,json}", submission_id)
1614
 
1615
 
1616
  def _cleanup_shard_artifacts(submission_id: str) -> None:
tests/test_admin.py CHANGED
@@ -59,6 +59,8 @@ def hub(monkeypatch):
59
  "rows": [dict(r) for r in SEED_ROWS],
60
  "uploads": 0,
61
  "deleted_paths": [],
 
 
62
  }
63
 
64
  def fake_download() -> str:
@@ -78,9 +80,18 @@ def hub(monkeypatch):
78
  def fake_delete_file(*, path_in_repo, **kwargs) -> None:
79
  state["deleted_paths"].append(path_in_repo)
80
 
 
 
 
 
 
 
 
81
  monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
82
  monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
83
  monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
 
 
84
  return state
85
 
86
 
@@ -180,6 +191,9 @@ def test_delete_rows_removes_rows_and_artifacts(hub):
180
  "reports/alpha.html",
181
  "reports/alpha.json",
182
  ]
 
 
 
183
  assert hub["uploads"] == 1
184
 
185
 
 
59
  "rows": [dict(r) for r in SEED_ROWS],
60
  "uploads": 0,
61
  "deleted_paths": [],
62
+ "bucket_listed_prefixes": [],
63
+ "bucket_deleted_paths": [],
64
  }
65
 
66
  def fake_download() -> str:
 
80
  def fake_delete_file(*, path_in_repo, **kwargs) -> None:
81
  state["deleted_paths"].append(path_in_repo)
82
 
83
+ def fake_list_bucket_tree(bucket_id, *, prefix, recursive=False, **kwargs):
84
+ state["bucket_listed_prefixes"].append(prefix)
85
+ return [SimpleNamespace(path=f"{prefix}/101/rotating.webp")]
86
+
87
+ def fake_batch_bucket_files(bucket_id, *, add=None, delete=None, **kwargs):
88
+ state["bucket_deleted_paths"].extend(delete or [])
89
+
90
  monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
91
  monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
92
  monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
93
+ monkeypatch.setattr(submit._HF_API, "list_bucket_tree", fake_list_bucket_tree)
94
+ monkeypatch.setattr(submit._HF_API, "batch_bucket_files", fake_batch_bucket_files)
95
  return state
96
 
97
 
 
191
  "reports/alpha.html",
192
  "reports/alpha.json",
193
  ]
194
+ # Renders for the deleted submission are purged from the public bucket too.
195
+ assert hub["bucket_listed_prefixes"] == ["renders/alpha"]
196
+ assert hub["bucket_deleted_paths"] == ["renders/alpha/101/rotating.webp"]
197
  assert hub["uploads"] == 1
198
 
199
 
tests/test_proxy.py CHANGED
@@ -42,15 +42,20 @@ def test_serve_report_returns_404_when_file_missing(monkeypatch):
42
  assert "Report not found" in resp.body.decode("utf-8")
43
 
44
 
45
- def test_serve_render_returns_webp_when_file_exists(monkeypatch):
46
- """Submission render proxy serves WebP bytes with image/webp media type."""
47
- monkeypatch.setattr(app, "_fetch_render", lambda sid, fixture: b"RIFFwebp")
48
 
49
- resp = app.serve_render("sub-test", "101")
 
 
 
 
 
 
50
 
51
- assert resp.status_code == 200
52
- assert resp.media_type == "image/webp"
53
- assert resp.body == b"RIFFwebp"
54
 
55
 
56
  def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
@@ -79,22 +84,6 @@ def test_fetch_report_html_returns_none_on_hub_failure(monkeypatch):
79
  assert app._fetch_report_html("sub-failure-probe-unique-1") is None
80
 
81
 
82
- def test_fetch_render_uses_nested_rotating_webp_path(monkeypatch, tmp_path):
83
- """Submission renders are fetched from the nested WebP artifact path."""
84
- webp = tmp_path / "rotating.webp"
85
- webp.write_bytes(b"RIFFwebp")
86
- captured: dict = {}
87
-
88
- def fake_download(**kwargs):
89
- captured.update(kwargs)
90
- return str(webp)
91
-
92
- monkeypatch.setattr(app, "hf_hub_download", fake_download)
93
-
94
- assert app._fetch_render("sub-test", "101") == b"RIFFwebp"
95
- assert captured["filename"] == "renders/sub-test/101/rotating.webp"
96
-
97
-
98
  def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
99
  """GT WebPs are fetched from the GT dataset's per-fixture render folder."""
100
  webp = tmp_path / "rotating.webp"
@@ -120,7 +109,9 @@ def test_proxy_route_is_registered():
120
  """
121
  routes = [getattr(r, "path", None) for r in app.app.routes]
122
  assert "/reports/{submission_id}.html" in routes
123
- assert "/render/{submission_id}/{fixture}.webp" in routes
 
 
124
  assert "/gt-render/{fixture}.webp" in routes
125
 
126
 
 
42
  assert "Report not found" in resp.body.decode("utf-8")
43
 
44
 
45
+ def test_render_resolvers_return_public_bucket_urls():
46
+ """Candidate renders are served straight from the public bucket, no proxy.
 
47
 
48
+ The resolvers must return a stable ``/buckets/<id>/resolve/renders/...``
49
+ URL (anonymous, browser-followable 302) for the plain turntable and the
50
+ edit-diff WebP.
51
+ """
52
+ url = app._render_proxy_url("sub-test", "101")
53
+ assert url.endswith("/renders/sub-test/101/rotating.webp")
54
+ assert "/buckets/" in url and "/resolve/" in url
55
 
56
+ diff = app._render_diff_proxy_url("sub-test", "207")
57
+ assert diff.endswith("/renders/sub-test/207/edit_diff.webp")
58
+ assert "/buckets/" in diff and "/resolve/" in diff
59
 
60
 
61
  def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
 
84
  assert app._fetch_report_html("sub-failure-probe-unique-1") is None
85
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
88
  """GT WebPs are fetched from the GT dataset's per-fixture render folder."""
89
  webp = tmp_path / "rotating.webp"
 
109
  """
110
  routes = [getattr(r, "path", None) for r in app.app.routes]
111
  assert "/reports/{submission_id}.html" in routes
112
+ # Candidate renders moved to the public bucket; only the private GT render
113
+ # still needs a token-holding Space proxy route.
114
+ assert "/render/{submission_id}/{fixture}.webp" not in routes
115
  assert "/gt-render/{fixture}.webp" in routes
116
 
117