leaderboard: serve renders from the public bucket, not the dataset proxy
Candidate renders (turntables + edit-diff WebP) now live in the public HF
Storage Bucket under renders/<id>/, uploaded once by the eval job. The gallery
grid and the hosted report reference them by anonymous bucket URL, so the Space
is out of the render read path and the binary renders stop bloating the
submissions dataset / its commit queue.
- leaderboard: HF_RENDER_BUCKET + render URL/path/prefix helpers.
- app: render resolvers return public bucket URLs; the two render proxy routes
+ fetchers are retired. Only the private GT render keeps its token-held proxy.
- submit: merge passes render_base_url to generate_html and commits report-only;
dispatched job gets CADGENBENCH_RENDER_BUCKET/HF_ENDPOINT.
- admin: delete also purges the bucket renders/<id>/ prefix (previously orphaned).
- Dockerfile: bump cadgenbench pin to 3d49822 (render_base_url support).
Co-authored-by: Cursor <cursoragent@cursor.com>
- Dockerfile +1 -1
- admin.py +32 -0
- app.py +20 -105
- leaderboard.py +41 -0
- submit.py +29 -51
- tests/test_admin.py +14 -0
- tests/test_proxy.py +15 -24
|
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
-
ARG CADGENBENCH_SHA=
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
|
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
+
ARG CADGENBENCH_SHA=3d49822
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
|
@@ -34,6 +34,7 @@ import gradio as gr
|
|
| 34 |
from huggingface_hub import cancel_job, list_jobs
|
| 35 |
from huggingface_hub.errors import EntryNotFoundError
|
| 36 |
|
|
|
|
| 37 |
from submit import (
|
| 38 |
EVAL_JOB_NAMESPACE,
|
| 39 |
HF_SUBMISSIONS_REPO,
|
|
@@ -194,6 +195,7 @@ def delete_rows(submission_ids: Iterable[str]) -> None:
|
|
| 194 |
"Failed to delete artifact %s (%s: %s)",
|
| 195 |
path, type(e).__name__, e,
|
| 196 |
)
|
|
|
|
| 197 |
|
| 198 |
def mutate(rows: list[dict[str, Any]]) -> None:
|
| 199 |
rows[:] = [r for r in rows if r.get("submission_id") not in ids]
|
|
@@ -203,6 +205,36 @@ def delete_rows(submission_ids: Iterable[str]) -> None:
|
|
| 203 |
)
|
| 204 |
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
def _cancel_jobs_for_submissions(ids: set[str]) -> int:
|
| 207 |
"""Best-effort cancel every non-terminal eval Job for one of *ids*.
|
| 208 |
|
|
|
|
| 34 |
from huggingface_hub import cancel_job, list_jobs
|
| 35 |
from huggingface_hub.errors import EntryNotFoundError
|
| 36 |
|
| 37 |
+
from leaderboard import HF_RENDER_BUCKET, render_submission_prefix
|
| 38 |
from submit import (
|
| 39 |
EVAL_JOB_NAMESPACE,
|
| 40 |
HF_SUBMISSIONS_REPO,
|
|
|
|
| 195 |
"Failed to delete artifact %s (%s: %s)",
|
| 196 |
path, type(e).__name__, e,
|
| 197 |
)
|
| 198 |
+
_delete_bucket_renders(sid)
|
| 199 |
|
| 200 |
def mutate(rows: list[dict[str, Any]]) -> None:
|
| 201 |
rows[:] = [r for r in rows if r.get("submission_id") not in ids]
|
|
|
|
| 205 |
)
|
| 206 |
|
| 207 |
|
| 208 |
+
def _delete_bucket_renders(submission_id: str) -> None:
    """Delete every render for *submission_id* from the public render bucket.

    The renders live under ``renders/<id>/`` in the bucket (uploaded by the
    eval job). ``batch_bucket_files`` has no recursive prefix delete, so the
    prefix is listed and the resulting files are deleted in one batch.

    The submission prefix carries no trailing slash, so a listing that matches
    it as a plain string prefix could also return a sibling submission's files
    (``renders/alpha`` vs ``renders/alpha-2/...``). Only paths strictly below
    ``<prefix>/`` are deleted, so purging one submission can never touch
    another one. An empty *submission_id* is refused outright because its
    prefix would cover the whole ``renders/`` tree.

    Best-effort: a bucket failure is logged and never blocks the row deletion
    (mirrors the dataset-artifact path).

    Args:
        submission_id: ID of the submission whose renders should be purged.
    """
    if not submission_id:
        # An empty id resolves to the shared ``renders/`` root; never purge it.
        logger.warning("Refusing to delete bucket renders for an empty submission id")
        return

    prefix = render_submission_prefix(submission_id)
    # Directory boundary: only objects inside this submission's own folder.
    subtree = f"{prefix.rstrip('/')}/"
    try:
        listed = (
            getattr(entry, "path", None)
            for entry in _HF_API.list_bucket_tree(
                HF_RENDER_BUCKET, prefix=prefix, recursive=True,
            )
        )
        paths = [
            path
            for path in listed
            # Skip entries without a path, folder markers, and any sibling
            # whose name merely starts with this submission's id.
            if path and path.startswith(subtree) and not path.endswith("/")
        ]
        if paths:
            _HF_API.batch_bucket_files(HF_RENDER_BUCKET, delete=paths)
            logger.info(
                "Deleted %d render(s) under %s from bucket %s",
                len(paths), prefix, HF_RENDER_BUCKET,
            )
    except Exception as e:  # noqa: BLE001 - bucket failure must not block delete
        logger.warning(
            "Failed to delete bucket renders under %s (%s: %s)",
            prefix, type(e).__name__, e,
        )
|
| 236 |
+
|
| 237 |
+
|
| 238 |
def _cancel_jobs_for_submissions(ids: set[str]) -> int:
|
| 239 |
"""Best-effort cancel every non-terminal eval Job for one of *ids*.
|
| 240 |
|
|
@@ -56,6 +56,7 @@ from leaderboard import (
|
|
| 56 |
build_combined_csv,
|
| 57 |
load_admin_table,
|
| 58 |
load_leaderboard_split,
|
|
|
|
| 59 |
)
|
| 60 |
from gallery import render_gallery_page
|
| 61 |
from tasks import load_tasks_from_dir, render_tasks_page
|
|
@@ -530,54 +531,6 @@ def serve_report(submission_id: str) -> Response:
|
|
| 530 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 531 |
|
| 532 |
|
| 533 |
-
def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
|
| 534 |
-
"""Pull a submission's gallery WebP (``renders/<id>/<fixture>/rotating.webp``).
|
| 535 |
-
|
| 536 |
-
Deliberately **not** memoized: renders land over time (a submission
|
| 537 |
-
completes, or an existing row is backfilled) after the Space process
|
| 538 |
-
booted, so negative-caching a boot-time miss would keep a turntable
|
| 539 |
-
dashed until the next restart. ``hf_hub_download`` does its own disk
|
| 540 |
-
caching per revision, so a re-fetch of an unchanged file stays cheap.
|
| 541 |
-
Returns ``None`` on any failure (the gallery draws the dashed cell).
|
| 542 |
-
"""
|
| 543 |
-
try:
|
| 544 |
-
local_path = hf_hub_download(
|
| 545 |
-
repo_id=HF_SUBMISSIONS_REPO,
|
| 546 |
-
filename=f"renders/{submission_id}/{fixture}/rotating.webp",
|
| 547 |
-
repo_type="dataset",
|
| 548 |
-
)
|
| 549 |
-
return Path(local_path).read_bytes()
|
| 550 |
-
except Exception as e: # noqa: BLE001 - any Hub failure -> 404
|
| 551 |
-
logger.warning(
|
| 552 |
-
"Failed to fetch render %s/%s (%s: %s)",
|
| 553 |
-
submission_id, fixture, type(e).__name__, e,
|
| 554 |
-
)
|
| 555 |
-
return None
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
def _fetch_render_diff(submission_id: str, fixture: str) -> bytes | None:
|
| 559 |
-
"""Pull a submission's edit-diff turntable (``renders/<id>/<fixture>/edit_diff.webp``).
|
| 560 |
-
|
| 561 |
-
Editing fixtures only; the evaluator writes this alongside the plain
|
| 562 |
-
``rotating.webp``. Same no-memoization rationale as :func:`_fetch_render`.
|
| 563 |
-
Returns ``None`` on any failure, including the expected miss for
|
| 564 |
-
non-editing fixtures, so the gallery degrades to the dashed cell.
|
| 565 |
-
"""
|
| 566 |
-
try:
|
| 567 |
-
local_path = hf_hub_download(
|
| 568 |
-
repo_id=HF_SUBMISSIONS_REPO,
|
| 569 |
-
filename=f"renders/{submission_id}/{fixture}/edit_diff.webp",
|
| 570 |
-
repo_type="dataset",
|
| 571 |
-
)
|
| 572 |
-
return Path(local_path).read_bytes()
|
| 573 |
-
except Exception as e: # noqa: BLE001 - any Hub failure -> 404
|
| 574 |
-
logger.warning(
|
| 575 |
-
"Failed to fetch edit-diff render %s/%s (%s: %s)",
|
| 576 |
-
submission_id, fixture, type(e).__name__, e,
|
| 577 |
-
)
|
| 578 |
-
return None
|
| 579 |
-
|
| 580 |
-
|
| 581 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 582 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 583 |
|
|
@@ -613,66 +566,35 @@ RENDER_CACHE_CONTROL = "public, max-age=31536000, immutable"
|
|
| 613 |
|
| 614 |
|
| 615 |
def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
|
| 616 |
-
"""Resolver
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
the Space origin even inside the iframe ``srcdoc``. A render that
|
| 623 |
-
404s (valid status but a missing upload) degrades to the dashed cell
|
| 624 |
-
client-side via the ``<img onerror>`` hook.
|
| 625 |
-
|
| 626 |
-
Requires the Space to be **public**: while private, HF's edge 404s
|
| 627 |
-
in-browser fetches to these custom routes.
|
| 628 |
"""
|
| 629 |
-
return
|
| 630 |
|
| 631 |
|
| 632 |
def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
|
| 633 |
-
"""Resolver
|
| 634 |
|
| 635 |
Used by the gallery grid for editing fixtures (see
|
| 636 |
``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
|
| 637 |
-
that never rendered a diff) 404s and degrades to the dashed cell
|
| 638 |
-
|
| 639 |
"""
|
| 640 |
-
return
|
| 641 |
|
| 642 |
|
| 643 |
def _gt_proxy_url(fixture: str) -> str | None:
|
| 644 |
-
"""Resolver returning the cached proxy URL for a fixture's GT WebP.
|
| 645 |
-
return f"/gt-render/{fixture}.webp"
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
def serve_render(submission_id: str, fixture: str) -> Response:
|
| 649 |
-
"""Stream a submission's per-fixture render WebP with long-lived caching.
|
| 650 |
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
read token)
|
| 654 |
-
cache it hard.
|
| 655 |
"""
|
| 656 |
-
|
| 657 |
-
if webp is None:
|
| 658 |
-
return Response(status_code=404)
|
| 659 |
-
return Response(
|
| 660 |
-
content=webp,
|
| 661 |
-
media_type="image/webp",
|
| 662 |
-
headers={"Cache-Control": RENDER_CACHE_CONTROL},
|
| 663 |
-
)
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
def serve_render_diff(submission_id: str, fixture: str) -> Response:
|
| 667 |
-
"""Stream a submission's edit-diff turntable WebP with long-lived caching."""
|
| 668 |
-
webp = _fetch_render_diff(submission_id, fixture)
|
| 669 |
-
if webp is None:
|
| 670 |
-
return Response(status_code=404)
|
| 671 |
-
return Response(
|
| 672 |
-
content=webp,
|
| 673 |
-
media_type="image/webp",
|
| 674 |
-
headers={"Cache-Control": RENDER_CACHE_CONTROL},
|
| 675 |
-
)
|
| 676 |
|
| 677 |
|
| 678 |
def serve_gt_render(fixture: str) -> Response:
|
|
@@ -1145,16 +1067,9 @@ app.add_api_route(
|
|
| 1145 |
# Cached render proxies the gallery's lazy-loaded turntables point at.
|
| 1146 |
# Registered before the Gradio mount so they're not shadowed by the
|
| 1147 |
# catch-all sub-app.
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
methods=["GET"],
|
| 1152 |
-
)
|
| 1153 |
-
app.add_api_route(
|
| 1154 |
-
"/render-diff/{submission_id}/{fixture}.webp",
|
| 1155 |
-
serve_render_diff,
|
| 1156 |
-
methods=["GET"],
|
| 1157 |
-
)
|
| 1158 |
app.add_api_route(
|
| 1159 |
"/gt-render/{fixture}.webp",
|
| 1160 |
serve_gt_render,
|
|
|
|
| 56 |
build_combined_csv,
|
| 57 |
load_admin_table,
|
| 58 |
load_leaderboard_split,
|
| 59 |
+
render_public_url,
|
| 60 |
)
|
| 61 |
from gallery import render_gallery_page
|
| 62 |
from tasks import load_tasks_from_dir, render_tasks_page
|
|
|
|
| 531 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 532 |
|
| 533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 535 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 536 |
|
|
|
|
| 566 |
|
| 567 |
|
| 568 |
def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
    """Resolve a submission's plain turntable to its public render-bucket URL.

    The eval job uploads ``renders/<id>/<fixture>/rotating.webp`` to the public
    bucket, so the browser fetches it straight from object storage (anonymous,
    no Space proxy hop). The gallery only calls this for ``valid`` fixtures; a
    missing upload 404s and degrades to the dashed cell via ``<img onerror>``.
    """
    turntable_filename = "rotating.webp"
    return render_public_url(submission_id, fixture, turntable_filename)
|
| 577 |
|
| 578 |
|
| 579 |
def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
    """Resolve an editing fixture's edit-diff turntable to its public bucket URL.

    Used by the gallery grid for editing fixtures (see
    ``gallery.build_gallery_payload``). A miss (non-editing fixture, or an edit
    that never rendered a diff) 404s and degrades to the dashed cell; there is
    no fallback to the plain turntable.
    """
    edit_diff_filename = "edit_diff.webp"
    return render_public_url(submission_id, fixture, edit_diff_filename)
|
| 588 |
|
| 589 |
|
| 590 |
def _gt_proxy_url(fixture: str) -> str | None:
|
| 591 |
+
"""Resolver returning the cached proxy URL for a fixture's GT WebP.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
|
| 593 |
+
GT renders stay in the **private** GT dataset, so they cannot be public
|
| 594 |
+
bucket URLs; they are still re-streamed through the Space proxy (which
|
| 595 |
+
holds the read token).
|
|
|
|
| 596 |
"""
|
| 597 |
+
return f"/gt-render/{fixture}.webp"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
|
| 599 |
|
| 600 |
def serve_gt_render(fixture: str) -> Response:
|
|
|
|
| 1067 |
# Cached render proxies the gallery's lazy-loaded turntables point at.
|
| 1068 |
# Registered before the Gradio mount so they're not shadowed by the
|
| 1069 |
# catch-all sub-app.
|
| 1070 |
+
# Candidate renders are served directly from the public render bucket (URLs
|
| 1071 |
+
# come from the gallery resolvers), so only the private GT render still needs a
|
| 1072 |
+
# token-holding Space proxy route.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1073 |
app.add_api_route(
|
| 1074 |
"/gt-render/{fixture}.webp",
|
| 1075 |
serve_gt_render,
|
|
@@ -50,10 +50,51 @@ HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
|
|
| 50 |
# read scope, same token the eval already uses for GT).
|
| 51 |
HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
RESULTS_FILENAME = "results.jsonl"
|
| 54 |
HUB_FETCH_TIMEOUT_SECONDS = 30
|
| 55 |
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
class LeaderboardDataError(RuntimeError):
|
| 58 |
"""Raised when the live ``results.jsonl`` cannot be read from the Hub.
|
| 59 |
|
|
|
|
| 50 |
# read scope, same token the eval already uses for GT).
|
| 51 |
HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
|
| 52 |
|
| 53 |
+
# Public HF Storage Bucket holding the per-submission gallery/report renders
|
| 54 |
+
# (candidate turntables + edit-diff WebP). Public so the browser can fetch a
|
| 55 |
+
# render straight from object storage with no token and no Space proxy; the
|
| 56 |
+
# eval job is the only writer. Submission renders are public anyway, the GT
|
| 57 |
+
# renders stay in the private GT dataset and are never published here.
|
| 58 |
+
HF_RENDER_BUCKET = os.getenv("HF_RENDER_BUCKET", f"{HF_ORG}/cadgenbench-eval-staging")
|
| 59 |
+
HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
|
| 60 |
+
# Permanent renders live under this prefix; transient shard staging lives under
|
| 61 |
+
# its own prefix and is wiped after merge, so the two never collide.
|
| 62 |
+
RENDER_BUCKET_PREFIX = "renders"
|
| 63 |
+
|
| 64 |
RESULTS_FILENAME = "results.jsonl"
|
| 65 |
HUB_FETCH_TIMEOUT_SECONDS = 30
|
| 66 |
|
| 67 |
|
| 68 |
+
def render_object_path(submission_id: str, fixture: str, filename: str) -> str:
    """Return one render's bucket-relative path, ``renders/<id>/<fixture>/<file>``."""
    submission_root = f"{RENDER_BUCKET_PREFIX}/{submission_id}"
    return f"{submission_root}/{fixture}/{filename}"
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def render_submission_prefix(submission_id: str) -> str:
    """Return the bucket-relative prefix (no trailing slash) for *submission_id*'s renders."""
    return "{}/{}".format(RENDER_BUCKET_PREFIX, submission_id)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def render_public_url(submission_id: str, fixture: str, filename: str) -> str:
    """Build the stable anonymous URL for one render.

    The URL points at the bucket's ``resolve`` endpoint (the browser follows
    the 302 to the CDN).
    """
    resolve_root = f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
    object_path = render_object_path(submission_id, fixture, filename)
    return f"{resolve_root}{object_path}"
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def render_submission_base_url(submission_id: str) -> str:
    """Build the public base URL of *submission_id*'s renders.

    The result has the shape ``.../resolve/renders/<id>`` with no trailing
    slash. The report generator appends ``/<fixture>/<file>`` to it; it is
    passed to ``cadgenbench``'s ``generate_html`` as the display-only
    ``render_base_url``.
    """
    resolve_root = f"{HF_ENDPOINT}/buckets/{HF_RENDER_BUCKET}/resolve/"
    submission_prefix = render_submission_prefix(submission_id)
    return f"{resolve_root}{submission_prefix}"
|
| 96 |
+
|
| 97 |
+
|
| 98 |
class LeaderboardDataError(RuntimeError):
|
| 99 |
"""Raised when the live ``results.jsonl`` cannot be read from the Hub.
|
| 100 |
|
|
@@ -116,7 +116,14 @@ from huggingface_hub import (
|
|
| 116 |
from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
|
| 117 |
|
| 118 |
import progress
|
| 119 |
-
from leaderboard import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
logger = logging.getLogger(__name__)
|
| 122 |
|
|
@@ -132,7 +139,6 @@ SUBMISSION_ID_SLUG_MAX = 40
|
|
| 132 |
RESULTS_FILENAME = "results.jsonl"
|
| 133 |
SUBMISSIONS_DIR = "submissions"
|
| 134 |
REPORTS_DIR = "reports"
|
| 135 |
-
RENDERS_DIR = "renders"
|
| 136 |
DATA_REV_SHORT_LEN = 12
|
| 137 |
FAILURE_REASON_MAX_CHARS = 200
|
| 138 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
@@ -1170,6 +1176,9 @@ def _dispatch_eval_command(
|
|
| 1170 |
env: dict[str, str] = {
|
| 1171 |
"HF_SUBMISSIONS_REPO": HF_SUBMISSIONS_REPO,
|
| 1172 |
"EVAL_WORKER_COUNT": EVAL_JOB_WORKER_COUNT,
|
|
|
|
|
|
|
|
|
|
| 1173 |
}
|
| 1174 |
for key in ("CADGENBENCH_DATA_REPO", "CADGENBENCH_DATA_GT_REPO"):
|
| 1175 |
value = os.environ.get(key)
|
|
@@ -1527,13 +1536,16 @@ def _merge_shards_and_publish(
|
|
| 1527 |
report_json = _build_report_json(merged_run)
|
| 1528 |
|
| 1529 |
run_data = discover_run(merged_run)
|
| 1530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1531 |
html_path = tmp / f"{submission_id}.html"
|
| 1532 |
html_path.write_text(html, encoding="utf-8")
|
| 1533 |
|
| 1534 |
-
_publish_reports_and_gallery(
|
| 1535 |
-
submission_id, html_path, report_json, merged_run,
|
| 1536 |
-
)
|
| 1537 |
return report_json["run_summary"]
|
| 1538 |
finally:
|
| 1539 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
@@ -1567,24 +1579,15 @@ def _publish_reports_and_gallery(
|
|
| 1567 |
submission_id: str,
|
| 1568 |
html_path: Path,
|
| 1569 |
report_json: dict[str, Any],
|
| 1570 |
-
run_dir: Path,
|
| 1571 |
) -> None:
|
| 1572 |
-
"""Publish report HTML
|
| 1573 |
-
|
| 1574 |
-
|
| 1575 |
-
|
| 1576 |
-
``renders/<id>/
|
| 1577 |
-
|
| 1578 |
-
|
| 1579 |
-
|
| 1580 |
-
the dataset's commit endpoint (the 429 "concurrency queue" failures
|
| 1581 |
-
that stranded earlier runs). One commit is atomic, fast, and
|
| 1582 |
-
rate-limit friendly.
|
| 1583 |
-
|
| 1584 |
-
A fixture with no render folder (missing output / render that never
|
| 1585 |
-
ran) is skipped, matching the single-job behaviour; the gallery
|
| 1586 |
-
draws the dashed "invalid" cell from the row, so an absent thumbnail
|
| 1587 |
-
is not an error.
|
| 1588 |
"""
|
| 1589 |
operations: list[CommitOperationAdd] = [
|
| 1590 |
CommitOperationAdd(
|
|
@@ -1598,41 +1601,16 @@ def _publish_reports_and_gallery(
|
|
| 1598 |
).encode("utf-8"),
|
| 1599 |
),
|
| 1600 |
]
|
| 1601 |
-
render_count = 0
|
| 1602 |
-
for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
|
| 1603 |
-
renders_dir = fixture_dir / "renders"
|
| 1604 |
-
if not renders_dir.is_dir():
|
| 1605 |
-
continue
|
| 1606 |
-
for render_path in sorted(renders_dir.iterdir()):
|
| 1607 |
-
if render_path.suffix.lower() not in {".png", ".webp"}:
|
| 1608 |
-
continue
|
| 1609 |
-
operations.append(
|
| 1610 |
-
CommitOperationAdd(
|
| 1611 |
-
path_in_repo=(
|
| 1612 |
-
f"{RENDERS_DIR}/{submission_id}/"
|
| 1613 |
-
f"{fixture_dir.name}/{render_path.name}"
|
| 1614 |
-
),
|
| 1615 |
-
path_or_fileobj=str(render_path),
|
| 1616 |
-
)
|
| 1617 |
-
)
|
| 1618 |
-
render_count += 1
|
| 1619 |
-
|
| 1620 |
_with_hub_retries(
|
| 1621 |
lambda: _HF_API.create_commit(
|
| 1622 |
repo_id=HF_SUBMISSIONS_REPO,
|
| 1623 |
repo_type="dataset",
|
| 1624 |
operations=operations,
|
| 1625 |
-
commit_message=
|
| 1626 |
-
f"publish merged report + {render_count} gallery render(s) "
|
| 1627 |
-
f"for {submission_id}"
|
| 1628 |
-
),
|
| 1629 |
),
|
| 1630 |
-
what="merged report
|
| 1631 |
-
)
|
| 1632 |
-
logger.info(
|
| 1633 |
-
"Published reports/%s.{html,json} + %d gallery render(s) in one commit",
|
| 1634 |
-
submission_id, render_count,
|
| 1635 |
)
|
|
|
|
| 1636 |
|
| 1637 |
|
| 1638 |
def _cleanup_shard_artifacts(submission_id: str) -> None:
|
|
|
|
| 116 |
from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
|
| 117 |
|
| 118 |
import progress
|
| 119 |
+
from leaderboard import (
|
| 120 |
+
HF_DATA_REPO,
|
| 121 |
+
HF_ENDPOINT,
|
| 122 |
+
HF_ORG,
|
| 123 |
+
HF_RENDER_BUCKET,
|
| 124 |
+
HF_SUBMISSIONS_REPO,
|
| 125 |
+
render_submission_base_url,
|
| 126 |
+
)
|
| 127 |
|
| 128 |
logger = logging.getLogger(__name__)
|
| 129 |
|
|
|
|
| 139 |
RESULTS_FILENAME = "results.jsonl"
|
| 140 |
SUBMISSIONS_DIR = "submissions"
|
| 141 |
REPORTS_DIR = "reports"
|
|
|
|
| 142 |
DATA_REV_SHORT_LEN = 12
|
| 143 |
FAILURE_REASON_MAX_CHARS = 200
|
| 144 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
|
|
| 1176 |
env: dict[str, str] = {
|
| 1177 |
"HF_SUBMISSIONS_REPO": HF_SUBMISSIONS_REPO,
|
| 1178 |
"EVAL_WORKER_COUNT": EVAL_JOB_WORKER_COUNT,
|
| 1179 |
+
# The job is the sole render uploader; tell it which public bucket.
|
| 1180 |
+
"CADGENBENCH_RENDER_BUCKET": HF_RENDER_BUCKET,
|
| 1181 |
+
"HF_ENDPOINT": HF_ENDPOINT,
|
| 1182 |
}
|
| 1183 |
for key in ("CADGENBENCH_DATA_REPO", "CADGENBENCH_DATA_GT_REPO"):
|
| 1184 |
value = os.environ.get(key)
|
|
|
|
| 1536 |
report_json = _build_report_json(merged_run)
|
| 1537 |
|
| 1538 |
run_data = discover_run(merged_run)
|
| 1539 |
+
# Hosted report references the candidate renders from the public bucket
|
| 1540 |
+
# (uploaded by the shard jobs), keeping the heavy WebP/PNG bytes out of
|
| 1541 |
+
# the committed HTML. GT/input stay inlined (GT is private).
|
| 1542 |
+
html = generate_html(
|
| 1543 |
+
run_data, render_base_url=render_submission_base_url(submission_id),
|
| 1544 |
+
)
|
| 1545 |
html_path = tmp / f"{submission_id}.html"
|
| 1546 |
html_path.write_text(html, encoding="utf-8")
|
| 1547 |
|
| 1548 |
+
_publish_reports_and_gallery(submission_id, html_path, report_json)
|
|
|
|
|
|
|
| 1549 |
return report_json["run_summary"]
|
| 1550 |
finally:
|
| 1551 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
| 1579 |
submission_id: str,
|
| 1580 |
html_path: Path,
|
| 1581 |
report_json: dict[str, Any],
|
|
|
|
| 1582 |
) -> None:
|
| 1583 |
+
"""Publish the merged report HTML + JSON to the submissions dataset.
|
| 1584 |
+
|
| 1585 |
+
Commits ``reports/<id>.{html,json}`` in one ``create_commit``. The gallery
|
| 1586 |
+
renders are **not** committed here: each shard job already uploaded its
|
| 1587 |
+
fixtures' renders to the public render bucket under ``renders/<id>/``, and
|
| 1588 |
+
the report HTML references them by bucket URL. Keeping the binary renders
|
| 1589 |
+
out of the dataset repo is what avoids bloating its git history and the
|
| 1590 |
+
commit-queue 429s the per-file fan-out used to cause.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1591 |
"""
|
| 1592 |
operations: list[CommitOperationAdd] = [
|
| 1593 |
CommitOperationAdd(
|
|
|
|
| 1601 |
).encode("utf-8"),
|
| 1602 |
),
|
| 1603 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1604 |
_with_hub_retries(
|
| 1605 |
lambda: _HF_API.create_commit(
|
| 1606 |
repo_id=HF_SUBMISSIONS_REPO,
|
| 1607 |
repo_type="dataset",
|
| 1608 |
operations=operations,
|
| 1609 |
+
commit_message=f"publish merged report for {submission_id}",
|
|
|
|
|
|
|
|
|
|
| 1610 |
),
|
| 1611 |
+
what="merged report publish",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1612 |
)
|
| 1613 |
+
logger.info("Published reports/%s.{html,json}", submission_id)
|
| 1614 |
|
| 1615 |
|
| 1616 |
def _cleanup_shard_artifacts(submission_id: str) -> None:
|
|
@@ -59,6 +59,8 @@ def hub(monkeypatch):
|
|
| 59 |
"rows": [dict(r) for r in SEED_ROWS],
|
| 60 |
"uploads": 0,
|
| 61 |
"deleted_paths": [],
|
|
|
|
|
|
|
| 62 |
}
|
| 63 |
|
| 64 |
def fake_download() -> str:
|
|
@@ -78,9 +80,18 @@ def hub(monkeypatch):
|
|
| 78 |
def fake_delete_file(*, path_in_repo, **kwargs) -> None:
|
| 79 |
state["deleted_paths"].append(path_in_repo)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
|
| 82 |
monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
|
| 83 |
monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
|
|
|
|
|
|
|
| 84 |
return state
|
| 85 |
|
| 86 |
|
|
@@ -180,6 +191,9 @@ def test_delete_rows_removes_rows_and_artifacts(hub):
|
|
| 180 |
"reports/alpha.html",
|
| 181 |
"reports/alpha.json",
|
| 182 |
]
|
|
|
|
|
|
|
|
|
|
| 183 |
assert hub["uploads"] == 1
|
| 184 |
|
| 185 |
|
|
|
|
| 59 |
"rows": [dict(r) for r in SEED_ROWS],
|
| 60 |
"uploads": 0,
|
| 61 |
"deleted_paths": [],
|
| 62 |
+
"bucket_listed_prefixes": [],
|
| 63 |
+
"bucket_deleted_paths": [],
|
| 64 |
}
|
| 65 |
|
| 66 |
def fake_download() -> str:
|
|
|
|
| 80 |
def fake_delete_file(*, path_in_repo, **kwargs) -> None:
|
| 81 |
state["deleted_paths"].append(path_in_repo)
|
| 82 |
|
| 83 |
+
def fake_list_bucket_tree(bucket_id, *, prefix, recursive=False, **kwargs):
|
| 84 |
+
state["bucket_listed_prefixes"].append(prefix)
|
| 85 |
+
return [SimpleNamespace(path=f"{prefix}/101/rotating.webp")]
|
| 86 |
+
|
| 87 |
+
def fake_batch_bucket_files(bucket_id, *, add=None, delete=None, **kwargs):
|
| 88 |
+
state["bucket_deleted_paths"].extend(delete or [])
|
| 89 |
+
|
| 90 |
monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
|
| 91 |
monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
|
| 92 |
monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
|
| 93 |
+
monkeypatch.setattr(submit._HF_API, "list_bucket_tree", fake_list_bucket_tree)
|
| 94 |
+
monkeypatch.setattr(submit._HF_API, "batch_bucket_files", fake_batch_bucket_files)
|
| 95 |
return state
|
| 96 |
|
| 97 |
|
|
|
|
| 191 |
"reports/alpha.html",
|
| 192 |
"reports/alpha.json",
|
| 193 |
]
|
| 194 |
+
# Renders for the deleted submission are purged from the public bucket too.
|
| 195 |
+
assert hub["bucket_listed_prefixes"] == ["renders/alpha"]
|
| 196 |
+
assert hub["bucket_deleted_paths"] == ["renders/alpha/101/rotating.webp"]
|
| 197 |
assert hub["uploads"] == 1
|
| 198 |
|
| 199 |
|
|
@@ -42,15 +42,20 @@ def test_serve_report_returns_404_when_file_missing(monkeypatch):
|
|
| 42 |
assert "Report not found" in resp.body.decode("utf-8")
|
| 43 |
|
| 44 |
|
| 45 |
-
def
|
| 46 |
-
"""
|
| 47 |
-
monkeypatch.setattr(app, "_fetch_render", lambda sid, fixture: b"RIFFwebp")
|
| 48 |
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
assert
|
| 53 |
-
assert
|
| 54 |
|
| 55 |
|
| 56 |
def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
|
|
@@ -79,22 +84,6 @@ def test_fetch_report_html_returns_none_on_hub_failure(monkeypatch):
|
|
| 79 |
assert app._fetch_report_html("sub-failure-probe-unique-1") is None
|
| 80 |
|
| 81 |
|
| 82 |
-
def test_fetch_render_uses_nested_rotating_webp_path(monkeypatch, tmp_path):
|
| 83 |
-
"""Submission renders are fetched from the nested WebP artifact path."""
|
| 84 |
-
webp = tmp_path / "rotating.webp"
|
| 85 |
-
webp.write_bytes(b"RIFFwebp")
|
| 86 |
-
captured: dict = {}
|
| 87 |
-
|
| 88 |
-
def fake_download(**kwargs):
|
| 89 |
-
captured.update(kwargs)
|
| 90 |
-
return str(webp)
|
| 91 |
-
|
| 92 |
-
monkeypatch.setattr(app, "hf_hub_download", fake_download)
|
| 93 |
-
|
| 94 |
-
assert app._fetch_render("sub-test", "101") == b"RIFFwebp"
|
| 95 |
-
assert captured["filename"] == "renders/sub-test/101/rotating.webp"
|
| 96 |
-
|
| 97 |
-
|
| 98 |
def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
|
| 99 |
"""GT WebPs are fetched from the GT dataset's per-fixture render folder."""
|
| 100 |
webp = tmp_path / "rotating.webp"
|
|
@@ -120,7 +109,9 @@ def test_proxy_route_is_registered():
|
|
| 120 |
"""
|
| 121 |
routes = [getattr(r, "path", None) for r in app.app.routes]
|
| 122 |
assert "/reports/{submission_id}.html" in routes
|
| 123 |
-
|
|
|
|
|
|
|
| 124 |
assert "/gt-render/{fixture}.webp" in routes
|
| 125 |
|
| 126 |
|
|
|
|
| 42 |
assert "Report not found" in resp.body.decode("utf-8")
|
| 43 |
|
| 44 |
|
| 45 |
+
def test_render_resolvers_return_public_bucket_urls():
    """Candidate renders resolve to public bucket URLs, not Space proxy routes.

    Both resolvers must return a stable ``/buckets/<id>/resolve/renders/...``
    URL (anonymous, browser-followable 302): one for the plain turntable and
    one for the edit-diff WebP.
    """
    cases = (
        (app._render_proxy_url, "101", "rotating.webp"),
        (app._render_diff_proxy_url, "207", "edit_diff.webp"),
    )
    for resolver, fixture, filename in cases:
        resolved = resolver("sub-test", fixture)
        assert resolved.endswith(f"/renders/sub-test/{fixture}/{filename}")
        assert "/buckets/" in resolved and "/resolve/" in resolved
|
| 59 |
|
| 60 |
|
| 61 |
def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
|
|
|
|
| 84 |
assert app._fetch_report_html("sub-failure-probe-unique-1") is None
|
| 85 |
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
|
| 88 |
"""GT WebPs are fetched from the GT dataset's per-fixture render folder."""
|
| 89 |
webp = tmp_path / "rotating.webp"
|
|
|
|
| 109 |
"""
|
| 110 |
routes = [getattr(r, "path", None) for r in app.app.routes]
|
| 111 |
assert "/reports/{submission_id}.html" in routes
|
| 112 |
+
# Candidate renders moved to the public bucket; only the private GT render
|
| 113 |
+
# still needs a token-holding Space proxy route.
|
| 114 |
+
assert "/render/{submission_id}/{fixture}.webp" not in routes
|
| 115 |
assert "/gt-render/{fixture}.webp" in routes
|
| 116 |
|
| 117 |
|