Michael Rabinovich Cursor committed on
Commit ·
5140b0a
1
Parent(s): d2161b1
leaderboard: serve GT report assets via proxy; link them in hosted report
Browse files
Add a token-holding /gt/{fixture}/{relpath} proxy route that streams the private GT view
PNGs + PDF, and pass gt_base_url/input_base_url to
generate_html in the merge path so the hosted report links GT/input/overlay assets
(lazy-loaded) instead of base64-inlining them. Bump CADGENBENCH_SHA to 25943a0.
Co-authored-by: Cursor <cursoragent@cursor.com>
- Dockerfile +1 -1
- app.py +55 -0
- submit.py +14 -4
Dockerfile
CHANGED
|
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
-
ARG CADGENBENCH_SHA=
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
|
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
+
ARG CADGENBENCH_SHA=25943a0
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
app.py
CHANGED
|
@@ -609,6 +609,52 @@ def serve_gt_render(fixture: str) -> Response:
|
|
| 609 |
)
|
| 610 |
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
def _gallery_iframe_html() -> str:
|
| 613 |
"""Build the gallery as a self-contained ``srcdoc`` iframe.
|
| 614 |
|
|
@@ -1075,6 +1121,15 @@ app.add_api_route(
|
|
| 1075 |
serve_gt_render,
|
| 1076 |
methods=["GET"],
|
| 1077 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1078 |
# Task-browser input assets (drawings + starting-shape renders). The
|
| 1079 |
# `:path` converter lets `relpath` carry a slash (e.g. renders/iso.png).
|
| 1080 |
# Registered before the Gradio mount so it's not shadowed.
|
|
|
|
| 609 |
)
|
| 610 |
|
| 611 |
|
| 612 |
+
def _fetch_gt_file(fixture: str, relpath: str) -> bytes | None:
    """Read one ground-truth asset, ``<fixture>/<relpath>``, from the GT dataset.

    Backs the ground-truth column of the hosted report: the per-view PNGs
    (``renders/<view>.png``) and ``ground_truth.pdf``. The GT dataset is
    **private**, so the Space (which holds the read token) proxies these
    files instead of the report linking to them directly. Disk caching per
    revision is left to ``hf_hub_download``.

    Args:
        fixture: Fixture directory inside the GT dataset repo.
        relpath: Path of the asset below ``fixture``.

    Returns:
        The raw file bytes, or ``None`` if the download or the read failed
        for any reason. The failure is logged as a warning; in the report
        the browser's normal missing-image handling hides the broken tile.
    """
    try:
        cached_file = Path(
            hf_hub_download(
                repo_id=HF_DATA_GT_REPO,
                filename=f"{fixture}/{relpath}",
                repo_type="dataset",
            )
        )
        payload = cached_file.read_bytes()
    except Exception as exc:  # noqa: BLE001 - any Hub failure -> 404
        logger.warning(
            "Failed to fetch GT file %s/%s (%s: %s)",
            fixture, relpath, type(exc).__name__, exc,
        )
        return None
    return payload
|
| 635 |
+
|
| 636 |
+
|
| 637 |
+
def serve_gt_file(fixture: str, relpath: str) -> Response:
    """Serve a GT asset (view PNG / PDF) with long-lived immutable caching.

    The hosted report references ``/gt/<fixture>/<relpath>`` and the browser
    fetches it lazily. The bytes are a property of the data revision (not of
    any submission), so the same immutable ``Cache-Control`` as the
    render/input proxies applies.

    The asset is read fully into memory and returned as a single
    ``Response`` body; it is not chunk-streamed.

    Args:
        fixture: Fixture directory inside the GT dataset.
        relpath: Path below the fixture; may contain ``/``
            (e.g. ``renders/iso.png``).

    Returns:
        ``200`` with the asset bytes and a media type guessed from
        ``relpath`` (``application/octet-stream`` when it cannot be
        guessed), or an empty ``404`` when the path is rejected or the
        asset cannot be fetched.
    """
    hub_path = f"{fixture}/{relpath}"
    # Path-traversal guard. Any ``..`` is rejected, as are backslashes and
    # empty / ``.`` segments (leading, trailing or doubled slashes). None of
    # these can name a real asset, so answer 404 up front instead of
    # spending a Hub round-trip on them.
    unsafe = (
        ".." in hub_path
        or "\\" in hub_path
        or any(segment in ("", ".") for segment in hub_path.split("/"))
    )
    if unsafe:
        return Response(status_code=404)

    data = _fetch_gt_file(fixture, relpath)
    if data is None:
        return Response(status_code=404)

    # Fall back to a generic binary type when the extension is unknown.
    media_type = mimetypes.guess_type(relpath)[0] or "application/octet-stream"
    return Response(
        content=data,
        media_type=media_type,
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
| 656 |
+
|
| 657 |
+
|
| 658 |
def _gallery_iframe_html() -> str:
|
| 659 |
"""Build the gallery as a self-contained ``srcdoc`` iframe.
|
| 660 |
|
|
|
|
| 1121 |
serve_gt_render,
|
| 1122 |
methods=["GET"],
|
| 1123 |
)
|
| 1124 |
+
# Ground-truth assets the hosted report links lazily (per-view PNGs + PDF).
|
| 1125 |
+
# GT is private, so this token-holding proxy streams them; the `:path`
|
| 1126 |
+
# converter lets `relpath` carry a slash (e.g. renders/iso.png). Registered
|
| 1127 |
+
# before the Gradio mount so it isn't shadowed by the catch-all sub-app.
|
| 1128 |
+
app.add_api_route(
|
| 1129 |
+
"/gt/{fixture}/{relpath:path}",
|
| 1130 |
+
serve_gt_file,
|
| 1131 |
+
methods=["GET"],
|
| 1132 |
+
)
|
| 1133 |
# Task-browser input assets (drawings + starting-shape renders). The
|
| 1134 |
# `:path` converter lets `relpath` carry a slash (e.g. renders/iso.png).
|
| 1135 |
# Registered before the Gradio mount so it's not shadowed.
|
submit.py
CHANGED
|
@@ -139,6 +139,11 @@ SUBMISSION_ID_SLUG_MAX = 40
|
|
| 139 |
RESULTS_FILENAME = "results.jsonl"
|
| 140 |
SUBMISSIONS_DIR = "submissions"
|
| 141 |
REPORTS_DIR = "reports"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
DATA_REV_SHORT_LEN = 12
|
| 143 |
FAILURE_REASON_MAX_CHARS = 200
|
| 144 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
@@ -1536,11 +1541,16 @@ def _merge_shards_and_publish(
|
|
| 1536 |
report_json = _build_report_json(merged_run)
|
| 1537 |
|
| 1538 |
run_data = discover_run(merged_run)
|
| 1539 |
-
# Hosted report
|
| 1540 |
-
#
|
| 1541 |
-
#
|
|
|
|
|
|
|
| 1542 |
html = generate_html(
|
| 1543 |
-
run_data,
|
|
|
|
|
|
|
|
|
|
| 1544 |
)
|
| 1545 |
html_path = tmp / f"{submission_id}.html"
|
| 1546 |
html_path.write_text(html, encoding="utf-8")
|
|
|
|
| 139 |
RESULTS_FILENAME = "results.jsonl"
|
| 140 |
SUBMISSIONS_DIR = "submissions"
|
| 141 |
REPORTS_DIR = "reports"
|
| 142 |
+
# Space-relative proxy roots the hosted report links its *private* assets
|
| 143 |
+
# through (GT + inputs can't be public-bucket URLs). Must match the routes
|
| 144 |
+
# registered in app.py and the constants in the eval job's eval_job.py.
|
| 145 |
+
GT_PROXY_BASE_URL = "/gt"
|
| 146 |
+
INPUT_PROXY_BASE_URL = "/task-input"
|
| 147 |
DATA_REV_SHORT_LEN = 12
|
| 148 |
FAILURE_REASON_MAX_CHARS = 200
|
| 149 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
|
|
| 1541 |
report_json = _build_report_json(merged_run)
|
| 1542 |
|
| 1543 |
run_data = discover_run(merged_run)
|
| 1544 |
+
# Hosted report links every heavy asset (lazy-loaded) instead of
|
| 1545 |
+
# base64-inlining it, so the committed HTML stays small: candidate
|
| 1546 |
+
# renders + interface overlay come from the public bucket (uploaded by
|
| 1547 |
+
# the shard jobs); GT views + inputs are private, so they link through
|
| 1548 |
+
# the Space's token-holding proxy routes.
|
| 1549 |
html = generate_html(
|
| 1550 |
+
run_data,
|
| 1551 |
+
render_base_url=render_submission_base_url(submission_id),
|
| 1552 |
+
gt_base_url=GT_PROXY_BASE_URL,
|
| 1553 |
+
input_base_url=INPUT_PROXY_BASE_URL,
|
| 1554 |
)
|
| 1555 |
html_path = tmp / f"{submission_id}.html"
|
| 1556 |
html_path.write_text(html, encoding="utf-8")
|