Spaces:

HuggingAI4Engineering
/

cadgenbench-eval-gpu

Paused

Michael Rabinovich Cursor committed on Jun 5

Commit

ce583a4

1 Parent(s): 2cf3635

eval_job: warm the render CDN after upload

After uploading a submission's renders to the public bucket, fetch each
object's /resolve/ URL once (parallel, anonymous, best-effort) so the first
viewer of the gallery/report hits a warm Xet edge cache instead of paying the
cold chunk-reconstruction latency. New _warm_render_cdn helper; called in both
the single-job and shard paths. _upload_renders_to_bucket now returns the
uploaded object paths so they can be warmed.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show

eval_job.py +38 -8

eval_job.py CHANGED Viewed

@@ -94,11 +94,12 @@ def _render_base_url(submission_id: str) -> str:
 def _upload_renders_to_bucket(
     run_dir: Path, submission_id: str, token: str,
-) -> int:
     """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
     One ``batch_bucket_files`` call for the whole submission (cheaper than a
-    per-file fan-out). Returns the number of render files uploaded.
     """
     add: list[tuple[str, str]] = []
     for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
@@ -114,14 +115,42 @@ def _upload_renders_to_bucket(
                 f"{fixture_dir.name}/{render_path.name}",
             ))
     if not add:
-        return 0
     HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
     print(
         f"[eval_job] uploaded {len(add)} render(s) -> "
         f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
         flush=True,
     )
-    return len(add)
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
 # per-fixture dirs in shard mode. The Space merges these and deletes the
@@ -186,7 +215,7 @@ def main() -> int:
         _run_eval(RUN_DIR, worker_count)
         # The shard job is the sole uploader of its fixtures' renders to the
         # permanent bucket prefix; the Space merge only assembles the report.
-        _upload_renders_to_bucket(RUN_DIR, submission_id, token)
         _upload_shard_artifacts(
             submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
         )
@@ -204,9 +233,10 @@ def main() -> int:
     _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
     _run_eval(RUN_DIR, worker_count)
-    # Upload renders to the public bucket, then build the report referencing
-    # them by URL (so the heavy WebP/PNG bytes never land in the HTML).
-    _upload_renders_to_bucket(RUN_DIR, submission_id, token)
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
     _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
     report_json = _build_report_json(RUN_DIR)

 def _upload_renders_to_bucket(
     run_dir: Path, submission_id: str, token: str,
+) -> list[str]:
     """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
     One ``batch_bucket_files`` call for the whole submission (cheaper than a
+    per-file fan-out). Returns the bucket object paths that were uploaded (so
+    the caller can warm the CDN for them).
     """
     add: list[tuple[str, str]] = []
     for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
                 f"{fixture_dir.name}/{render_path.name}",
             ))
     if not add:
+        return []
     HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
     print(
         f"[eval_job] uploaded {len(add)} render(s) -> "
         f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
         flush=True,
     )
+    return [dest for _, dest in add]
+def _warm_render_cdn(object_paths: list[str]) -> None:
+    """Prime the CDN by fetching each freshly-uploaded render once.
+    A bucket serves a render via a 302 to a signed Xet CDN URL, and the very
+    first fetch of a brand-new object pays the chunk-reconstruction cost, which
+    is the lag a viewer sees opening a just-published report. Fetching each
+    object here (in parallel, anonymously, best-effort) warms the edge cache so
+    the first human hits a warm object instead. Failures are swallowed: warming
+    is an optimisation, never a publish blocker.
+    """
+    import urllib.request
+    from concurrent.futures import ThreadPoolExecutor
+    def _warm(path: str) -> None:
+        url = f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{path}"
+        try:
+            with urllib.request.urlopen(url, timeout=30) as resp:
+                resp.read()
+        except Exception:
+            pass
+    if not object_paths:
+        return
+    with ThreadPoolExecutor(max_workers=16) as pool:
+        list(pool.map(_warm, object_paths))
+    print(f"[eval_job] warmed CDN for {len(object_paths)} render(s)", flush=True)
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
 # per-fixture dirs in shard mode. The Space merges these and deletes the
         _run_eval(RUN_DIR, worker_count)
         # The shard job is the sole uploader of its fixtures' renders to the
         # permanent bucket prefix; the Space merge only assembles the report.
+        _warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
         _upload_shard_artifacts(
             submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
         )
     _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
     _run_eval(RUN_DIR, worker_count)
+    # Upload renders to the public bucket and warm the CDN, then build the
+    # report referencing them by URL (so the heavy WebP/PNG bytes never land in
+    # the HTML and the first viewer hits an already-warm edge cache).
+    _warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
     _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
     report_json = _build_report_json(RUN_DIR)