Michael Rabinovich Cursor committed on
Commit ·
ce583a4
1
Parent(s): 2cf3635
eval_job: warm the render CDN after upload
Browse files
After uploading a submission's renders to the public bucket, fetch each
object's /resolve/ URL once (parallel, anonymous, best-effort) so the first
viewer of the gallery/report hits a warm Xet edge cache instead of paying the
cold chunk-reconstruction latency. New _warm_render_cdn helper; called in both
the single-job and shard paths. _upload_renders_to_bucket now returns the
uploaded object paths so they can be warmed.
Co-authored-by: Cursor <cursoragent@cursor.com>
- eval_job.py +38 -8
eval_job.py
CHANGED
|
@@ -94,11 +94,12 @@ def _render_base_url(submission_id: str) -> str:
|
|
| 94 |
|
| 95 |
def _upload_renders_to_bucket(
|
| 96 |
run_dir: Path, submission_id: str, token: str,
|
| 97 |
-
) ->
|
| 98 |
"""Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
|
| 99 |
|
| 100 |
One ``batch_bucket_files`` call for the whole submission (cheaper than a
|
| 101 |
-
per-file fan-out). Returns the
|
|
|
|
| 102 |
"""
|
| 103 |
add: list[tuple[str, str]] = []
|
| 104 |
for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
|
|
@@ -114,14 +115,42 @@ def _upload_renders_to_bucket(
|
|
| 114 |
f"{fixture_dir.name}/{render_path.name}",
|
| 115 |
))
|
| 116 |
if not add:
|
| 117 |
-
return
|
| 118 |
HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
|
| 119 |
print(
|
| 120 |
f"[eval_job] uploaded {len(add)} render(s) -> "
|
| 121 |
f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
|
| 122 |
flush=True,
|
| 123 |
)
|
| 124 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
# Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
|
| 127 |
# per-fixture dirs in shard mode. The Space merges these and deletes the
|
|
@@ -186,7 +215,7 @@ def main() -> int:
|
|
| 186 |
_run_eval(RUN_DIR, worker_count)
|
| 187 |
# The shard job is the sole uploader of its fixtures' renders to the
|
| 188 |
# permanent bucket prefix; the Space merge only assembles the report.
|
| 189 |
-
_upload_renders_to_bucket(RUN_DIR, submission_id, token)
|
| 190 |
_upload_shard_artifacts(
|
| 191 |
submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
|
| 192 |
)
|
|
@@ -204,9 +233,10 @@ def main() -> int:
|
|
| 204 |
|
| 205 |
_prepare_run_dir(submission_id, zip_url, submissions_repo, token)
|
| 206 |
_run_eval(RUN_DIR, worker_count)
|
| 207 |
-
# Upload renders to the public bucket, then build the
|
| 208 |
-
# them by URL (so the heavy WebP/PNG bytes never land in
|
| 209 |
-
|
|
|
|
| 210 |
html_path = REPORT_HTML_DIR / f"{submission_id}.html"
|
| 211 |
_run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
|
| 212 |
report_json = _build_report_json(RUN_DIR)
|
|
|
|
| 94 |
|
| 95 |
def _upload_renders_to_bucket(
|
| 96 |
run_dir: Path, submission_id: str, token: str,
|
| 97 |
+
) -> list[str]:
|
| 98 |
"""Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
|
| 99 |
|
| 100 |
One ``batch_bucket_files`` call for the whole submission (cheaper than a
|
| 101 |
+
per-file fan-out). Returns the bucket object paths that were uploaded (so
|
| 102 |
+
the caller can warm the CDN for them).
|
| 103 |
"""
|
| 104 |
add: list[tuple[str, str]] = []
|
| 105 |
for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
|
|
|
|
| 115 |
f"{fixture_dir.name}/{render_path.name}",
|
| 116 |
))
|
| 117 |
if not add:
|
| 118 |
+
return []
|
| 119 |
HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
|
| 120 |
print(
|
| 121 |
f"[eval_job] uploaded {len(add)} render(s) -> "
|
| 122 |
f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
|
| 123 |
flush=True,
|
| 124 |
)
|
| 125 |
+
return [dest for _, dest in add]
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _warm_render_cdn(object_paths: list[str]) -> None:
|
| 129 |
+
"""Prime the CDN by fetching each freshly-uploaded render once.
|
| 130 |
+
|
| 131 |
+
A bucket serves a render via a 302 to a signed Xet CDN URL, and the very
|
| 132 |
+
first fetch of a brand-new object pays the chunk-reconstruction cost, which
|
| 133 |
+
is the lag a viewer sees opening a just-published report. Fetching each
|
| 134 |
+
object here (in parallel, anonymously, best-effort) warms the edge cache so
|
| 135 |
+
the first human hits a warm object instead. Failures are swallowed: warming
|
| 136 |
+
is an optimisation, never a publish blocker.
|
| 137 |
+
"""
|
| 138 |
+
import urllib.request
|
| 139 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 140 |
+
|
| 141 |
+
def _warm(path: str) -> None:
|
| 142 |
+
url = f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{path}"
|
| 143 |
+
try:
|
| 144 |
+
with urllib.request.urlopen(url, timeout=30) as resp:
|
| 145 |
+
resp.read()
|
| 146 |
+
except Exception:
|
| 147 |
+
pass
|
| 148 |
+
|
| 149 |
+
if not object_paths:
|
| 150 |
+
return
|
| 151 |
+
with ThreadPoolExecutor(max_workers=16) as pool:
|
| 152 |
+
list(pool.map(_warm, object_paths))
|
| 153 |
+
print(f"[eval_job] warmed CDN for {len(object_paths)} render(s)", flush=True)
|
| 154 |
|
| 155 |
# Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
|
| 156 |
# per-fixture dirs in shard mode. The Space merges these and deletes the
|
|
|
|
| 215 |
_run_eval(RUN_DIR, worker_count)
|
| 216 |
# The shard job is the sole uploader of its fixtures' renders to the
|
| 217 |
# permanent bucket prefix; the Space merge only assembles the report.
|
| 218 |
+
_warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
|
| 219 |
_upload_shard_artifacts(
|
| 220 |
submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
|
| 221 |
)
|
|
|
|
| 233 |
|
| 234 |
_prepare_run_dir(submission_id, zip_url, submissions_repo, token)
|
| 235 |
_run_eval(RUN_DIR, worker_count)
|
| 236 |
+
# Upload renders to the public bucket and warm the CDN, then build the
|
| 237 |
+
# report referencing them by URL (so the heavy WebP/PNG bytes never land in
|
| 238 |
+
# the HTML and the first viewer hits an already-warm edge cache).
|
| 239 |
+
_warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
|
| 240 |
html_path = REPORT_HTML_DIR / f"{submission_id}.html"
|
| 241 |
_run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
|
| 242 |
report_json = _build_report_json(RUN_DIR)
|