Michael Rabinovich Cursor committed on
Commit
ce583a4
·
1 Parent(s): 2cf3635

eval_job: warm the render CDN after upload

Browse files

After uploading a submission's renders to the public bucket, fetch each
object's /resolve/ URL once (parallel, anonymous, best-effort) so the first
viewer of the gallery/report hits a warm Xet edge cache instead of paying the
cold chunk-reconstruction latency. New _warm_render_cdn helper; called in both
the single-job and shard paths. _upload_renders_to_bucket now returns the
uploaded object paths so they can be warmed.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show
  1. eval_job.py +38 -8
eval_job.py CHANGED
@@ -94,11 +94,12 @@ def _render_base_url(submission_id: str) -> str:
94
 
95
  def _upload_renders_to_bucket(
96
  run_dir: Path, submission_id: str, token: str,
97
- ) -> int:
98
  """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
99
 
100
  One ``batch_bucket_files`` call for the whole submission (cheaper than a
101
- per-file fan-out). Returns the number of render files uploaded.
 
102
  """
103
  add: list[tuple[str, str]] = []
104
  for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
@@ -114,14 +115,42 @@ def _upload_renders_to_bucket(
114
  f"{fixture_dir.name}/{render_path.name}",
115
  ))
116
  if not add:
117
- return 0
118
  HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
119
  print(
120
  f"[eval_job] uploaded {len(add)} render(s) -> "
121
  f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
122
  flush=True,
123
  )
124
- return len(add)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
127
  # per-fixture dirs in shard mode. The Space merges these and deletes the
@@ -186,7 +215,7 @@ def main() -> int:
186
  _run_eval(RUN_DIR, worker_count)
187
  # The shard job is the sole uploader of its fixtures' renders to the
188
  # permanent bucket prefix; the Space merge only assembles the report.
189
- _upload_renders_to_bucket(RUN_DIR, submission_id, token)
190
  _upload_shard_artifacts(
191
  submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
192
  )
@@ -204,9 +233,10 @@ def main() -> int:
204
 
205
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
206
  _run_eval(RUN_DIR, worker_count)
207
- # Upload renders to the public bucket, then build the report referencing
208
- # them by URL (so the heavy WebP/PNG bytes never land in the HTML).
209
- _upload_renders_to_bucket(RUN_DIR, submission_id, token)
 
210
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
211
  _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
212
  report_json = _build_report_json(RUN_DIR)
 
94
 
95
  def _upload_renders_to_bucket(
96
  run_dir: Path, submission_id: str, token: str,
97
+ ) -> list[str]:
98
  """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
99
 
100
  One ``batch_bucket_files`` call for the whole submission (cheaper than a
101
+ per-file fan-out). Returns the bucket object paths that were uploaded (so
102
+ the caller can warm the CDN for them).
103
  """
104
  add: list[tuple[str, str]] = []
105
  for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
 
115
  f"{fixture_dir.name}/{render_path.name}",
116
  ))
117
  if not add:
118
+ return []
119
  HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
120
  print(
121
  f"[eval_job] uploaded {len(add)} render(s) -> "
122
  f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
123
  flush=True,
124
  )
125
+ return [dest for _, dest in add]
126
+
127
+
128
def _warm_render_cdn(object_paths: list[str]) -> None:
    """Prime the CDN by fetching each freshly-uploaded render once.

    A bucket serves a render via a 302 to a signed Xet CDN URL, and the very
    first fetch of a brand-new object pays the chunk-reconstruction cost, which
    is the lag a viewer sees opening a just-published report. Fetching each
    object here (in parallel, anonymously, best-effort) warms the edge cache so
    the first human hits a warm object instead.

    Args:
        object_paths: Bucket object paths (relative to the bucket root) to
            fetch. An empty list is a no-op and prints nothing.

    Failures never propagate: warming is an optimisation, never a publish
    blocker. They are counted, though, so the summary line reports how many
    objects were actually fetched instead of claiming every path was warmed.
    """
    import urllib.request
    from concurrent.futures import ThreadPoolExecutor
    from urllib.parse import quote

    def _warm(path: str) -> bool:
        # Percent-encode the object path (keeping "/" separators) so names
        # containing spaces, "#", "?" etc. still form a valid URL that targets
        # the intended object.
        url = f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{quote(path)}"
        try:
            with urllib.request.urlopen(url, timeout=30) as resp:
                # Drain the body in chunks: the bytes are discarded, so there
                # is no reason to hold a whole render in memory per thread.
                while resp.read(65536):
                    pass
        except Exception:
            # Deliberately broad: any network/HTTP/URL error just means this
            # object was not warmed; the caller must not fail because of it.
            return False
        return True

    if not object_paths:
        return
    with ThreadPoolExecutor(max_workers=16) as pool:
        warmed = sum(pool.map(_warm, object_paths))
    failed = len(object_paths) - warmed
    summary = f"[eval_job] warmed CDN for {warmed} render(s)"
    if failed:
        summary += f" ({failed} failed, ignored)"
    print(summary, flush=True)
154
 
155
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
156
  # per-fixture dirs in shard mode. The Space merges these and deletes the
 
215
  _run_eval(RUN_DIR, worker_count)
216
  # The shard job is the sole uploader of its fixtures' renders to the
217
  # permanent bucket prefix; the Space merge only assembles the report.
218
+ _warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
219
  _upload_shard_artifacts(
220
  submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
221
  )
 
233
 
234
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
235
  _run_eval(RUN_DIR, worker_count)
236
+ # Upload renders to the public bucket and warm the CDN, then build the
237
+ # report referencing them by URL (so the heavy WebP/PNG bytes never land in
238
+ # the HTML and the first viewer hits an already-warm edge cache).
239
+ _warm_render_cdn(_upload_renders_to_bucket(RUN_DIR, submission_id, token))
240
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
241
  _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
242
  report_json = _build_report_json(RUN_DIR)