Michael Rabinovich Cursor committed on
Commit
2cf3635
·
1 Parent(s): e3b5dc2

eval_job: upload renders to the public bucket; report references them by URL

Browse files

The job is now the sole render uploader: after eval it pushes each fixture's
renders to renders/<id>/ in the public HF Storage Bucket (single batch), in
both single-job and shard modes. The report is then built with
--render-base-url so candidate renders (incl. the edit-diff WebP) are bucket
URLs, not base64, and only the report HTML/JSON is committed to the dataset.

Dockerfile: bump cadgenbench pin to 3d49822 (--render-base-url support).

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (2) hide show
  1. Dockerfile +1 -1
  2. eval_job.py +73 -44
Dockerfile CHANGED
@@ -51,7 +51,7 @@ RUN python -m venv "$VIRTUAL_ENV" \
51
  # rebuild picks up the latest evaluator dependencies (including Open3D for
52
  # alignment). Lock to a commit SHA at the v1 release for reproducible scores
53
  # (see space-setup/post-gt-swap.md Stage F).
54
- ARG CADGENBENCH_SHA=a0f7205
55
  RUN python -m pip install --no-cache-dir \
56
  "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
57
 
 
51
  # rebuild picks up the latest evaluator dependencies (including Open3D for
52
  # alignment). Lock to a commit SHA at the v1 release for reproducible scores
53
  # (see space-setup/post-gt-swap.md Stage F).
54
+ ARG CADGENBENCH_SHA=3d49822
55
  RUN python -m pip install --no-cache-dir \
56
  "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
57
 
eval_job.py CHANGED
@@ -77,6 +77,52 @@ RENDERS_DIR_IN_REPO = "renders"
77
  SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
78
  SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
81
  # per-fixture dirs in shard mode. The Space merges these and deletes the
82
  # whole ``shards/`` tree after a successful merge.
@@ -138,6 +184,9 @@ def main() -> int:
138
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
139
  _prune_run_dir(RUN_DIR, shard_fixtures)
140
  _run_eval(RUN_DIR, worker_count)
 
 
 
141
  _upload_shard_artifacts(
142
  submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
143
  )
@@ -155,11 +204,14 @@ def main() -> int:
155
 
156
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
157
  _run_eval(RUN_DIR, worker_count)
 
 
 
158
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
159
- _run_report(RUN_DIR, html_path)
160
  report_json = _build_report_json(RUN_DIR)
161
  _publish_reports_and_gallery(
162
- submission_id, html_path, report_json, RUN_DIR, submissions_repo, token,
163
  )
164
  print(f"[eval_job] done: {submission_id}", flush=True)
165
  return 0
@@ -340,12 +392,20 @@ def _run_eval(run_dir: Path, workers: int) -> None:
340
  )
341
 
342
 
343
- def _run_report(run_dir: Path, html_out: Path) -> None:
344
- """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero."""
 
 
 
 
 
 
345
  cmd = [
346
  sys.executable, "-m", "cadgenbench.cli", "report", "single",
347
  str(run_dir), "-o", str(html_out),
348
  ]
 
 
349
  print(f"[eval_job] {' '.join(cmd)}", flush=True)
350
  proc = subprocess.run(
351
  cmd,
@@ -387,24 +447,16 @@ def _publish_reports_and_gallery(
387
  submission_id: str,
388
  html_path: Path,
389
  report_json: dict[str, Any],
390
- run_dir: Path,
391
  submissions_repo: str,
392
  token: str,
393
  ) -> None:
394
- """Publish report HTML/JSON + every per-fixture gallery render in one commit.
395
-
396
- Stages ``reports/<id>.{html,json}`` plus each fixture's render folder
397
- under ``renders/<id>/<fixture>/`` (canonical PNG views and
398
- ``rotating.webp``), then pushes them all in a single ``create_commit``. A
399
- commit-per-file both serialises the publish and hammers the
400
- dataset's commit endpoint (the 429 "concurrency queue" failures that
401
- stranded earlier runs); one commit is atomic, fast, and rate-limit
402
- friendly. The standalone GIFs back the gallery's ``renderFor()``; the
403
- full multi-view renders stay base64-embedded in ``reports/<id>.html``
404
- for the self-contained report. A fixture with no render folder (missing
405
- output, or a render that never ran) is skipped; the gallery reads the
406
- per-fixture status from the row and draws the dashed "invalid
407
- generation" cell, so a render artifact's absence is not an error.
408
  """
409
  operations: list[CommitOperationAdd] = [
410
  CommitOperationAdd(
@@ -418,38 +470,15 @@ def _publish_reports_and_gallery(
418
  ).encode("utf-8"),
419
  ),
420
  ]
421
- render_count = 0
422
- for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
423
- renders_dir = fixture_dir / "renders"
424
- if not renders_dir.is_dir():
425
- continue
426
- for render_path in sorted(renders_dir.iterdir()):
427
- if render_path.suffix.lower() not in {".png", ".webp"}:
428
- continue
429
- operations.append(
430
- CommitOperationAdd(
431
- path_in_repo=(
432
- f"{RENDERS_DIR_IN_REPO}/{submission_id}/"
433
- f"{fixture_dir.name}/{render_path.name}"
434
- ),
435
- path_or_fileobj=str(render_path),
436
- )
437
- )
438
- render_count += 1
439
-
440
  api = HfApi(token=token)
441
  api.create_commit(
442
  repo_id=submissions_repo,
443
  repo_type="dataset",
444
  operations=operations,
445
- commit_message=(
446
- f"publish report + {render_count} gallery render(s) "
447
- f"for {submission_id}"
448
- ),
449
  )
450
  print(
451
- f"[eval_job] published reports/{submission_id}.{{html,json}} + "
452
- f"{render_count} gallery render(s) in one commit",
453
  flush=True,
454
  )
455
 
 
77
  SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
78
  SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
79
 
80
+ # Public HF Storage Bucket the eval job uploads gallery/report renders to (the
81
+ # job is the sole render uploader; the Space never handles render bytes). The
82
+ # hosted report + gallery reference these by anonymous bucket URL. Submission
83
+ # renders only; GT renders stay in the private GT dataset.
84
+ RENDER_BUCKET = os.environ.get(
85
+ "CADGENBENCH_RENDER_BUCKET", "HuggingAI4Engineering/cadgenbench-eval-staging",
86
+ ).strip()
87
+ HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
88
+
89
+
90
+ def _render_base_url(submission_id: str) -> str:
91
+ """Public ``.../resolve/renders/<id>`` base; report appends ``/<fixture>/<file>``."""
92
+ return f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{RENDERS_DIR_IN_REPO}/{submission_id}"
93
+
94
+
95
+ def _upload_renders_to_bucket(
96
+ run_dir: Path, submission_id: str, token: str,
97
+ ) -> int:
98
+ """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
99
+
100
+ One ``batch_bucket_files`` call for the whole submission (cheaper than a
101
+ per-file fan-out). Returns the number of render files uploaded.
102
+ """
103
+ add: list[tuple[str, str]] = []
104
+ for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
105
+ renders_dir = fixture_dir / "renders"
106
+ if not renders_dir.is_dir():
107
+ continue
108
+ for render_path in sorted(renders_dir.iterdir()):
109
+ if render_path.suffix.lower() not in {".png", ".webp"}:
110
+ continue
111
+ add.append((
112
+ str(render_path),
113
+ f"{RENDERS_DIR_IN_REPO}/{submission_id}/"
114
+ f"{fixture_dir.name}/{render_path.name}",
115
+ ))
116
+ if not add:
117
+ return 0
118
+ HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
119
+ print(
120
+ f"[eval_job] uploaded {len(add)} render(s) -> "
121
+ f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
122
+ flush=True,
123
+ )
124
+ return len(add)
125
+
126
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
127
  # per-fixture dirs in shard mode. The Space merges these and deletes the
128
  # whole ``shards/`` tree after a successful merge.
 
184
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
185
  _prune_run_dir(RUN_DIR, shard_fixtures)
186
  _run_eval(RUN_DIR, worker_count)
187
+ # The shard job is the sole uploader of its fixtures' renders to the
188
+ # permanent bucket prefix; the Space merge only assembles the report.
189
+ _upload_renders_to_bucket(RUN_DIR, submission_id, token)
190
  _upload_shard_artifacts(
191
  submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
192
  )
 
204
 
205
  _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
206
  _run_eval(RUN_DIR, worker_count)
207
+ # Upload renders to the public bucket, then build the report referencing
208
+ # them by URL (so the heavy WebP/PNG bytes never land in the HTML).
209
+ _upload_renders_to_bucket(RUN_DIR, submission_id, token)
210
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
211
+ _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
212
  report_json = _build_report_json(RUN_DIR)
213
  _publish_reports_and_gallery(
214
+ submission_id, html_path, report_json, submissions_repo, token,
215
  )
216
  print(f"[eval_job] done: {submission_id}", flush=True)
217
  return 0
 
392
  )
393
 
394
 
395
+ def _run_report(
396
+ run_dir: Path, html_out: Path, *, render_base_url: str | None = None,
397
+ ) -> None:
398
+ """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
399
+
400
+ Passes ``--render-base-url`` so candidate renders are referenced from the
401
+ public bucket rather than base64-inlined into the hosted HTML.
402
+ """
403
  cmd = [
404
  sys.executable, "-m", "cadgenbench.cli", "report", "single",
405
  str(run_dir), "-o", str(html_out),
406
  ]
407
+ if render_base_url:
408
+ cmd += ["--render-base-url", render_base_url]
409
  print(f"[eval_job] {' '.join(cmd)}", flush=True)
410
  proc = subprocess.run(
411
  cmd,
 
447
  submission_id: str,
448
  html_path: Path,
449
  report_json: dict[str, Any],
 
450
  submissions_repo: str,
451
  token: str,
452
  ) -> None:
453
+ """Publish the report HTML + JSON to the submissions dataset in one commit.
454
+
455
+ Renders are **not** committed here: :func:`_upload_renders_to_bucket` has
456
+ already pushed them to the public render bucket under ``renders/<id>/``, and
457
+ the report HTML references them by bucket URL. Keeping the binary renders
458
+ out of the dataset repo avoids bloating its git history and the commit-queue
459
+ 429s the per-file fan-out used to cause.
 
 
 
 
 
 
 
460
  """
461
  operations: list[CommitOperationAdd] = [
462
  CommitOperationAdd(
 
470
  ).encode("utf-8"),
471
  ),
472
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  api = HfApi(token=token)
474
  api.create_commit(
475
  repo_id=submissions_repo,
476
  repo_type="dataset",
477
  operations=operations,
478
+ commit_message=f"publish report for {submission_id}",
 
 
 
479
  )
480
  print(
481
+ f"[eval_job] published reports/{submission_id}.{{html,json}}",
 
482
  flush=True,
483
  )
484