Spaces:

HuggingAI4Engineering
/

cadgenbench-eval-gpu

Paused

Michael Rabinovich Cursor committed on Jun 5

Commit

2cf3635

1 Parent(s): e3b5dc2

eval_job: upload renders to the public bucket; report references them by URL

The job is now the sole render uploader: after eval it pushes each fixture's
renders to renders/<id>/ in the public HF Storage Bucket (single batch), in
both single-job and shard modes. The report is then built with
--render-base-url so candidate renders (incl. the edit-diff WebP) are bucket
URLs, not base64, and only the report HTML/JSON is committed to the dataset.

Dockerfile: bump cadgenbench pin to 3d49822 (--render-base-url support).
Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (2) hide show

Dockerfile +1 -1
eval_job.py +73 -44

Dockerfile CHANGED Viewed

@@ -51,7 +51,7 @@ RUN python -m venv "$VIRTUAL_ENV" \
 # rebuild picks up the latest evaluator dependencies (including Open3D for
 # alignment). Lock to a commit SHA at the v1 release for reproducible scores
 # (see space-setup/post-gt-swap.md Stage F).
-ARG CADGENBENCH_SHA=a0f7205
 RUN python -m pip install --no-cache-dir \
         "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"

 # rebuild picks up the latest evaluator dependencies (including Open3D for
 # alignment). Lock to a commit SHA at the v1 release for reproducible scores
 # (see space-setup/post-gt-swap.md Stage F).
+ARG CADGENBENCH_SHA=3d49822
 RUN python -m pip install --no-cache-dir \
         "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"

eval_job.py CHANGED Viewed

@@ -77,6 +77,52 @@ RENDERS_DIR_IN_REPO = "renders"
 SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
 SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
 # per-fixture dirs in shard mode. The Space merges these and deletes the
 # whole ``shards/`` tree after a successful merge.
@@ -138,6 +184,9 @@ def main() -> int:
         _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
         _prune_run_dir(RUN_DIR, shard_fixtures)
         _run_eval(RUN_DIR, worker_count)
         _upload_shard_artifacts(
             submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
         )
@@ -155,11 +204,14 @@ def main() -> int:
     _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
     _run_eval(RUN_DIR, worker_count)
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
-    _run_report(RUN_DIR, html_path)
     report_json = _build_report_json(RUN_DIR)
     _publish_reports_and_gallery(
-        submission_id, html_path, report_json, RUN_DIR, submissions_repo, token,
     )
     print(f"[eval_job] done: {submission_id}", flush=True)
     return 0
@@ -340,12 +392,20 @@ def _run_eval(run_dir: Path, workers: int) -> None:
         )
-def _run_report(run_dir: Path, html_out: Path) -> None:
-    """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero."""
     cmd = [
         sys.executable, "-m", "cadgenbench.cli", "report", "single",
         str(run_dir), "-o", str(html_out),
     ]
     print(f"[eval_job] {' '.join(cmd)}", flush=True)
     proc = subprocess.run(
         cmd,
@@ -387,24 +447,16 @@ def _publish_reports_and_gallery(
     submission_id: str,
     html_path: Path,
     report_json: dict[str, Any],
-    run_dir: Path,
     submissions_repo: str,
     token: str,
 ) -> None:
-    """Publish report HTML/JSON + every per-fixture gallery render in one commit.
-    Stages ``reports/<id>.{html,json}`` plus each fixture's render folder
-    under ``renders/<id>/<fixture>/`` (canonical PNG views and
-    ``rotating.webp``), then pushes them all in a single ``create_commit``. A
-    commit-per-file both serialises the publish and hammers the
-    dataset's commit endpoint (the 429 "concurrency queue" failures that
-    stranded earlier runs); one commit is atomic, fast, and rate-limit
-    friendly. The standalone GIFs back the gallery's ``renderFor()``; the
-    full multi-view renders stay base64-embedded in ``reports/<id>.html``
-    for the self-contained report. A fixture with no render folder (missing
-    output, or a render that never ran) is skipped; the gallery reads the
-    per-fixture status from the row and draws the dashed "invalid
-    generation" cell, so a render artifact's absence is not an error.
     """
     operations: list[CommitOperationAdd] = [
         CommitOperationAdd(
@@ -418,38 +470,15 @@ def _publish_reports_and_gallery(
             ).encode("utf-8"),
         ),
     ]
-    render_count = 0
-    for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
-        renders_dir = fixture_dir / "renders"
-        if not renders_dir.is_dir():
-            continue
-        for render_path in sorted(renders_dir.iterdir()):
-            if render_path.suffix.lower() not in {".png", ".webp"}:
-                continue
-            operations.append(
-                CommitOperationAdd(
-                    path_in_repo=(
-                        f"{RENDERS_DIR_IN_REPO}/{submission_id}/"
-                        f"{fixture_dir.name}/{render_path.name}"
-                    ),
-                    path_or_fileobj=str(render_path),
-                )
-            )
-            render_count += 1
     api = HfApi(token=token)
     api.create_commit(
         repo_id=submissions_repo,
         repo_type="dataset",
         operations=operations,
-        commit_message=(
-            f"publish report + {render_count} gallery render(s) "
-            f"for {submission_id}"
-        ),
     )
     print(
-        f"[eval_job] published reports/{submission_id}.{{html,json}} + "
-        f"{render_count} gallery render(s) in one commit",
         flush=True,
     )

 SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
 SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
+# Public HF Storage Bucket that receives the gallery/report renders. The eval
+# job is the sole render uploader (the Space never handles render bytes), and
+# the hosted report + gallery reference the files by anonymous bucket URL.
+# Submission renders only; GT renders stay in the private GT dataset.
+# Overridable through the CADGENBENCH_RENDER_BUCKET environment variable.
+# NOTE(review): the default applies only when the variable is unset; a blank
+# value strips to "" rather than falling back -- confirm that is intended.
+RENDER_BUCKET = os.environ.get(
+    "CADGENBENCH_RENDER_BUCKET", "HuggingAI4Engineering/cadgenbench-eval-staging",
+).strip()
+# Hub endpoint used as the root of public render URLs. Trailing slashes are
+# removed so path segments can be appended with a single "/".
+HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
+def _render_base_url(submission_id: str) -> str:
+    """Public ``.../resolve/renders/<id>`` base; report appends ``/<fixture>/<file>``."""
+    return f"{HF_ENDPOINT}/buckets/{RENDER_BUCKET}/resolve/{RENDERS_DIR_IN_REPO}/{submission_id}"
+def _upload_renders_to_bucket(
+    run_dir: Path, submission_id: str, token: str,
+) -> int:
+    """Upload every fixture's renders to ``renders/<id>/<fixture>/`` in the bucket.
+    One ``batch_bucket_files`` call for the whole submission (cheaper than a
+    per-file fan-out). Returns the number of render files uploaded.
+    """
+    add: list[tuple[str, str]] = []
+    for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
+        renders_dir = fixture_dir / "renders"
+        if not renders_dir.is_dir():
+            continue
+        for render_path in sorted(renders_dir.iterdir()):
+            if render_path.suffix.lower() not in {".png", ".webp"}:
+                continue
+            add.append((
+                str(render_path),
+                f"{RENDERS_DIR_IN_REPO}/{submission_id}/"
+                f"{fixture_dir.name}/{render_path.name}",
+            ))
+    if not add:
+        return 0
+    HfApi(token=token).batch_bucket_files(RENDER_BUCKET, add=add, token=token)
+    print(
+        f"[eval_job] uploaded {len(add)} render(s) -> "
+        f"hf://buckets/{RENDER_BUCKET}/{RENDERS_DIR_IN_REPO}/{submission_id}",
+        flush=True,
+    )
+    return len(add)
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
 # per-fixture dirs in shard mode. The Space merges these and deletes the
 # whole ``shards/`` tree after a successful merge.
         _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
         _prune_run_dir(RUN_DIR, shard_fixtures)
         _run_eval(RUN_DIR, worker_count)
+        # The shard job is the sole uploader of its fixtures' renders to the
+        # permanent bucket prefix; the Space merge only assembles the report.
+        _upload_renders_to_bucket(RUN_DIR, submission_id, token)
         _upload_shard_artifacts(
             submission_id, args.shard_id, RUN_DIR, submissions_repo, token,
         )
     _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
     _run_eval(RUN_DIR, worker_count)
+    # Upload renders to the public bucket, then build the report referencing
+    # them by URL (so the heavy WebP/PNG bytes never land in the HTML).
+    _upload_renders_to_bucket(RUN_DIR, submission_id, token)
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
+    _run_report(RUN_DIR, html_path, render_base_url=_render_base_url(submission_id))
     report_json = _build_report_json(RUN_DIR)
     _publish_reports_and_gallery(
+        submission_id, html_path, report_json, submissions_repo, token,
     )
     print(f"[eval_job] done: {submission_id}", flush=True)
     return 0
         )
+def _run_report(
+    run_dir: Path, html_out: Path, *, render_base_url: str | None = None,
+) -> None:
+    """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
+    Passes ``--render-base-url`` so candidate renders are referenced from the
+    public bucket rather than base64-inlined into the hosted HTML.
+    """
     cmd = [
         sys.executable, "-m", "cadgenbench.cli", "report", "single",
         str(run_dir), "-o", str(html_out),
     ]
+    if render_base_url:
+        cmd += ["--render-base-url", render_base_url]
     print(f"[eval_job] {' '.join(cmd)}", flush=True)
     proc = subprocess.run(
         cmd,
     submission_id: str,
     html_path: Path,
     report_json: dict[str, Any],
     submissions_repo: str,
     token: str,
 ) -> None:
+    """Publish the report HTML + JSON to the submissions dataset in one commit.
+    Renders are **not** committed here: :func:`_upload_renders_to_bucket` has
+    already pushed them to the public render bucket under ``renders/<id>/``, and
+    the report HTML references them by bucket URL. Keeping the binary renders
+    out of the dataset repo avoids bloating its git history and the commit-queue
+    429s the per-file fan-out used to cause.
     """
     operations: list[CommitOperationAdd] = [
         CommitOperationAdd(
             ).encode("utf-8"),
         ),
     ]
     api = HfApi(token=token)
     api.create_commit(
         repo_id=submissions_repo,
         repo_type="dataset",
         operations=operations,
+        commit_message=f"publish report for {submission_id}",
     )
     print(
+        f"[eval_job] published reports/{submission_id}.{{html,json}}",
         flush=True,
     )