Spaces:

HuggingAI4Engineering
/

cadgenbench-eval-gpu

Paused

App Files Files Community

Michael Rabinovich committed on Jun 4

Commit

9368361

1 Parent(s): 8032255

sync eval shards to bucket via API (no mount)

Browse files

Files changed (2) hide show

Dockerfile +4 -0
eval_job.py +23 -22

Dockerfile CHANGED Viewed

@@ -55,6 +55,10 @@ ARG CADGENBENCH_SHA=5f312f3
 RUN python -m pip install --no-cache-dir \
         "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
 # The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
 # vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
 # same VTK, compiled against EGL so it acquires an off-screen GL

 RUN python -m pip install --no-cache-dir \
         "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
+# Shard mode syncs per-fixture artifacts to an HF Storage Bucket via the
+# bucket API (HfApi.sync_bucket), which needs a recent huggingface_hub.
+RUN python -m pip install --no-cache-dir "huggingface_hub>=1.16.0"
 # The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
 # vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
 # same VTK, compiled against EGL so it acquires an off-screen GL

eval_job.py CHANGED Viewed

@@ -40,9 +40,10 @@ the Space's sharded submit path (UC3) to fan a large submission across
 several jobs. Steps 1-2 are identical, then the run dir is pruned to
 just this shard's fixtures, ``cadgenbench evaluate`` runs over that
 subset, and the resulting per-fixture dirs (``result.json`` + renders)
-are staged *verbatim*. If ``CADGENBENCH_SHARD_BUCKET_MOUNT`` is set, the
-shard copies them into that mounted bucket; otherwise it uploads under
-``reports/<id>/shards/<shard_id>/`` in the submissions dataset. No
 report HTML, ``report.json``, or gallery render is produced per shard:
 the Space reads every shard's fixture dirs, merges them into one run dir,
 and builds the single ``run_summary`` + report + gallery from the merged
@@ -73,7 +74,7 @@ REPORT_TIMEOUT_SECONDS = 5 * 60
 REPORTS_DIR_IN_REPO = "reports"
 RENDERS_DIR_IN_REPO = "renders"
-SHARD_BUCKET_MOUNT_ENV = "CADGENBENCH_SHARD_BUCKET_MOUNT"
 SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
@@ -286,29 +287,29 @@ def _upload_shard_artifacts(
     Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
     ``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
-    bucket mode, this is a filesystem copy into the mounted bucket; in
-    legacy mode, it is one dataset-repo commit under
-    ``reports/<id>/shards/<shard_id>/``. The Space reads every shard's
-    tree, copies the fixture dirs into a single merged run dir, and
-    builds the aggregate ``run_summary`` + report + gallery from the
-    whole. The per-shard ``run_summary.json`` written by
     ``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
     it over the union and ignores the partials.
     """
-    bucket_mount = os.environ.get(SHARD_BUCKET_MOUNT_ENV)
-    if bucket_mount:
         prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
-        dest = Path(bucket_mount) / prefix / submission_id / SHARDS_DIR_NAME / shard_id
-        if not Path(bucket_mount).is_dir():
-            raise RuntimeError(
-                f"{SHARD_BUCKET_MOUNT_ENV}={bucket_mount!r} is not a mounted directory."
-            )
-        if dest.exists():
-            shutil.rmtree(dest)
-        dest.parent.mkdir(parents=True, exist_ok=True)
-        shutil.copytree(run_dir, dest)
         print(
-            f"[eval_job] staged shard {shard_id} -> {dest}",
             flush=True,
         )
         return

 several jobs. Steps 1-2 are identical, then the run dir is pruned to
 just this shard's fixtures, ``cadgenbench evaluate`` runs over that
 subset, and the resulting per-fixture dirs (``result.json`` + renders)
+are staged *verbatim*. If ``CADGENBENCH_SHARD_BUCKET`` is set, the shard
+syncs them into that HF Storage Bucket via the bucket API; otherwise it
+uploads under ``reports/<id>/shards/<shard_id>/`` in the submissions
+dataset. No
 report HTML, ``report.json``, or gallery render is produced per shard:
 the Space reads every shard's fixture dirs, merges them into one run dir,
 and builds the single ``run_summary`` + report + gallery from the merged
 REPORTS_DIR_IN_REPO = "reports"
 RENDERS_DIR_IN_REPO = "renders"
+SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
 SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
 # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
     Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
     ``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
+    bucket mode, this syncs the dir into the HF Storage Bucket via the
+    bucket API (no volume mount); in legacy mode, it is one dataset-repo
+    commit under ``reports/<id>/shards/<shard_id>/``. The Space reads
+    every shard's tree, copies the fixture dirs into a single merged run
+    dir, and builds the aggregate ``run_summary`` + report + gallery from
+    the whole. The per-shard ``run_summary.json`` written by
     ``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
     it over the union and ignores the partials.
     """
+    bucket_id = os.environ.get(SHARD_BUCKET_ENV, "").strip()
+    if bucket_id:
+        if bucket_id.startswith("hf://buckets/"):
+            bucket_id = bucket_id[len("hf://buckets/"):]
+        bucket_id = bucket_id.rstrip("/")
         prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
+        dest = (
+            f"hf://buckets/{bucket_id}/{prefix}/{submission_id}/"
+            f"{SHARDS_DIR_NAME}/{shard_id}"
+        )
+        api = HfApi(token=token)
+        api.sync_bucket(source=str(run_dir), dest=dest, token=token)
         print(
+            f"[eval_job] synced shard {shard_id} -> {dest}",
             flush=True,
         )
         return