Michael Rabinovich committed on
Commit ·
9368361
1
Parent(s): 8032255
sync eval shards to bucket via API (no mount)
Browse files
- Dockerfile +4 -0
- eval_job.py +23 -22
Dockerfile
CHANGED
|
@@ -55,6 +55,10 @@ ARG CADGENBENCH_SHA=5f312f3
|
|
| 55 |
RUN python -m pip install --no-cache-dir \
|
| 56 |
"cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
|
| 59 |
# vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
|
| 60 |
# same VTK, compiled against EGL so it acquires an off-screen GL
|
|
|
|
| 55 |
RUN python -m pip install --no-cache-dir \
|
| 56 |
"cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
|
| 57 |
|
| 58 |
+
# Shard mode syncs per-fixture artifacts to an HF Storage Bucket via the
|
| 59 |
+
# bucket API (HfApi.sync_bucket), which needs a recent huggingface_hub.
|
| 60 |
+
RUN python -m pip install --no-cache-dir "huggingface_hub>=1.16.0"
|
| 61 |
+
|
| 62 |
# The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
|
| 63 |
# vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
|
| 64 |
# same VTK, compiled against EGL so it acquires an off-screen GL
|
eval_job.py
CHANGED
|
@@ -40,9 +40,10 @@ the Space's sharded submit path (UC3) to fan a large submission across
|
|
| 40 |
several jobs. Steps 1-2 are identical, then the run dir is pruned to
|
| 41 |
just this shard's fixtures, ``cadgenbench evaluate`` runs over that
|
| 42 |
subset, and the resulting per-fixture dirs (``result.json`` + renders)
|
| 43 |
-
are staged *verbatim*. If ``
|
| 44 |
-
|
| 45 |
-
``reports/<id>/shards/<shard_id>/`` in the submissions
|
|
|
|
| 46 |
report HTML, ``report.json``, or gallery render is produced per shard:
|
| 47 |
the Space reads every shard's fixture dirs, merges them into one run dir,
|
| 48 |
and builds the single ``run_summary`` + report + gallery from the merged
|
|
@@ -73,7 +74,7 @@ REPORT_TIMEOUT_SECONDS = 5 * 60
|
|
| 73 |
|
| 74 |
REPORTS_DIR_IN_REPO = "reports"
|
| 75 |
RENDERS_DIR_IN_REPO = "renders"
|
| 76 |
-
|
| 77 |
SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
|
| 78 |
|
| 79 |
# Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
|
|
@@ -286,29 +287,29 @@ def _upload_shard_artifacts(
|
|
| 286 |
|
| 287 |
Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
|
| 288 |
``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
|
| 289 |
-
bucket mode, this
|
| 290 |
-
legacy mode, it is one dataset-repo
|
| 291 |
-
``reports/<id>/shards/<shard_id>/``. The Space reads
|
| 292 |
-
tree, copies the fixture dirs into a single merged run
|
| 293 |
-
builds the aggregate ``run_summary`` + report + gallery from
|
| 294 |
-
whole. The per-shard ``run_summary.json`` written by
|
| 295 |
``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
|
| 296 |
it over the union and ignores the partials.
|
| 297 |
"""
|
| 298 |
-
|
| 299 |
-
if
|
|
|
|
|
|
|
|
|
|
| 300 |
prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
|
| 301 |
-
dest =
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
shutil.rmtree(dest)
|
| 308 |
-
dest.parent.mkdir(parents=True, exist_ok=True)
|
| 309 |
-
shutil.copytree(run_dir, dest)
|
| 310 |
print(
|
| 311 |
-
f"[eval_job]
|
| 312 |
flush=True,
|
| 313 |
)
|
| 314 |
return
|
|
|
|
| 40 |
several jobs. Steps 1-2 are identical, then the run dir is pruned to
|
| 41 |
just this shard's fixtures, ``cadgenbench evaluate`` runs over that
|
| 42 |
subset, and the resulting per-fixture dirs (``result.json`` + renders)
|
| 43 |
+
are staged *verbatim*. If ``CADGENBENCH_SHARD_BUCKET`` is set, the shard
|
| 44 |
+
syncs them into that HF Storage Bucket via the bucket API; otherwise it
|
| 45 |
+
uploads under ``reports/<id>/shards/<shard_id>/`` in the submissions
|
| 46 |
+
dataset. No
|
| 47 |
report HTML, ``report.json``, or gallery render is produced per shard:
|
| 48 |
the Space reads every shard's fixture dirs, merges them into one run dir,
|
| 49 |
and builds the single ``run_summary`` + report + gallery from the merged
|
|
|
|
| 74 |
|
| 75 |
REPORTS_DIR_IN_REPO = "reports"
|
| 76 |
RENDERS_DIR_IN_REPO = "renders"
|
| 77 |
+
SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
|
| 78 |
SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
|
| 79 |
|
| 80 |
# Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
|
|
|
|
| 287 |
|
| 288 |
Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
|
| 289 |
``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
|
| 290 |
+
bucket mode, this syncs the dir into the HF Storage Bucket via the
|
| 291 |
+
bucket API (no volume mount); in legacy mode, it is one dataset-repo
|
| 292 |
+
commit under ``reports/<id>/shards/<shard_id>/``. The Space reads
|
| 293 |
+
every shard's tree, copies the fixture dirs into a single merged run
|
| 294 |
+
dir, and builds the aggregate ``run_summary`` + report + gallery from
|
| 295 |
+
the whole. The per-shard ``run_summary.json`` written by
|
| 296 |
``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
|
| 297 |
it over the union and ignores the partials.
|
| 298 |
"""
|
| 299 |
+
bucket_id = os.environ.get(SHARD_BUCKET_ENV, "").strip()
|
| 300 |
+
if bucket_id:
|
| 301 |
+
if bucket_id.startswith("hf://buckets/"):
|
| 302 |
+
bucket_id = bucket_id[len("hf://buckets/"):]
|
| 303 |
+
bucket_id = bucket_id.rstrip("/")
|
| 304 |
prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
|
| 305 |
+
dest = (
|
| 306 |
+
f"hf://buckets/{bucket_id}/{prefix}/{submission_id}/"
|
| 307 |
+
f"{SHARDS_DIR_NAME}/{shard_id}"
|
| 308 |
+
)
|
| 309 |
+
api = HfApi(token=token)
|
| 310 |
+
api.sync_bucket(source=str(run_dir), dest=dest, token=token)
|
|
|
|
|
|
|
|
|
|
| 311 |
print(
|
| 312 |
+
f"[eval_job] synced shard {shard_id} -> {dest}",
|
| 313 |
flush=True,
|
| 314 |
)
|
| 315 |
return
|