Michael Rabinovich committed on
Commit
9368361
·
1 Parent(s): 8032255

sync eval shards to bucket via API (no mount)

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -0
  2. eval_job.py +23 -22
Dockerfile CHANGED
@@ -55,6 +55,10 @@ ARG CADGENBENCH_SHA=5f312f3
55
  RUN python -m pip install --no-cache-dir \
56
  "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
57
 
 
 
 
 
58
  # The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
59
  # vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
60
  # same VTK, compiled against EGL so it acquires an off-screen GL
 
55
  RUN python -m pip install --no-cache-dir \
56
  "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
57
 
58
+ # Shard mode syncs per-fixture artifacts to an HF Storage Bucket via the
59
+ # bucket API (HfApi.sync_bucket), which needs a recent huggingface_hub.
60
+ RUN python -m pip install --no-cache-dir "huggingface_hub>=1.16.0"
61
+
62
  # The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
63
  # vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
64
  # same VTK, compiled against EGL so it acquires an off-screen GL
eval_job.py CHANGED
@@ -40,9 +40,10 @@ the Space's sharded submit path (UC3) to fan a large submission across
40
  several jobs. Steps 1-2 are identical, then the run dir is pruned to
41
  just this shard's fixtures, ``cadgenbench evaluate`` runs over that
42
  subset, and the resulting per-fixture dirs (``result.json`` + renders)
43
- are staged *verbatim*. If ``CADGENBENCH_SHARD_BUCKET_MOUNT`` is set, the
44
- shard copies them into that mounted bucket; otherwise it uploads under
45
- ``reports/<id>/shards/<shard_id>/`` in the submissions dataset. No
 
46
  report HTML, ``report.json``, or gallery render is produced per shard:
47
  the Space reads every shard's fixture dirs, merges them into one run dir,
48
  and builds the single ``run_summary`` + report + gallery from the merged
@@ -73,7 +74,7 @@ REPORT_TIMEOUT_SECONDS = 5 * 60
73
 
74
  REPORTS_DIR_IN_REPO = "reports"
75
  RENDERS_DIR_IN_REPO = "renders"
76
- SHARD_BUCKET_MOUNT_ENV = "CADGENBENCH_SHARD_BUCKET_MOUNT"
77
  SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
78
 
79
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
@@ -286,29 +287,29 @@ def _upload_shard_artifacts(
286
 
287
  Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
288
  ``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
289
- bucket mode, this is a filesystem copy into the mounted bucket; in
290
- legacy mode, it is one dataset-repo commit under
291
- ``reports/<id>/shards/<shard_id>/``. The Space reads every shard's
292
- tree, copies the fixture dirs into a single merged run dir, and
293
- builds the aggregate ``run_summary`` + report + gallery from the
294
- whole. The per-shard ``run_summary.json`` written by
295
  ``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
296
  it over the union and ignores the partials.
297
  """
298
- bucket_mount = os.environ.get(SHARD_BUCKET_MOUNT_ENV)
299
- if bucket_mount:
 
 
 
300
  prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
301
- dest = Path(bucket_mount) / prefix / submission_id / SHARDS_DIR_NAME / shard_id
302
- if not Path(bucket_mount).is_dir():
303
- raise RuntimeError(
304
- f"{SHARD_BUCKET_MOUNT_ENV}={bucket_mount!r} is not a mounted directory."
305
- )
306
- if dest.exists():
307
- shutil.rmtree(dest)
308
- dest.parent.mkdir(parents=True, exist_ok=True)
309
- shutil.copytree(run_dir, dest)
310
  print(
311
- f"[eval_job] staged shard {shard_id} -> {dest}",
312
  flush=True,
313
  )
314
  return
 
40
  several jobs. Steps 1-2 are identical, then the run dir is pruned to
41
  just this shard's fixtures, ``cadgenbench evaluate`` runs over that
42
  subset, and the resulting per-fixture dirs (``result.json`` + renders)
43
+ are staged *verbatim*. If ``CADGENBENCH_SHARD_BUCKET`` is set, the shard
44
+ syncs them into that HF Storage Bucket via the bucket API; otherwise it
45
+ uploads under ``reports/<id>/shards/<shard_id>/`` in the submissions
46
+ dataset. No
47
  report HTML, ``report.json``, or gallery render is produced per shard:
48
  the Space reads every shard's fixture dirs, merges them into one run dir,
49
  and builds the single ``run_summary`` + report + gallery from the merged
 
74
 
75
  REPORTS_DIR_IN_REPO = "reports"
76
  RENDERS_DIR_IN_REPO = "renders"
77
+ SHARD_BUCKET_ENV = "CADGENBENCH_SHARD_BUCKET"
78
  SHARD_BUCKET_PREFIX_ENV = "CADGENBENCH_SHARD_BUCKET_PREFIX"
79
 
80
  # Sub-prefix under ``reports/<id>/`` where each shard uploads its raw
 
287
 
288
  Persists the pruned ``run_dir`` (each ``<fixture>/`` with its
289
  ``result.json`` + ``renders/`` + any overlay PNGs) verbatim. In
290
+ bucket mode, this syncs the dir into the HF Storage Bucket via the
291
+ bucket API (no volume mount); in legacy mode, it is one dataset-repo
292
+ commit under ``reports/<id>/shards/<shard_id>/``. The Space reads
293
+ every shard's tree, copies the fixture dirs into a single merged run
294
+ dir, and builds the aggregate ``run_summary`` + report + gallery from
295
+ the whole. The per-shard ``run_summary.json`` written by
296
  ``cadgenbench evaluate`` rides along harmlessly; the merge recomputes
297
  it over the union and ignores the partials.
298
  """
299
+ bucket_id = os.environ.get(SHARD_BUCKET_ENV, "").strip()
300
+ if bucket_id:
301
+ if bucket_id.startswith("hf://buckets/"):
302
+ bucket_id = bucket_id[len("hf://buckets/"):]
303
+ bucket_id = bucket_id.rstrip("/")
304
  prefix = os.environ.get(SHARD_BUCKET_PREFIX_ENV, "submissions").strip("/")
305
+ dest = (
306
+ f"hf://buckets/{bucket_id}/{prefix}/{submission_id}/"
307
+ f"{SHARDS_DIR_NAME}/{shard_id}"
308
+ )
309
+ api = HfApi(token=token)
310
+ api.sync_bucket(source=str(run_dir), dest=dest, token=token)
 
 
 
311
  print(
312
+ f"[eval_job] synced shard {shard_id} -> {dest}",
313
  flush=True,
314
  )
315
  return