Spaces:

HuggingAI4Engineering
/

cadgenbench-leaderboard

Running

App Files Files Community

Michael Rabinovich committed 1 day ago

Commit

6ffd043

1 Parent(s): 1004adf

stage shard artifacts through HF bucket

Browse files

Files changed (3) hide show

requirements.txt +4 -4
submit.py +100 -30
tests/test_submit.py +90 -0

requirements.txt CHANGED Viewed

@@ -12,9 +12,9 @@
 gradio[oauth]==5.50.0
 gradio-leaderboard==0.0.14
 pandas>=2.0
-# huggingface_hub >=1.1 for the Jobs Python API (run_job, inspect_job,
-# fetch_job_logs). Used by submit.py to dispatch + poll per-submission
-# GPU evals on HF Jobs (Step 10, space-setup/jobs-migration.md).
-huggingface_hub>=1.1.0
 datasets>=3.0
 requests>=2.31

 gradio[oauth]==5.50.0
 gradio-leaderboard==0.0.14
 pandas>=2.0
+# huggingface_hub >=1.8 for the Jobs Python API plus bucket volume
+# mounts. Used by submit.py to dispatch + poll per-submission GPU evals
+# and stage sharded artifacts through HF Buckets.
+huggingface_hub>=1.8.0
 datasets>=3.0
 requests>=2.31

submit.py CHANGED Viewed

@@ -115,6 +115,11 @@ from huggingface_hub import (
 )
 from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
 import progress
 from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
@@ -199,16 +204,25 @@ JOB_POLL_MAX_CONSECUTIVE_ERRORS = 5
 # fixtures fans out across several jobs of SHARD_CHUNK_SIZE fixtures
 # each, dispatched all at once (HF queues any overflow past the
 # account's ~8 concurrent slots; queueing is a speed variable, never a
-# failure). Each shard uploads its per-fixture dirs under
-# ``reports/<id>/shards/<shard_id>/``; the Space merges them into one
-# run dir, recomputes the aggregate run_summary + report + gallery, and
-# deletes the shards tree. Eval is CPU-bound (tessellation + Manifold
-# booleans), so more machines is the throughput lever. At/under the
-# threshold a submission stays a single job (the original path), so the
-# extra dispatch/merge machinery only kicks in when it pays off.
 SHARD_THRESHOLD = 12
 SHARD_CHUNK_SIZE = 12
 SHARDS_SUBDIR = "shards"
 # ERROR-only retries per shard before the whole submission fails. A
 # shard re-run is idempotent (it re-evals its own fixture slice and
 # overwrites its upload prefix), so one cheap retry absorbs a transient
@@ -255,6 +269,35 @@ def _retry_after_seconds(error: HfHubHTTPError) -> float | None:
         return None
 def _with_hub_retries(fn, *, what: str):
     """Run *fn* (a Hub commit) retrying transient HTTP errors with backoff.
@@ -1138,6 +1181,26 @@ def _dispatch_eval_command(
         value = os.environ.get(key)
         if value:
             env[key] = value
     job = run_job(
         image=f"hf.co/spaces/{EVAL_GPU_SPACE}",
         command=[
@@ -1150,6 +1213,7 @@ def _dispatch_eval_command(
         secrets={"HF_TOKEN": token},
         timeout=EVAL_JOB_TIMEOUT,
         token=token,
     )
     return job.id
@@ -1165,7 +1229,7 @@ def _dispatch_shard(
     Mutates *state* in place: sets ``job_id``, bumps ``attempts``, and
     clears the prior ``stage``/``message`` so a retried shard is polled
     fresh. The shard re-evals its own fixture slice and overwrites its
-    ``reports/<id>/shards/<shard_id>/`` prefix, so a retry is idempotent.
     """
     job_id = _dispatch_eval_command(
         submission_id,
@@ -1408,22 +1472,25 @@ def _merge_shards_and_publish(
     tmp = Path(tempfile.mkdtemp(prefix=f"cgb-merge-{submission_id}-"))
     try:
-        download_root = Path(
-            snapshot_download(
-                repo_id=HF_SUBMISSIONS_REPO,
-                repo_type="dataset",
-                allow_patterns=[
-                    f"{REPORTS_DIR}/{submission_id}/{SHARDS_SUBDIR}/**"
-                ],
-                local_dir=str(tmp / "dl"),
             )
-        )
-        shards_root = (
-            download_root / REPORTS_DIR / submission_id / SHARDS_SUBDIR
-        )
         if not shards_root.is_dir():
             raise RuntimeError(
-                f"No shard artifacts found under {shards_root} after download."
             )
         merged_run = tmp / "run"
@@ -1570,15 +1637,18 @@ def _cleanup_shard_artifacts(submission_id: str) -> None:
     submission.
     """
     try:
-        _with_hub_retries(
-            lambda: _HF_API.delete_folder(
-                path_in_repo=f"{REPORTS_DIR}/{submission_id}/{SHARDS_SUBDIR}",
-                repo_id=HF_SUBMISSIONS_REPO,
-                repo_type="dataset",
-                commit_message=f"clean up eval shards for {submission_id}",
-            ),
-            what="shard cleanup",
-        )
         logger.info("Cleaned up shard artifacts for %s", submission_id)
     except Exception as e:  # noqa: BLE001 - cleanup is best-effort
         logger.warning(

 )
 from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
+try:
+    from huggingface_hub import Volume
+except ImportError:  # pragma: no cover - exercised only on old deploy images
+    Volume = None  # type: ignore[assignment]
 import progress
 from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
 # fixtures fans out across several jobs of SHARD_CHUNK_SIZE fixtures
 # each, dispatched all at once (HF queues any overflow past the
 # account's ~8 concurrent slots; queueing is a speed variable, never a
+# failure). Each shard stages its per-fixture dirs into a mounted bucket
+# when CADGENBENCH_SHARD_BUCKET is set, or under
+# ``reports/<id>/shards/<shard_id>/`` in the submissions dataset
+# otherwise; the Space merges them into one run dir, recomputes the
+# aggregate run_summary + report + gallery, and deletes the shards tree.
+# Eval is CPU-bound (tessellation + Manifold booleans), so more machines
+# is the throughput lever. At/under the threshold a submission stays a
+# single job (the original path), so the extra dispatch/merge machinery
+# only kicks in when it pays off.
 SHARD_THRESHOLD = 12
 SHARD_CHUNK_SIZE = 12
 SHARDS_SUBDIR = "shards"
+# Bucket used to stage shard artifacts. Empty (the default) leaves bucket
+# staging off; a leading ``hf://buckets/`` is accepted and stripped before
+# the id is handed to the volume mount.
+SHARD_BUCKET = os.getenv("CADGENBENCH_SHARD_BUCKET", "").strip()
+# Path at which the bucket is mounted; it is also forwarded to shard jobs
+# through their environment so both sides agree on the location.
+SHARD_BUCKET_MOUNT = os.getenv(
+    "CADGENBENCH_SHARD_BUCKET_MOUNT", "/mnt/cadgenbench-shards",
+).strip()
+# Directory prefix inside the bucket, kept without leading/trailing slashes.
+SHARD_BUCKET_PREFIX = os.getenv(
+    "CADGENBENCH_SHARD_BUCKET_PREFIX", SUBMISSIONS_DIR,
+).strip("/")
 # ERROR-only retries per shard before the whole submission fails. A
 # shard re-run is idempotent (it re-evals its own fixture slice and
 # overwrites its upload prefix), so one cheap retry absorbs a transient
         return None
+def _shard_bucket_enabled() -> bool:
+    """Report whether a staging bucket is configured for shard scratch."""
+    # Any non-empty SHARD_BUCKET value switches shard staging to the bucket.
+    return True if SHARD_BUCKET else False
+def _shard_bucket_source() -> str:
+    """Return SHARD_BUCKET as the bare id given to ``huggingface_hub.Volume``."""
+    # Both ``org/bucket`` and ``hf://buckets/org/bucket`` are accepted: drop
+    # the URI scheme when present, then any trailing slashes.
+    bare_id = SHARD_BUCKET.removeprefix("hf://buckets/")
+    return bare_id.rstrip("/")
+def _shard_bucket_relative_root(submission_id: str) -> Path:
+    """Build the mount-relative directory holding a submission's shards."""
+    # Filtering out empty segments keeps doubled or stray slashes in the
+    # configured prefix from leaking into the resulting path.
+    prefix_segments = filter(None, SHARD_BUCKET_PREFIX.split("/"))
+    return Path(*prefix_segments, submission_id, SHARDS_SUBDIR)
+def _shard_bucket_root(submission_id: str) -> Path:
+    """Resolve a submission's shard staging directory under the mount."""
+    if SHARD_BUCKET_MOUNT:
+        mount_root = Path(SHARD_BUCKET_MOUNT)
+        return mount_root / _shard_bucket_relative_root(submission_id)
+    # An empty mount path cannot be joined into a usable location, so fail
+    # loudly with a RuntimeError naming the offending setting.
+    raise RuntimeError(
+        "CADGENBENCH_SHARD_BUCKET is set but "
+        "CADGENBENCH_SHARD_BUCKET_MOUNT is empty."
+    )
 def _with_hub_retries(fn, *, what: str):
     """Run *fn* (a Hub commit) retrying transient HTTP errors with backoff.
         value = os.environ.get(key)
         if value:
             env[key] = value
+    run_kwargs: dict[str, Any] = {}
+    if _shard_bucket_enabled() and "--shard-id" in extra_args:
+        if Volume is None:
+            raise RuntimeError(
+                "CADGENBENCH_SHARD_BUCKET requires huggingface_hub>=1.8.0 "
+                "for HF Jobs volume mounts."
+            )
+        env.update(
+            {
+                "CADGENBENCH_SHARD_BUCKET_MOUNT": SHARD_BUCKET_MOUNT,
+                "CADGENBENCH_SHARD_BUCKET_PREFIX": SHARD_BUCKET_PREFIX,
+            }
+        )
+        run_kwargs["volumes"] = [
+            Volume(
+                type="bucket",
+                source=_shard_bucket_source(),
+                mount_path=SHARD_BUCKET_MOUNT,
+            )
+        ]
     job = run_job(
         image=f"hf.co/spaces/{EVAL_GPU_SPACE}",
         command=[
         secrets={"HF_TOKEN": token},
         timeout=EVAL_JOB_TIMEOUT,
         token=token,
+        **run_kwargs,
     )
     return job.id
     Mutates *state* in place: sets ``job_id``, bumps ``attempts``, and
     clears the prior ``stage``/``message`` so a retried shard is polled
     fresh. The shard re-evals its own fixture slice and overwrites its
+    configured shard-staging prefix, so a retry is idempotent.
     """
     job_id = _dispatch_eval_command(
         submission_id,
     tmp = Path(tempfile.mkdtemp(prefix=f"cgb-merge-{submission_id}-"))
     try:
+        if _shard_bucket_enabled():
+            shards_root = _shard_bucket_root(submission_id)
+        else:
+            download_root = Path(
+                snapshot_download(
+                    repo_id=HF_SUBMISSIONS_REPO,
+                    repo_type="dataset",
+                    allow_patterns=[
+                        f"{REPORTS_DIR}/{submission_id}/{SHARDS_SUBDIR}/**"
+                    ],
+                    local_dir=str(tmp / "dl"),
+                )
+            )
+            shards_root = (
+                download_root / REPORTS_DIR / submission_id / SHARDS_SUBDIR
             )
         if not shards_root.is_dir():
             raise RuntimeError(
+                f"No shard artifacts found under {shards_root}."
             )
         merged_run = tmp / "run"
     submission.
     """
     try:
+        if _shard_bucket_enabled():
+            shutil.rmtree(_shard_bucket_root(submission_id), ignore_errors=True)
+        else:
+            _with_hub_retries(
+                lambda: _HF_API.delete_folder(
+                    path_in_repo=f"{REPORTS_DIR}/{submission_id}/{SHARDS_SUBDIR}",
+                    repo_id=HF_SUBMISSIONS_REPO,
+                    repo_type="dataset",
+                    commit_message=f"clean up eval shards for {submission_id}",
+                ),
+                what="shard cleanup",
+            )
         logger.info("Cleaned up shard artifacts for %s", submission_id)
     except Exception as e:  # noqa: BLE001 - cleanup is best-effort
         logger.warning(

tests/test_submit.py CHANGED Viewed

@@ -11,6 +11,7 @@ network traffic.
 """
 from __future__ import annotations
 from pathlib import Path
 from types import SimpleNamespace
@@ -104,6 +105,95 @@ def test_retry_after_header_is_honored(monkeypatch):
     assert slept and slept[0] >= 7.0
 def _stub_meta() -> dict:
     """Minimum meta.json shape that survives ``_load_and_validate_meta``."""
     return {

 """
 from __future__ import annotations
+import importlib.util
 from pathlib import Path
 from types import SimpleNamespace
     assert slept and slept[0] >= 7.0
+def test_dispatch_shard_mounts_configured_bucket(monkeypatch):
+    """A shard dispatch with a bucket configured passes a bucket volume."""
+    bucket_uri = "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging"
+    mount_path = "/mnt/cgb-shards"
+    run_job_kwargs: dict = {}
+    # Stand-ins that record their keyword arguments instead of calling HF.
+    class RecordingVolume:
+        def __init__(self, **kwargs):
+            self.kwargs = kwargs
+    def recording_run_job(**kwargs):
+        run_job_kwargs.update(kwargs)
+        return SimpleNamespace(id="job-123")
+    monkeypatch.setenv("HF_TOKEN", "hf_test")
+    overrides = {
+        "SHARD_BUCKET": bucket_uri,
+        "SHARD_BUCKET_MOUNT": mount_path,
+        "SHARD_BUCKET_PREFIX": "submissions",
+        "Volume": RecordingVolume,
+        "run_job": recording_run_job,
+    }
+    for attr_name, replacement in overrides.items():
+        monkeypatch.setattr(submit, attr_name, replacement)
+    shard_args = ["--shard-id", "shard_000", "--fixtures", "101,102"]
+    returned_id = submit._dispatch_eval_command(
+        "sub-1", "https://example.test/sub-1.zip", shard_args,
+    )
+    assert returned_id == "job-123"
+    job_env = run_job_kwargs["env"]
+    assert job_env["CADGENBENCH_SHARD_BUCKET_MOUNT"] == mount_path
+    assert job_env["CADGENBENCH_SHARD_BUCKET_PREFIX"] == "submissions"
+    # The ``hf://buckets/`` scheme must be gone from the volume source.
+    mounted = run_job_kwargs["volumes"][0]
+    assert mounted.kwargs == {
+        "type": "bucket",
+        "source": "HuggingAI4Engineering/cadgenbench-eval-staging",
+        "mount_path": mount_path,
+    }
+def test_dispatch_whole_submission_does_not_mount_bucket(monkeypatch):
+    """Unsharded dispatches never mount the bucket, even when one is set."""
+    seen: dict = {}
+    def recording_run_job(**kwargs):
+        seen.update(kwargs)
+        return SimpleNamespace(id="job-456")
+    monkeypatch.setenv("HF_TOKEN", "hf_test")
+    monkeypatch.setattr(submit, "SHARD_BUCKET", "org/bucket")
+    monkeypatch.setattr(submit, "run_job", recording_run_job)
+    # An empty extra-args list carries no ``--shard-id`` flag.
+    no_extra_args: list = []
+    returned_id = submit._dispatch_eval_command(
+        "sub-1", "https://example.test/sub-1.zip", no_extra_args,
+    )
+    assert returned_id == "job-456"
+    assert "volumes" not in seen
+    job_env = seen["env"]
+    assert "CADGENBENCH_SHARD_BUCKET_MOUNT" not in job_env
+def test_eval_job_stages_shard_to_mounted_bucket(tmp_path: Path, monkeypatch):
+    """With the bucket env vars set, shard outputs land under the mount."""
+    # eval_job.py is loaded by path from a ``cadgenbench-eval-gpu`` directory
+    # two levels above this tests directory, so that checkout must exist.
+    checkout_parent = Path(__file__).resolve().parents[2]
+    module_file = checkout_parent / "cadgenbench-eval-gpu" / "eval_job.py"
+    spec = importlib.util.spec_from_file_location(
+        "eval_job_for_test", module_file,
+    )
+    assert spec and spec.loader
+    eval_job = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(eval_job)
+    # A one-fixture run directory acts as the shard's local output.
+    local_run = tmp_path / "run"
+    fixture_out = local_run / "101"
+    fixture_out.mkdir(parents=True)
+    fixture_out.joinpath("result.json").write_text("{}", encoding="utf-8")
+    # A plain temp directory plays the role of the mounted bucket.
+    fake_mount = tmp_path / "bucket"
+    fake_mount.mkdir()
+    monkeypatch.setenv(eval_job.SHARD_BUCKET_MOUNT_ENV, str(fake_mount))
+    monkeypatch.setenv(eval_job.SHARD_BUCKET_PREFIX_ENV, "submissions")
+    eval_job._upload_shard_artifacts(
+        "sub-1", "shard_000", local_run, "ignored/submissions", "ignored-token",
+    )
+    staged_result = fake_mount.joinpath(
+        "submissions", "sub-1", "shards", "shard_000", "101", "result.json",
+    )
+    assert staged_result.read_text(encoding="utf-8") == "{}"
 def _stub_meta() -> dict:
     """Minimum meta.json shape that survives ``_load_and_validate_meta``."""
     return {