Michael Rabinovich committed on
Commit ·
0a04e61
1
Parent(s): 3dba2e2
submit: derive eval-gpu image repo from HF_ORG
Browse files
submit.py
CHANGED
|
@@ -20,8 +20,8 @@ poll of an HF Jobs GPU eval + boot-time stuck-pending sweep. The
|
|
| 20 |
handler validates the upload, uploads the zip to
|
| 21 |
``submissions/<id>.zip``, appends a ``status: pending`` row to
|
| 22 |
``results.jsonl`` (under a process-wide lock), spawns a daemon thread
|
| 23 |
-
that dispatches a per-submission HF Job against the
|
| 24 |
-
|
| 25 |
``inspect_job`` until the job's stage is terminal. On COMPLETED the
|
| 26 |
worker downloads ``reports/<id>.json`` (the Job already uploaded
|
| 27 |
``reports/<id>.{html,json}`` to the submissions dataset), reads
|
|
@@ -111,7 +111,7 @@ from huggingface_hub import (
|
|
| 111 |
)
|
| 112 |
from huggingface_hub.errors import EntryNotFoundError
|
| 113 |
|
| 114 |
-
from leaderboard import HF_DATA_REPO, HF_SUBMISSIONS_REPO
|
| 115 |
|
| 116 |
logger = logging.getLogger(__name__)
|
| 117 |
|
|
@@ -136,11 +136,16 @@ STUCK_PENDING_REASON = "evaluation interrupted by Space restart"
|
|
| 136 |
BOOT_SWEEP_ENV = "CADGENBENCH_DISABLE_BOOT_SWEEP"
|
| 137 |
|
| 138 |
# HF Jobs target. The eval-gpu image is hosted as a Docker Space
|
| 139 |
-
# (paused; image-only)
|
| 140 |
-
#
|
| 141 |
-
#
|
| 142 |
-
#
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
EVAL_JOB_FLAVOR = "a10g-large"
|
| 145 |
EVAL_JOB_NAMESPACE = "michaelr27"
|
| 146 |
EVAL_JOB_TIMEOUT = "30m"
|
|
|
|
| 20 |
handler validates the upload, uploads the zip to
|
| 21 |
``submissions/<id>.zip``, appends a ``status: pending`` row to
|
| 22 |
``results.jsonl`` (under a process-wide lock), spawns a daemon thread
|
| 23 |
+
that dispatches a per-submission HF Job against the eval-gpu
|
| 24 |
+
Docker Space image and polls
|
| 25 |
``inspect_job`` until the job's stage is terminal. On COMPLETED the
|
| 26 |
worker downloads ``reports/<id>.json`` (the Job already uploaded
|
| 27 |
``reports/<id>.{html,json}`` to the submissions dataset), reads
|
|
|
|
| 111 |
)
|
| 112 |
from huggingface_hub.errors import EntryNotFoundError
|
| 113 |
|
| 114 |
+
from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
|
| 115 |
|
| 116 |
logger = logging.getLogger(__name__)
|
| 117 |
|
|
|
|
| 136 |
BOOT_SWEEP_ENV = "CADGENBENCH_DISABLE_BOOT_SWEEP"
|
| 137 |
|
| 138 |
# HF Jobs target. The eval-gpu image is hosted as a Docker Space
|
| 139 |
+
# (paused; image-only) under the org; the repo id is derived from
|
| 140 |
+
# HF_ORG so an org rename is one Space-variable change. Jobs run
|
| 141 |
+
# under the personal `michaelr27` namespace, which is required and
|
| 142 |
+
# stays a fixed constant: dispatch is billed to that account
|
| 143 |
+
# (no-bill for HF employees per Round 6 of space-setup/leandro.md),
|
| 144 |
+
# so it is deliberately not configurable. a10g-large fits
|
| 145 |
+
# cadgenbench evaluate --workers 8 comfortably in 46 GB RAM.
|
| 146 |
+
EVAL_GPU_SPACE = os.getenv(
|
| 147 |
+
"CADGENBENCH_EVAL_GPU_SPACE", f"{HF_ORG}/cadgenbench-eval-gpu"
|
| 148 |
+
)
|
| 149 |
EVAL_JOB_FLAVOR = "a10g-large"
|
| 150 |
EVAL_JOB_NAMESPACE = "michaelr27"
|
| 151 |
EVAL_JOB_TIMEOUT = "30m"
|