Michael Rabinovich committed on
Commit
c78e980
·
1 Parent(s): bd9b6ba

submit: derive eval-gpu image repo from HF_ORG

Browse files

The image lives in the org, so the repo id tracks HF_ORG (matching
leaderboard.py) and an org rename stays a single Space-variable change.
On the Space, HF_ORG is set, so this resolves to the same value as
before. The job namespace stays a fixed michaelr27 constant since
dispatch is billed to that account. Verified resolution for the
Space, override, and no-HF_ORG cases.

Files changed (1) hide show
  1. submit.py +13 -9
submit.py CHANGED
@@ -20,8 +20,8 @@ poll of an HF Jobs GPU eval + boot-time stuck-pending sweep. The
20
  handler validates the upload, uploads the zip to
21
  ``submissions/<id>.zip``, appends a ``status: pending`` row to
22
  ``results.jsonl`` (under a process-wide lock), spawns a daemon thread
23
- that dispatches a per-submission HF Job against the
24
- ``HuggingAI4Engineering/cadgenbench-eval-gpu`` image and polls
25
  ``inspect_job`` until the job's stage is terminal. On COMPLETED the
26
  worker downloads ``reports/<id>.json`` (the Job already uploaded
27
  ``reports/<id>.{html,json}`` to the submissions dataset), reads
@@ -111,7 +111,7 @@ from huggingface_hub import (
111
  )
112
  from huggingface_hub.errors import EntryNotFoundError
113
 
114
- from leaderboard import HF_DATA_REPO, HF_SUBMISSIONS_REPO
115
 
116
  logger = logging.getLogger(__name__)
117
 
@@ -135,12 +135,16 @@ SUBMITTED_AT_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
135
  STUCK_PENDING_REASON = "evaluation interrupted by Space restart"
136
  BOOT_SWEEP_ENV = "CADGENBENCH_DISABLE_BOOT_SWEEP"
137
 
138
- # HF Jobs target. The eval-gpu image is hosted as a Docker Space
139
- # (paused; image-only) at HuggingAI4Engineering/cadgenbench-eval-gpu.
140
- # Jobs run under the personal `michaelr27` namespace (no-bill for
141
- # HF employees per Round 6 of space-setup/leandro.md). a10g-large
142
- # fits cadgenbench evaluate --workers 8 comfortably in 46 GB RAM.
143
- EVAL_GPU_SPACE = "HuggingAI4Engineering/cadgenbench-eval-gpu"
 
 
 
 
144
  EVAL_JOB_FLAVOR = "a10g-large"
145
  EVAL_JOB_NAMESPACE = "michaelr27"
146
  EVAL_JOB_TIMEOUT = "30m"
 
20
  handler validates the upload, uploads the zip to
21
  ``submissions/<id>.zip``, appends a ``status: pending`` row to
22
  ``results.jsonl`` (under a process-wide lock), spawns a daemon thread
23
+ that dispatches a per-submission HF Job against the eval-gpu
24
+ Docker Space image and polls
25
  ``inspect_job`` until the job's stage is terminal. On COMPLETED the
26
  worker downloads ``reports/<id>.json`` (the Job already uploaded
27
  ``reports/<id>.{html,json}`` to the submissions dataset), reads
 
111
  )
112
  from huggingface_hub.errors import EntryNotFoundError
113
 
114
+ from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
115
 
116
  logger = logging.getLogger(__name__)
117
 
 
135
  STUCK_PENDING_REASON = "evaluation interrupted by Space restart"
136
  BOOT_SWEEP_ENV = "CADGENBENCH_DISABLE_BOOT_SWEEP"
137
 
138
+ # HF Jobs target. The eval-gpu image is a Docker Space (paused;
139
+ # image-only) that lives in the org, so its repo id is derived from
140
+ # HF_ORG and an org rename stays a single Space-variable change.
141
+ # The job NAMESPACE is the account dispatch is billed to and must
142
+ # stay `michaelr27` (no-bill for HF employees per Round 6 of
143
+ # space-setup/leandro.md); it is a fixed constant, never env-driven.
144
+ # a10g-large fits cadgenbench evaluate --workers 8 in 46 GB RAM.
145
+ EVAL_GPU_SPACE = os.getenv(
146
+ "CADGENBENCH_EVAL_GPU_SPACE", f"{HF_ORG}/cadgenbench-eval-gpu"
147
+ )
148
  EVAL_JOB_FLAVOR = "a10g-large"
149
  EVAL_JOB_NAMESPACE = "michaelr27"
150
  EVAL_JOB_TIMEOUT = "30m"