Michael Rabinovich committed on
Commit ·
972ff3a
1
Parent(s): 0384079
remove temp _probe_job.py
Browse files
One-off diagnostic probe; no longer referenced.
- _probe_job.py +0 -63
_probe_job.py
DELETED
|
@@ -1,63 +0,0 @@
|
|
| 1 |
-
"""Run a tiny cpu-basic Job on the eval-gpu image to verify it has the fix.
|
| 2 |
-
|
| 3 |
-
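Submits with the cached Hugging Face login token (jobs-hf PAT, job.write on michaelr27); the HF_TOKEN in ../cadgenbench/.env is the job's secret for reading GT and is not read by this script.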
|
| 4 |
-
Prints the job's stage while polling, then the job's logs (which include the HAS_FIX line).
|
| 5 |
-
"""
|
| 6 |
-
from __future__ import annotations
|
| 7 |
-
|
| 8 |
-
import sys
|
| 9 |
-
import time
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
|
| 12 |
-
# Image the probe job runs in (passed as ``image=`` to ``HfApi.run_job``).
IMAGE = "hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu"
# Namespace the job is submitted to and inspected under.
NAMESPACE = "michaelr27"

# Single-line program handed to ``python -c`` inside the image. It must stay a
# one-liner (statements joined with "; ") because it travels as one argv item.
# It prints three marker lines:
#   HAS_FIX             - True when the installed cadgenbench baseline sources
#                         contain the three expected identifiers.
#   HAS_PACKAGE         - whether baseline/package.py exists.
#   HAS_BASELINE_MODELS - printed only if cadgenbench.common.baseline_models
#                         imports; a failed import raises before this line.
PROBE = (
    "import pathlib, cadgenbench; "
    "b = pathlib.Path(cadgenbench.__file__).parent / 'baseline'; "
    "cl = (b / 'compare_llms.py').read_text(); "
    "ag = (b / 'agent.py').read_text(); "
    "print('HAS_FIX', "
    "('_model_pool_backstop_s' in cl) and ('_terminate_pool_workers' in cl) "
    "and ('_shutdown_render_pool()' in ag)); "
    "print('HAS_PACKAGE', (b / 'package.py').is_file()); "
    "import importlib; importlib.import_module('cadgenbench.common.baseline_models'); "
    "print('HAS_BASELINE_MODELS', True)"
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def main() -> int:
    """Submit the probe job, poll it until it finishes, and print its logs.

    Runs ``python -c PROBE`` in ``IMAGE`` on the ``cpu-basic`` flavor under
    ``NAMESPACE``, prints the job stage on every poll, then dumps the job
    logs (log-fetch failures are reported but not raised).

    Returns:
        0 if the job reached the ``COMPLETED`` stage; 1 if it ended in any
        other terminal stage or had not finished when the poll deadline
        passed.
    """
    from huggingface_hub import HfApi

    # One budget for both the job's own timeout and how long we poll for it.
    deadline_s = 600
    poll_interval_s = 8
    # Stages after which the job will not change again.
    terminal_stages = {"COMPLETED", "CANCELED", "ERROR", "DELETED"}

    # Use the cached login token (jobs-hf PAT, job.write on michaelr27); the
    # .env HF_TOKEN is the job's *secret* for reading GT, not the submitter.
    api = HfApi()
    job = api.run_job(
        image=IMAGE,
        command=["python", "-c", PROBE],
        flavor="cpu-basic",
        namespace=NAMESPACE,
        timeout=deadline_s,
    )
    print(f"[probe] job_id={job.id} status={job.status}", flush=True)

    stage_name = "UNKNOWN"
    t0 = time.monotonic()
    while time.monotonic() - t0 < deadline_s:
        info = api.inspect_job(job_id=job.id, namespace=NAMESPACE)
        stage = getattr(info.status, "stage", info.status)
        # The stage may be a plain string or an enum member; str() of a
        # str-mixin enum is "JobStage.X", so compare on the underlying value.
        stage_name = str(getattr(stage, "value", stage))
        print(f"[probe] {time.monotonic()-t0:5.0f}s stage={stage_name}", flush=True)
        if stage_name in terminal_stages:
            break
        time.sleep(poll_interval_s)
    else:
        # Loop ran out of time without seeing a terminal stage.
        print(
            f"[probe] gave up after {deadline_s}s; last stage={stage_name}",
            flush=True,
        )

    print("[probe] ---- logs ----", flush=True)
    try:
        for line in api.fetch_job_logs(job_id=job.id, namespace=NAMESPACE):
            print(line, flush=True)
    except Exception as e:  # noqa: BLE001 - log retrieval is best-effort
        print(f"[probe] log fetch error: {e}", flush=True)

    # Let callers and shells distinguish a finished probe from a failed one.
    return 0 if stage_name == "COMPLETED" else 1
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|