cadgenbench-eval-gpu / _rebuild_space.py
Michael Rabinovich
space: add rebuild and probe helpers
8762996
"""Trigger a rebuild of the (paused) eval-gpu Space image and poll status.
Usage: python _rebuild_space.py [--pause-when-done] [--timeout SECONDS]
Uses HF_REBUILD_TOKEN from the environment when set; otherwise reads
HF_TOKEN from ../cadgenbench/.env.
"""
from __future__ import annotations
import argparse
import os
import sys
import time
from pathlib import Path
# Repo id of the Space this script drives; main() passes it to
# restart_space, get_space_runtime and pause_space.
SID = "HuggingAI4Engineering/cadgenbench-eval-gpu"
def _load_token() -> str:
    """Return the token used to authenticate against the Space.

    Lookup order:

    1. The ``HF_REBUILD_TOKEN`` environment variable, when it holds a
       non-blank value.
    2. The first non-empty ``HF_TOKEN=`` assignment in
       ``../cadgenbench/.env`` (relative to this script's directory).
       An optional ``export`` prefix, surrounding whitespace and one
       pair of matching quotes around the value are tolerated.

    Returns:
        The token with surrounding whitespace (and quotes) removed.

    Raises:
        SystemExit: if the ``.env`` file cannot be read, or contains no
            usable ``HF_TOKEN`` entry.
    """
    # Prefer an explicitly-provided token (e.g. pulled from git's credential
    # helper, which has write on the Space) over the .env jobs-hf PAT, which
    # is read-only on this Space and 403s on restart_space.
    # Strip before testing so a whitespace-only value falls through to .env
    # instead of being returned as an empty token.
    explicit = os.environ.get("HF_REBUILD_TOKEN", "").strip()
    if explicit:
        return explicit

    env = Path(__file__).resolve().parent.parent / "cadgenbench" / ".env"
    try:
        text = env.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as err:
        # Exit with a readable message, matching the "not found" path below,
        # rather than surfacing a raw traceback.
        raise SystemExit(f"cannot read {env}: {err}") from err

    export_prefix = "export "
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if line.startswith(export_prefix):
            line = line[len(export_prefix):].lstrip()
        if not line.startswith("HF_TOKEN="):
            continue
        value = line.split("=", 1)[1].strip()
        # Drop one pair of matching quotes, as dotenv-style files allow.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1].strip()
        if value:
            return value
    raise SystemExit("HF_TOKEN not found in .env")
def main() -> int:
    """Restart the Space, then poll its runtime stage until it settles.

    Command-line flags:
        --pause-when-done: pause the Space again once the build succeeds.
        --timeout: seconds to keep polling (default 900).

    Returns:
        0 when a success stage is reached, 1 when a failure stage is
        reached, 2 when the timeout elapses first.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pause-when-done", action="store_true")
    parser.add_argument("--timeout", type=float, default=900.0)
    opts = parser.parse_args()

    # Imported here rather than at module level, so argument parsing
    # (including --help) happens before huggingface_hub is loaded.
    from huggingface_hub import HfApi

    client = HfApi(token=_load_token())
    print(f"[rebuild] restart_space({SID})", flush=True)
    client.restart_space(SID)

    success_stages = {"RUNNING", "RUNNING_APP_STARTING", "APP_STARTING"}
    failure_stages = {"BUILD_ERROR", "RUNTIME_ERROR", "CONFIG_ERROR"}
    started = time.monotonic()
    previous_stage = None

    def elapsed() -> float:
        # Seconds since restart_space returned.
        return time.monotonic() - started

    while elapsed() < opts.timeout:
        current = client.get_space_runtime(SID).stage

        # Log only on transitions to keep the output short.
        if current != previous_stage:
            print(f"[rebuild] {elapsed():6.0f}s stage={current}", flush=True)
            previous_stage = current

        if current in failure_stages:
            print(f"[rebuild] BUILD FAILED (stage={current})", flush=True)
            return 1

        if current in success_stages:
            print(f"[rebuild] BUILD OK (stage={current})", flush=True)
            if args_pause := opts.pause_when_done:
                print("[rebuild] pausing space", flush=True)
                client.pause_space(SID)
                print("[rebuild] paused", flush=True)
            return 0

        time.sleep(15)

    print("[rebuild] TIMEOUT waiting for build", flush=True)
    return 2
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())