""" Submit ultimate_sota_training.py to Hugging Face GPU Jobs (HfApi.run_job). The Job command must be a single robust shell line (semicolon-separated). Hugging Face has been observed to flatten multiline `bash -lc` payloads, which breaks `set` and can leave the job stuck or failing silently. Requires: huggingface_hub, `huggingface-cli login`. Secrets: if SKIP_HUB_PUSH is not 1, the job requests Hub secret name HF_TOKEN mapped into the container as env HF_TOKEN (Settings → Access Tokens / Job secrets). Environment (optional): HF_JOB_NAMESPACE default: whoami HF_JOB_FLAVOR default: l4x1 (often faster than T4 for this workload; override with t4-small to save $) HF_JOB_IMAGE default: pytorch CUDA 12.4 devel HF_JOB_TIMEOUT default: 8h TRAIN_REPO_GIT_URL, OPENENV_BASE_URL TRAIN_MAX_STEPS default: 80 (faster run; raise for stronger fit) ROWS_PER_TASK default: 32 GRPO_NUM_GENERATIONS default: 2 SKIP_HUB_PUSH default: 0 """ from __future__ import annotations import os import shlex from huggingface_hub import HfApi from huggingface_hub.utils import get_token _DEFAULT_REPO = "https://huggingface.co/spaces/md896/sql-debug-env" _REPO_URL = os.environ.get("TRAIN_REPO_GIT_URL", _DEFAULT_REPO) _OPENENV = os.environ.get("OPENENV_BASE_URL", "https://md896-sql-debug-env.hf.space") _MAX_STEPS = os.environ.get("TRAIN_MAX_STEPS", "80") _ROWS = os.environ.get("ROWS_PER_TASK", "32") _NUM_GEN = os.environ.get("GRPO_NUM_GENERATIONS", "2") _SKIP_PUSH = os.environ.get("SKIP_HUB_PUSH", "0") _TIMEOUT = os.environ.get("HF_JOB_TIMEOUT", "8h") # l4x1: newer GPU, good for Unsloth; use HF_JOB_FLAVOR=t4-small if queue or cost is better for you _FLAVOR = os.environ.get("HF_JOB_FLAVOR", "l4x1") _IMAGE = os.environ.get( "HF_JOB_IMAGE", "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel", ) _NAMESPACE = os.environ.get("HF_JOB_NAMESPACE", "md896") _SECRETS = None _local_hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or get_token() if _SKIP_PUSH.strip().lower() not in ("1", "true", "yes"): if _local_hf_token: _SECRETS = {"HF_TOKEN": _local_hf_token} else: # Job can still train; push/upload steps in script will gracefully skip/fail with clear logs. _SECRETS = None # One line only — survives UI/API newline flattening. _bash = ( "set -euxo pipefail; " "export DEBIAN_FRONTEND=noninteractive; " "apt-get update -qq && apt-get install -y -qq git ca-certificates; " "export PIP_BREAK_SYSTEM_PACKAGES=1; " f"rm -rf train-repo; git clone {shlex.quote(_REPO_URL)} train-repo; " "cd train-repo; " "python -u ultimate_sota_training.py" ) _job_env = { "OPENENV_BASE_URL": _OPENENV, "TRAIN_MAX_STEPS": _MAX_STEPS, "ROWS_PER_TASK": _ROWS, "GRPO_NUM_GENERATIONS": _NUM_GEN, "SKIP_HUB_PUSH": _SKIP_PUSH, "ARTIFACT_SPACE_ID": os.environ.get("ARTIFACT_SPACE_ID", "md896/sql-debug-env"), "MODEL_HUB_REPO_ID": os.environ.get("MODEL_HUB_REPO_ID", "md896/sql-debug-agent-qwen05b-grpo"), "HARD_EVAL_SAMPLES": os.environ.get("HARD_EVAL_SAMPLES", "16"), } if __name__ == "__main__": api = HfApi() ns = _NAMESPACE job = api.run_job( image=_IMAGE, command=["bash", "-lc", _bash], flavor=_FLAVOR, namespace=ns, timeout=_TIMEOUT, secrets=_SECRETS, env=_job_env, ) print("JOB_ID:", job.id) print("JOB_URL:", job.url) print("FLAVOR:", _FLAVOR, "| TRAIN_MAX_STEPS:", _MAX_STEPS, "| ROWS_PER_TASK:", _ROWS) print( "Note: SCHEDULING is Hugging Face queue time, not your script. " "Cancel stuck jobs and retry, or try HF_JOB_FLAVOR=t4-small / t4-medium." )