#!/usr/bin/env bash
# ClarifyRL preflight — sanity-check the full pipeline BEFORE spending real
# budget on parallel HF Jobs runs. Catches:
#   - hf CLI not installed
#   - HF_TOKEN missing / invalid
#   - env Space not reachable / wrong concurrency config
#   - TRL/transformers/jmespath/bnb local import errors (your laptop)
#   - max_concurrent_envs not yet bumped on the live Space
#
# Usage:
#   HF_TOKEN=hf_xxx ./scripts/preflight.sh
#
# Environment:
#   HF_TOKEN      required; checked with `hf auth whoami`
#   ENV_BASE_URL  optional; base URL of the env Space (default set below)
#
# Exits with code 0 on success, non-zero with a clear diagnostic on failure.

set -euo pipefail

# ANSI colour sequences. ANSI-C quoting stores the real ESC byte, so the values
# can be passed to printf as %s arguments instead of being spliced into the
# format string.
GREEN=$'\033[0;32m'
RED=$'\033[0;31m'
YELLOW=$'\033[1;33m'
NC=$'\033[0m'

#######################################
# Report a passed check.
# Arguments: $1 - message
# Outputs:   "[OK] <message>" on stdout
#######################################
ok() {
  printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$1"
}

#######################################
# Report a failed check and abort the script.
# Arguments: $1 - message
# Outputs:   "[FAIL] <message>" on stderr
# Returns:   never returns; exits with status 1
#######################################
fail() {
  printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$1" >&2
  exit 1
}

#######################################
# Report a non-fatal problem.
# Arguments: $1 - message
# Outputs:   "[WARN] <message>" on stderr
#######################################
warn() {
  printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$1" >&2
}

# Keep a caller-supplied ENV_BASE_URL; otherwise fall back to the default Space.
: "${ENV_BASE_URL:=https://agarwalanu3103-clarify-rl.hf.space}"

echo "====================================================================="
echo "ClarifyRL preflight"
echo "  Env Space:  $ENV_BASE_URL"
echo "====================================================================="

# 1. hf CLI present
if ! command -v hf >/dev/null 2>&1; then
  fail "hf CLI not installed. Run: pip install -U 'huggingface_hub[cli]'"
fi
ok "hf CLI present: $(hf --version 2>&1 | head -1)"

# 2. HF_TOKEN set + valid
if [[ -z "${HF_TOKEN:-}" ]]; then
  fail "HF_TOKEN env var is not set"
fi
# A single whoami call both validates the token and captures the identity.
# Running it inside the `if` keeps a failure from tripping `set -e` before
# fail() can print its diagnostic.
if ! whoami_out="$(HF_HUB_DISABLE_PROGRESS_BARS=1 hf auth whoami 2>&1)"; then
  fail "HF_TOKEN appears invalid (hf auth whoami failed)"
fi
WHOAMI=${whoami_out%%$'\n'*}   # first line of the output only
ok "HF_TOKEN valid: $WHOAMI"

# 3. Space reachable. The OpenEnv server exposes /health (not /healthz).
# curl prints "000" through --write-out when no HTTP response was received, so
# its exit status is deliberately ignored here; appending a fallback with
# `|| echo` would yield "000000". stderr is silenced because the status code
# is the only diagnostic reported.
HEALTH_HTTP="$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
  "$ENV_BASE_URL/health" 2>/dev/null)" || true
# Covers the case where nothing usable was printed (e.g. curl is not installed).
if [[ ! "$HEALTH_HTTP" =~ ^[0-9]{3}$ ]]; then
  HEALTH_HTTP="000"
fi
if [[ "$HEALTH_HTTP" != "200" ]]; then
  fail "Env Space at $ENV_BASE_URL/health returned HTTP $HEALTH_HTTP (expected 200)"
fi
ok "Env Space /health returned 200"

# 4.
Probe the WS endpoint with a quick reset to confirm capacity > 1 WS_URL="$(echo "$ENV_BASE_URL" | sed 's|^https|wss|; s|^http|ws|')/ws" # Use the project's venv python so truststore is available on macOS w/ corp proxies. PYBIN="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/.venv/bin/python" [ -x "$PYBIN" ] || PYBIN="python3" "$PYBIN" - <&1; then ok "training/train_grpo.py parses cleanly" else fail "training/train_grpo.py has syntax errors" fi # 6. run_eval.py / make_plots.py --help (light imports OK) if "$PYBIN" scripts/run_eval.py --help >/dev/null 2>&1; then ok "scripts/run_eval.py --help works" else fail "scripts/run_eval.py raised on --help" fi if "$PYBIN" scripts/make_plots.py --help >/dev/null 2>&1; then ok "scripts/make_plots.py --help works" else fail "scripts/make_plots.py raised on --help" fi echo echo "=====================================================================" echo "Preflight complete. You're cleared to launch." echo "====================================================================="