clarify-rl / scripts /launch_all.sh
agarwalanu3103's picture
plots: add training progression + diagnostics, drop W&B links
099bec8 verified
#!/usr/bin/env bash
# Fire ALL production runs in parallel across multiple HF accounts.
#
# Each account fires one job. The plan defaults to 3 simultaneous runs
# (matching the 3 Qwen3 sizes), with an optional 4th insurance run (INSURANCE=1).
#
# Required env vars (one HF_TOKEN per account):
# HF_TOKEN_1 token for account 1 (drives Qwen3-0.6B)
# HF_TOKEN_2 token for account 2 (drives Qwen3-1.7B)
# HF_TOKEN_3 token for account 3 (drives Qwen3-4B)
# HF_TOKEN_4 (optional) token for account 4 — drives insurance run if INSURANCE=1
#
# Optional env:
# ENV_BASE_URL default: https://agarwalanu3103-clarify-rl.hf.space
# INSURANCE "1" → also launch a backup Qwen3-1.7B run (different seed)
# DRY_RUN "1" → print all commands but do not launch anything
#
# Usage:
# HF_TOKEN_1=hf_a HF_TOKEN_2=hf_b HF_TOKEN_3=hf_c ./scripts/launch_all.sh
#
# Estimated cost of the default plan (without insurance): ~$45
# With INSURANCE=1: ~$57.50
# (figures taken from the per-account plan further down in this file).
# Either way well within the $120 cap.
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Required credentials, one token per account. `:?` aborts the script with the
# given message when the variable is unset or empty.
: "${HF_TOKEN_1:?HF_TOKEN_1 required (account 1 → Qwen3-0.6B)}"
: "${HF_TOKEN_2:?HF_TOKEN_2 required (account 2 → Qwen3-1.7B)}"
: "${HF_TOKEN_3:?HF_TOKEN_3 required (account 3 → Qwen3-4B)}"

# Optional settings. `:=` assigns the default only when the variable is unset
# or empty, so values supplied by the caller win.
: "${ENV_BASE_URL:=https://agarwalanu3103-clarify-rl.hf.space}"
: "${INSURANCE:=0}"
: "${DRY_RUN:=0}"

# Resolve the directory holding this script so the launcher is found no matter
# which working directory the script is started from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LAUNCHER="$SCRIPT_DIR/launch_hf_job.sh"

# Fail fast with one clear message instead of letting every background launch
# fail separately with "No such file" / "Permission denied".
if [ ! -x "$LAUNCHER" ]; then
  printf 'launcher not found or not executable: %s\n' "$LAUNCHER" >&2
  exit 1
fi

# Banner summarising the effective configuration (unquoted EOF: variables expand).
cat <<EOF
=========================================================================
ClarifyRL multi-account parallel launch
=========================================================================
Env Space: $ENV_BASE_URL
Insurance: $INSURANCE
Dry run: $DRY_RUN
=========================================================================
EOF
# ----------------------------------------------------------------------
# Plan (revised after `hf jobs hardware` 2026-04-25):
# Account 1: Qwen3-0.6B / a10g-large / 500 steps / num_gen=4 / ~$7.50 (5h * $1.50)
# Account 2: Qwen3-1.7B / a100-large / 400 steps / num_gen=8 / ~$12.50 (5h * $2.50)
# Account 3: Qwen3-4B / h200 / 250 steps / num_gen=8 / ~$25 (5h * $5.00)
# Account 4: Qwen3-1.7B / a100-large / 400 steps / num_gen=8 / seed=84 / ~$12.50 (insurance)
# Total without insurance: ~$45 With insurance: ~$57.50
# Well within the $120 cap → leaves headroom for retries / longer runs / second pass.
# ----------------------------------------------------------------------
#######################################
# Print a banner for one job, then execute the given command.
# Globals:   DRY_RUN (read) - when "1", DRY_RUN=1 is placed in the
#            environment of the executed command.
# Arguments: $1   - human-readable label shown in the banner
#            $2.. - command and arguments to execute
# Outputs:   banner on stdout, followed by whatever the command prints
# Returns:   exit status of the executed command
#######################################
run() {
  local label rule
  label="$1"
  shift
  rule="──────────────────────────────────────────────────────────────────"

  # Blank line, rule, label, rule - emitted with a single printf call.
  printf '\n%s\n Launching: %s\n%s\n' "$rule" "$label" "$rule"

  # Normal mode: run the command with the environment untouched.
  if [ "$DRY_RUN" != "1" ]; then
    "$@"
    return
  fi

  # Dry-run mode: hand the flag to the command through its environment.
  DRY_RUN=1 "$@"
}
# Validate the insurance token BEFORE anything is launched. A failed `:?`
# expansion makes a non-interactive shell exit; doing this check after the
# first three jobs were already started in the background would abort the
# script without waiting for or reporting on those launches.
if [ "$INSURANCE" = "1" ]; then
  : "${HF_TOKEN_4:?HF_TOKEN_4 required when INSURANCE=1}"
fi

# Each launch below runs `run` in the background (`&`). The assignments
# prefixed to the call (HF_TOKEN, ENV_BASE_URL, SEED, ...) apply to that one
# invocation only, so every job gets its own account token. The PID of each
# background launch is recorded so its exit status can be collected later.

# Account 1 → 0.6B
HF_TOKEN="$HF_TOKEN_1" \
  ENV_BASE_URL="$ENV_BASE_URL" \
  SEED=42 \
  run "Account 1: Qwen3-0.6B / a10g-large / 500 steps" \
  "$LAUNCHER" Qwen/Qwen3-0.6B a10g-large 500 &
PID1=$!

# Account 2 → 1.7B
HF_TOKEN="$HF_TOKEN_2" \
  ENV_BASE_URL="$ENV_BASE_URL" \
  SEED=42 \
  run "Account 2: Qwen3-1.7B / a100-large / 400 steps" \
  "$LAUNCHER" Qwen/Qwen3-1.7B a100-large 400 &
PID2=$!

# Account 3 → 4B
HF_TOKEN="$HF_TOKEN_3" \
  ENV_BASE_URL="$ENV_BASE_URL" \
  SEED=42 \
  run "Account 3: Qwen3-4B / h200 / 250 steps" \
  "$LAUNCHER" Qwen/Qwen3-4B h200 250 &
PID3=$!

# Parallel arrays: PIDS[i] is the background launch whose short name is LABELS[i].
PIDS=("$PID1" "$PID2" "$PID3")
LABELS=("0.6B" "1.7B" "4B")

# Optional 4th insurance run: same model/hardware/steps as account 2, but with
# a different seed and its own OUTPUT_DIR. HF_TOKEN_4 was validated above.
if [ "$INSURANCE" = "1" ]; then
  HF_TOKEN="$HF_TOKEN_4" \
    ENV_BASE_URL="$ENV_BASE_URL" \
    SEED=84 \
    OUTPUT_DIR="clarify-rl-grpo-qwen3-1-7b-seed84" \
    run "Account 4: Qwen3-1.7B / a100-large / 400 steps / seed=84 (insurance)" \
    "$LAUNCHER" Qwen/Qwen3-1.7B a100-large 400 &
  PIDS+=("$!")
  LABELS+=("1.7B-seed84")
fi
# Wait for all launchers to exit. Each launcher submits the job and returns
# fairly fast — the actual training happens server-side on HF.
echo
echo "Waiting for all launches to complete (this only waits for *submission*,"
echo "not for the training itself — that runs server-side on HF Jobs)..."
echo

#######################################
# Reap every background launcher and print a per-job line plus an overall
# summary framed by a rule line. The closing rule is printed on the failure
# path as well.
# Globals:   PIDS (read)    - PIDs of the background launches
#            LABELS (read)  - short name for each entry of PIDS (same index)
#            FAILED (written) - number of launches that exited non-zero
# Outputs:   status lines and summary on stdout
# Returns:   0 if every launch exited 0, 1 otherwise
#######################################
summarize_submissions() {
  local idx
  local -r rule="====================================================================="

  FAILED=0
  for idx in "${!PIDS[@]}"; do
    # `wait PID` yields that job's exit status; inside `if` it cannot trip set -e.
    if wait "${PIDS[$idx]}"; then
      echo "[OK] ${LABELS[$idx]} submitted"
    else
      echo "[FAIL] ${LABELS[$idx]} submission exited non-zero"
      FAILED=$((FAILED + 1))
    fi
  done

  echo
  echo "$rule"
  if [ "$FAILED" -eq 0 ]; then
    echo "All ${#PIDS[@]} jobs submitted. Track them at:"
    echo " https://huggingface.co/jobs (per account)"
    echo " https://huggingface.co/spaces (trackio dashboards)"
  else
    echo "$FAILED of ${#PIDS[@]} submissions failed — check output above."
  fi
  echo "$rule"

  [ "$FAILED" -eq 0 ]
}

declare -i FAILED=0
# Exit non-zero when at least one submission failed.
summarize_submissions || exit 1