# Launch two strawman variants (single-stage cell-policy at stage_i=3, no
# curriculum, no thought tokens) on GPUs 0 and 1.
# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Project checkout; override by exporting ROOT before running this script.
ROOT="${ROOT:-/home/ubuntu/curriculum_cot}"

# Pipeline script that every variant runs.
PIPE="${ROOT}/_runs/strawman_cellpolicy_pipeline.sh"
# Check for the pipeline script before creating any output, so a wrong ROOT
# does not leave an empty timestamped sweep directory behind.
if [[ ! -f "${PIPE}" ]]; then
  printf 'error: pipeline script not found: %s\n' "${PIPE}" >&2
  exit 1
fi
chmod +x "${PIPE}"

# Each invocation gets its own sweep directory, named by start time
# (second resolution).
TS="$(date +%Y%m%d_%H%M%S)"
SWEEP_ROOT="${ROOT}/_runs/strawman_cellpolicy_${TS}"
mkdir -p "${SWEEP_ROOT}"
#######################################
# Start one pipeline variant as a detached background job.
# Globals:
#   SWEEP_ROOT (read) - directory holding per-variant output and PIDS.txt
#   PIPE       (read) - path of the pipeline script, run with bash
# Arguments:
#   $1   - variant name; also used as the output subdirectory name
#   $2   - value passed to the pipeline as GPU
#   $3.. - optional KEY=VALUE settings added to the pipeline's environment
# Outputs:
#   One progress line on stdout; usage/validation errors on stderr.
#   The pipeline's stdout and stderr go to <SWEEP_ROOT>/<variant>/console.log,
#   and "<variant>=<pid>" is appended to <SWEEP_ROOT>/PIDS.txt.
# Returns:
#   0 once the job has been started, 2 on invalid arguments.
#######################################
launch() {
  if (( $# < 2 )) || [[ -z "$1" ]]; then
    printf 'usage: launch <variant> <gpu> [KEY=VALUE]...\n' >&2
    return 2
  fi
  local variant="$1" gpu="$2"
  shift 2

  # env treats the first word without '=' as the command to execute, so a
  # malformed override would silently replace "bash PIPE". Reject it before
  # any directory or PID entry is created.
  local kv
  for kv in "$@"; do
    if [[ "${kv}" != ?*=* ]]; then
      printf '[launch] %s: invalid override %q (expected KEY=VALUE)\n' \
        "${variant}" "${kv}" >&2
      return 2
    fi
  done

  local out="${SWEEP_ROOT}/${variant}"
  mkdir -p -- "${out}" || return
  echo "[launch] ${variant} on GPU ${gpu} out=${out}"

  # nohup + disown: the job is meant to keep running after this script and
  # its terminal are gone.
  nohup env VARIANT="${variant}" GPU="${gpu}" OUTPUT_ROOT="${out}" "$@" \
    bash "${PIPE}" > "${out}/console.log" 2>&1 &
  local pid=$!
  # Best effort: a job that has already exited cannot be disowned.
  disown "${pid}" || true
  echo "${variant}=${pid}" >> "${SWEEP_ROOT}/PIDS.txt"
}
# Variant A on GPU 0: SFT_LR=2e-5 with SFT_MAX_STEPS=3000.
launch strawman_a_lr2e5 0 \
  SFT_LR=2e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=3000 GRPO_MAX_STEPS=1500 \
  PENALTY_MISSING=1.0 EXACT_MATCH_BONUS=1.0 CARD_MISMATCH_PEN=1.5 \
  SFT_OVERSAMPLE=3

# Variant B on GPU 1: differs from A only in SFT_LR (5e-5) and
# SFT_MAX_STEPS (4000); every other setting is identical.
launch strawman_b_lr5e5 1 \
  SFT_LR=5e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=4000 GRPO_MAX_STEPS=1500 \
  PENALTY_MISSING=1.0 EXACT_MATCH_BONUS=1.0 CARD_MISMATCH_PEN=1.5 \
  SFT_OVERSAMPLE=3

# The jobs are detached and this script does not wait for them; print where
# the outputs live and the recorded "<variant>=<pid>" lines.
echo "[launch] sweep root: ${SWEEP_ROOT}"
echo "[launch] PIDs:"
cat "${SWEEP_ROOT}/PIDS.txt"