#!/usr/bin/env bash
# Launch two strawman variants (single-stage cell-policy at stage_i=3, no
# curriculum, no thought tokens) on GPUs 0 and 1.
#
# Environment:
#   ROOT  Project root (default: /home/ubuntu/curriculum_cot). The pipeline
#         script is read from, and all run outputs are written under,
#         ${ROOT}/_runs.
#
# Outputs (SWEEP_ROOT = ${ROOT}/_runs/strawman_cellpolicy_TIMESTAMP):
#   ${SWEEP_ROOT}/VARIANT/console.log  combined stdout+stderr of each variant
#   ${SWEEP_ROOT}/PIDS.txt             one "VARIANT=PID" line per launched job
set -euo pipefail

ROOT="${ROOT:-/home/ubuntu/curriculum_cot}"
TS="$(date +%Y%m%d_%H%M%S)"
SWEEP_ROOT="${ROOT}/_runs/strawman_cellpolicy_${TS}"
PIPE="${ROOT}/_runs/strawman_cellpolicy_pipeline.sh"

# Fail before creating any run directory if the pipeline script is missing,
# so a bad ROOT does not leave an empty timestamped sweep directory behind.
if [[ ! -f "${PIPE}" ]]; then
  printf '[launch] pipeline script not found: %s\n' "${PIPE}" >&2
  exit 1
fi

mkdir -p "${SWEEP_ROOT}"
# The pipeline is started via `bash "${PIPE}"` below, so the exec bit is not
# needed by this script; it is still set, as before, for direct invocation.
chmod +x "${PIPE}"

#######################################
# Start one pipeline run in the background, detached from this shell.
# Usage: launch VARIANT GPU [NAME=VALUE ...]
# Globals:
#   SWEEP_ROOT (read), PIPE (read)
# Arguments:
#   VARIANT     - run name; also the name of the output subdirectory
#   GPU         - passed to the pipeline as the GPU environment variable
#                 (how the pipeline consumes it is not visible here)
#   NAME=VALUE  - extra environment overrides handed to the pipeline; they
#                 come after VARIANT/GPU/OUTPUT_ROOT on the env command line
# Outputs:
#   Prints a launch line to stdout, writes the job's output to
#   ${SWEEP_ROOT}/VARIANT/console.log, and appends "VARIANT=PID" to
#   ${SWEEP_ROOT}/PIDS.txt.
# Returns:
#   2 on a usage error (too few arguments or a malformed override).
#######################################
launch() {
  if (( $# < 2 )); then
    printf 'usage: launch VARIANT GPU [NAME=VALUE ...]\n' >&2
    return 2
  fi
  local variant="$1" gpu="$2"
  shift 2

  # env treats the first argument that is not NAME=VALUE as the command to
  # run, so a stray word here would replace `bash "${PIPE}"`. Reject early
  # instead of discovering it later in console.log.
  local kv
  for kv in "$@"; do
    if [[ ! "${kv}" =~ ^[A-Za-z_][A-Za-z0-9_]*= ]]; then
      printf '[launch] %s: expected NAME=VALUE, got: %s\n' \
        "${variant}" "${kv}" >&2
      return 2
    fi
  done

  local out="${SWEEP_ROOT}/${variant}"
  mkdir -p "${out}"
  echo "[launch] ${variant} on GPU ${gpu} out=${out}"

  # nohup makes the job ignore SIGHUP; both output streams go to the
  # per-variant console log.
  nohup env VARIANT="${variant}" GPU="${gpu}" OUTPUT_ROOT="${out}" "$@" \
    bash "${PIPE}" > "${out}/console.log" 2>&1 &
  local pid=$!
  # Drop the job from this shell's job table; best-effort, hence `|| true`.
  disown "${pid}" || true
  echo "${variant}=${pid}" >> "${SWEEP_ROOT}/PIDS.txt"
}

launch strawman_a_lr2e5 0 \
  SFT_LR=2e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=3000 GRPO_MAX_STEPS=1500 \
  PENALTY_MISSING=1.0 EXACT_MATCH_BONUS=1.0 CARD_MISMATCH_PEN=1.5 \
  SFT_OVERSAMPLE=3

launch strawman_b_lr5e5 1 \
  SFT_LR=5e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=4000 GRPO_MAX_STEPS=1500 \
  PENALTY_MISSING=1.0 EXACT_MATCH_BONUS=1.0 CARD_MISMATCH_PEN=1.5 \
  SFT_OVERSAMPLE=3

echo "[launch] sweep root: ${SWEEP_ROOT}"
echo "[launch] PIDs:"
cat "${SWEEP_ROOT}/PIDS.txt"