curriculum-cot-code / _runs /launch_strawman_cellpolicy.sh
Avra98's picture
Add data/ JSONLs + _runs/ launch scripts (override .gitignore)
48c96cf verified
#!/usr/bin/env bash
# Launch two strawman variants (single-stage cell-policy at stage_i=3, no
# curriculum, no thought tokens) on GPUs 0 and 1.
#
# Environment:
#   ROOT  Base directory; the pipeline script and all sweep outputs live under
#         ${ROOT}/_runs (default: /home/ubuntu/curriculum_cot).
#
# Every invocation writes into a fresh, timestamped sweep directory:
#   ${ROOT}/_runs/strawman_cellpolicy_<YYYYmmdd_HHMMSS>/
set -euo pipefail

ROOT="${ROOT:-/home/ubuntu/curriculum_cot}"
PIPE="${ROOT}/_runs/strawman_cellpolicy_pipeline.sh"

# Validate the pipeline script before touching the filesystem, so a wrong ROOT
# fails with a clear message instead of leaving an empty sweep directory behind.
if [[ ! -f "${PIPE}" ]]; then
  printf '[launch] pipeline script not found: %s\n' "${PIPE}" >&2
  exit 1
fi
# NOTE(review): this script starts the pipeline through `bash`, so the exec bit
# is not required here; kept in case the pipeline is also run directly.
chmod +x "${PIPE}"

# Second-resolution timestamp keeps successive sweeps in separate directories.
TS="$(date +%Y%m%d_%H%M%S)"
SWEEP_ROOT="${ROOT}/_runs/strawman_cellpolicy_${TS}"
mkdir -p "${SWEEP_ROOT}"
#######################################
# Start one pipeline run in the background, detached from this shell.
#
# Usage:     launch <variant> <gpu> [KEY=VALUE]...
# Globals:   SWEEP_ROOT (read) - directory receiving per-variant output
#            PIPE       (read) - path of the pipeline script run via bash
# Arguments: $1   - variant name; also the output sub-directory name
#            $2   - value exported to the pipeline as GPU
#            $3.. - extra environment overrides, each of the form KEY=VALUE
# Outputs:   one "[launch] ..." line on stdout; the job's stdout/stderr go to
#            ${SWEEP_ROOT}/<variant>/console.log; "<variant>=<pid>" is
#            appended to ${SWEEP_ROOT}/PIDS.txt
# Returns:   0 once the job has been forked, 2 on malformed arguments
#            (nothing is created or started in that case)
#######################################
launch() {
  if (( $# < 2 )); then
    printf '[launch] usage: launch <variant> <gpu> [KEY=VALUE]...\n' >&2
    return 2
  fi
  local variant="$1" gpu="$2"
  shift 2

  # `env` treats the first word that is not NAME=VALUE as the command to run,
  # so a malformed override would make the job die in the background while we
  # still record a PID for it. Reject such arguments before forking.
  local -r assignment_re='^[A-Za-z_][A-Za-z0-9_]*='
  local kv
  for kv in "$@"; do
    if [[ ! "${kv}" =~ ${assignment_re} ]]; then
      printf '[launch] %s: invalid override (expected KEY=VALUE): %s\n' \
        "${variant}" "${kv}" >&2
      return 2
    fi
  done

  local out="${SWEEP_ROOT}/${variant}"
  mkdir -p "${out}"
  echo "[launch] ${variant} on GPU ${gpu} out=${out}"

  # nohup + redirected output lets the run outlive the launching terminal.
  nohup env VARIANT="${variant}" GPU="${gpu}" OUTPUT_ROOT="${out}" "$@" \
    bash "${PIPE}" > "${out}/console.log" 2>&1 &
  local pid=$!
  # Best effort: drop the job from this shell's job table; failure is harmless.
  disown "${pid}" || true
  echo "${variant}=${pid}" >> "${SWEEP_ROOT}/PIDS.txt"
}
# Settings shared by both variants; only the SFT learning rate and the SFT
# step budget differ between them.
shared_overrides=(
  GRPO_MAX_STEPS=1500
  PENALTY_MISSING=1.0
  EXACT_MATCH_BONUS=1.0
  CARD_MISMATCH_PEN=1.5
  SFT_OVERSAMPLE=3
)

# Variant A: SFT lr 2e-5 for 3000 steps, on GPU 0.
launch strawman_a_lr2e5 0 \
  SFT_LR=2e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=3000 "${shared_overrides[@]}"

# Variant B: SFT lr 5e-5 for 4000 steps, on GPU 1.
launch strawman_b_lr5e5 1 \
  SFT_LR=5e-5 GRPO_LR=5e-6 SFT_MAX_STEPS=4000 "${shared_overrides[@]}"

# Summary: where the sweep lives and which PIDs were started.
printf '%s\n' "[launch] sweep root: ${SWEEP_ROOT}" "[launch] PIDs:"
cat "${SWEEP_ROOT}/PIDS.txt"