curriculum-cot-code / _runs /launch_adaptive_k_cellpolicy.sh
Avra98's picture
Add data/ JSONLs + _runs/ launch scripts (override .gitignore)
48c96cf verified
#!/usr/bin/env bash
# Launch two adaptive-k variants (single-stage cell-policy at stage_i=3,
# no curriculum, but with growing recurrent-hidden thought tokens k).
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Project checkout; export ROOT before running to point somewhere else.
ROOT="${ROOT:-/home/ubuntu/curriculum_cot}"

# Pipeline entry point that every launched variant executes.
PY="${ROOT}/_runs/adaptive_k_cellpolicy_pipeline.py"

# Jobs are started detached with their output sent to a per-variant
# console.log, so a missing pipeline script would otherwise go unnoticed here:
# the launcher would still print PIDs and exit 0. Check before creating any
# sweep directory.
if [[ ! -f "${PY}" ]]; then
  printf '[launch] pipeline script not found: %s\n' "${PY}" >&2
  exit 1
fi

# One timestamped directory per invocation holds every variant's output plus
# the PIDS.txt index.
TS="$(date +%Y%m%d_%H%M%S)"
SWEEP_ROOT="${ROOT}/_runs/adaptive_k_cellpolicy_${TS}"
mkdir -p "${SWEEP_ROOT}"
#######################################
# Start one pipeline variant as a detached background job.
#
# Usage: launch <variant> <gpu> [pipeline CLI args...]
# Globals:
#   SWEEP_ROOT (read)  directory that receives <variant>/ and PIDS.txt
#   PY         (read)  pipeline script handed to the interpreter
#   PYTHON_BIN (read)  optional interpreter override; defaults to
#                      /opt/pytorch/bin/python
# Arguments:
#   $1   variant name: passed as --variant and used as the output subdirectory
#   $2   value passed as --gpu
#   $3+  forwarded verbatim to the pipeline after the three fixed options
# Outputs:
#   One "[launch] ..." line on stdout; the job's stdout and stderr go to
#   <out>/console.log; "<variant>=<pid>" is appended to ${SWEEP_ROOT}/PIDS.txt.
# Returns:
#   2 without starting anything when <variant> or <gpu> is missing or <variant>
#   is empty; 1 when the output directory cannot be created; otherwise the
#   status of the final PIDS.txt append.
#######################################
launch() {
  if (( $# < 2 )) || [[ -z "$1" ]]; then
    printf 'usage: launch <variant> <gpu> [pipeline args...]\n' >&2
    return 2
  fi
  local variant="$1" gpu="$2"
  shift 2

  local python_bin="${PYTHON_BIN:-/opt/pytorch/bin/python}"
  local out="${SWEEP_ROOT}/${variant}"
  mkdir -p "${out}" || return 1

  echo "[launch] ${variant} on GPU ${gpu} out=${out}"

  # nohup plus the trailing '&' lets the job outlive this shell; -u asks the
  # Python interpreter for unbuffered stdout/stderr so console.log fills as
  # the job runs.
  nohup "${python_bin}" -u "${PY}" \
    --variant "${variant}" \
    --gpu "${gpu}" \
    --output_root "${out}" \
    "$@" > "${out}/console.log" 2>&1 &
  local pid=$!

  # Best effort: drop the job from this shell's job table. A failure here is
  # harmless because the job is already running under nohup.
  disown "${pid}" || true

  echo "${variant}=${pid}" >> "${SWEEP_ROOT}/PIDS.txt"
}
# Options that are identical for both variants. They live in arrays so each
# launch line shows only what differs; the arrays are expanded at the same
# positions the literal options occupied, so the forwarded argument order is
# unchanged.
k_range=(--start_k 0 --max_k 4)
trainer_opts=(
  --sft_lr 2e-5 --sft_bs 8 --sft_ga 4
  --grpo_steps 1500 --grpo_lr 5e-6 --grpo_bs 8 --grpo_ga 4 --grpo_ng 8
)

# adaptive_a, GPU 2: classic schedule (start at k=0, plateau-bumps with
# eps=0.01).
launch adaptive_a_eps01 2 \
  "${k_range[@]}" --steps_per_phase 600 --max_phases_per_k 2 \
  --plateau_eps 0.01 "${trainer_opts[@]}"

# adaptive_b, GPU 3: faster k-growth (max_phases_per_k=1, force bump every
# phase).
launch adaptive_b_fastgrow 3 \
  "${k_range[@]}" --steps_per_phase 800 --max_phases_per_k 1 \
  --plateau_eps 1.0 "${trainer_opts[@]}"
# Final report: where this sweep's outputs live, followed by the
# "<variant>=<pid>" lines recorded in PIDS.txt.
printf '%s\n' \
  "[launch] sweep root: ${SWEEP_ROOT}" \
  "[launch] PIDs:"
cat -- "${SWEEP_ROOT}/PIDS.txt"