# Launch two adaptive-k variants (single-stage cell-policy at stage_i=3,
# no curriculum, but with growing recurrent-hidden thought tokens k).
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure anywhere inside a pipeline.
set -euo pipefail

# Project root; export ROOT before running to point at another location.
ROOT="${ROOT:-/home/ubuntu/curriculum_cot}"

# Pipeline entry point that each variant is started with.
PY="${ROOT}/_runs/adaptive_k_cellpolicy_pipeline.py"

# Fail fast here: the variants are started as background jobs, and `set -e`
# does not react to a background job failing, so a missing script would
# otherwise only show up inside each variant's console.log.
if [[ ! -f "${PY}" ]]; then
  printf '[launch] pipeline script not found: %s\n' "${PY}" >&2
  exit 1
fi

# Each invocation gets its own timestamped sweep directory under _runs/.
TS="$(date +%Y%m%d_%H%M%S)"
SWEEP_ROOT="${ROOT}/_runs/adaptive_k_cellpolicy_${TS}"
mkdir -p "${SWEEP_ROOT}"
#######################################
# Start one pipeline variant as a background job that survives this shell.
# Usage: launch <variant> <gpu> [pipeline CLI args...]
# Globals:
#   SWEEP_ROOT  (read) directory holding per-variant outputs and PIDS.txt
#   PY          (read) pipeline script handed to the interpreter
#   PYTHON_BIN  (read, optional) interpreter to run; defaults to
#               /opt/pytorch/bin/python
# Arguments:
#   $1   variant name; used as the output sub-directory and as --variant
#   $2   value forwarded as --gpu
#   $3+  forwarded verbatim to the pipeline after the fixed options
# Outputs:
#   One "[launch] ..." line on stdout. The job's stdout and stderr go to
#   <SWEEP_ROOT>/<variant>/console.log, and "<variant>=<pid>" is appended
#   to <SWEEP_ROOT>/PIDS.txt.
# Returns:
#   2 when fewer than two arguments are given; non-zero when the output
#   directory cannot be created or PIDS.txt cannot be written.
#######################################
launch() {
  if (( $# < 2 )); then
    printf 'usage: launch <variant> <gpu> [pipeline args...]\n' >&2
    return 2
  fi

  local variant="$1" gpu="$2"
  shift 2

  local python_bin="${PYTHON_BIN:-/opt/pytorch/bin/python}"
  local out="${SWEEP_ROOT}/${variant}"
  mkdir -p "${out}" || return

  echo "[launch] ${variant} on GPU ${gpu} out=${out}"

  # -u keeps the interpreter's output unbuffered so console.log can be
  # followed while the job runs.
  nohup "${python_bin}" -u "${PY}" \
    --variant "${variant}" \
    --gpu "${gpu}" \
    --output_root "${out}" \
    "$@" > "${out}/console.log" 2>&1 &
  local pid=$!

  # Best effort: drop the job from this shell's job table. A failure here
  # must not abort the sweep, hence the deliberate `|| true`.
  disown "${pid}" || true

  echo "${variant}=${pid}" >> "${SWEEP_ROOT}/PIDS.txt"
}
# adaptive_a: classic schedule (start at k=0, plateau-bumps with eps=0.01).
# Options are collected in an array and forwarded to the pipeline unchanged.
adaptive_a_args=(
  --start_k 0
  --max_k 4
  --steps_per_phase 600
  --max_phases_per_k 2
  --plateau_eps 0.01
  --sft_lr 2e-5
  --sft_bs 8
  --sft_ga 4
  --grpo_steps 1500
  --grpo_lr 5e-6
  --grpo_bs 8
  --grpo_ga 4
  --grpo_ng 8
)
launch adaptive_a_eps01 2 "${adaptive_a_args[@]}"
# adaptive_b: faster k-growth (max_phases_per_k=1, force bump every phase).
# Options are collected in an array and forwarded to the pipeline unchanged.
adaptive_b_args=(
  --start_k 0
  --max_k 4
  --steps_per_phase 800
  --max_phases_per_k 1
  --plateau_eps 1.0
  --sft_lr 2e-5
  --sft_bs 8
  --sft_ga 4
  --grpo_steps 1500
  --grpo_lr 5e-6
  --grpo_bs 8
  --grpo_ga 4
  --grpo_ng 8
)
launch adaptive_b_fastgrow 3 "${adaptive_b_args[@]}"
# Report where this sweep lives, then list the recorded "<variant>=<pid>" lines.
printf '%s\n' \
  "[launch] sweep root: ${SWEEP_ROOT}" \
  "[launch] PIDs:"
cat "${SWEEP_ROOT}/PIDS.txt"