#!/usr/bin/env bash
# Adaptive-k thought-token baseline (experiment D in the 2x2 ablation).
#
# Same single-stage, whole-puzzle setup as launch_simple_baseline.sh
# (experiment C, the "strawman"); same model, LoRA, JSONL, chat template.
# The ONLY change is that the SFT loss uses the recurrent_hidden mechanism
# with k thought tokens, and k grows automatically when the rolling-mean
# loss plateaus.
#
# Usage: run with no arguments. Each variant is started as a detached
# background job pinned to one GPU. Outputs:
#   ${ROOT}/_runs/adaptive_latent_<timestamp>/<tag>/train.log   per-variant log
#   ${ROOT}/_runs/adaptive_latent_<timestamp>/PIDS.txt          "<pid> <gpu> <tag>" per line
#   ${ROOT}/_runs/current_adaptive_latent_sweep_dir             path of the latest sweep
set -euo pipefail

readonly ROOT=/home/ubuntu/curriculum_cot
readonly SCRIPT=${ROOT}/_runs/adaptive_latent_baseline_sudoku_train.py
readonly PYTHON_BIN=/opt/pytorch/bin/python
readonly TRAIN_JSONL=${ROOT}/data/sudoku_t3_20empty_value_qwen_text_stage1_train.jsonl
readonly EVAL_JSONL=${ROOT}/data/sudoku_t3_20empty_value_qwen_text_stage1_eval.jsonl

# Print an error message to stderr and abort the launcher.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fail fast, before any sweep directory or pointer file is written. The jobs
# are backgrounded with their output hidden in per-run logs, so a missing
# input would otherwise only surface inside train.log after "launched".
[[ -x "${PYTHON_BIN}" ]] || die "python interpreter not executable: ${PYTHON_BIN}"
[[ -f "${SCRIPT}" ]] || die "training script not found: ${SCRIPT}"
[[ -f "${TRAIN_JSONL}" ]] || die "training data not found: ${TRAIN_JSONL}"
# NOTE(review): how the trainer treats a missing eval file is not visible
# from this launcher, so only warn here instead of aborting.
[[ -f "${EVAL_JSONL}" ]] || printf 'warning: eval data not found: %s\n' "${EVAL_JSONL}" >&2

# Assigned separately from `readonly` so a failing `date` is not masked.
SWEEP_ROOT=${ROOT}/_runs/adaptive_latent_$(date +%Y%m%d_%H%M%S)
readonly SWEEP_ROOT
mkdir -p "${SWEEP_ROOT}"
# Record the most recent sweep directory in a fixed-name pointer file.
echo "${SWEEP_ROOT}" > "${ROOT}/_runs/current_adaptive_latent_sweep_dir"
echo "SWEEP_ROOT=${SWEEP_ROOT}"

# Environment inherited by every trainer process launched below.
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_HOME="${ROOT}/.hf_cache"
export TRANSFORMERS_CACHE="${ROOT}/.hf_cache"
export WANDB_MODE=offline

#######################################
# Launch one training variant in the background on a single GPU.
# Globals:
#   SWEEP_ROOT, PYTHON_BIN, SCRIPT, TRAIN_JSONL, EVAL_JSONL (read)
# Arguments:
#   $1 - GPU index exported as CUDA_VISIBLE_DEVICES for the job
#   $2 - variant tag; names the output directory under SWEEP_ROOT
#   $3 - value passed as --learning_rate
#   $4 - value passed as --max_k
#   $5 - value passed as --min_steps_per_k
# Outputs:
#   Appends "<pid> <gpu> <tag>" to ${SWEEP_ROOT}/PIDS.txt and prints a
#   one-line launch summary to stdout. Trainer stdout/stderr go to
#   ${SWEEP_ROOT}/<tag>/train.log.
#######################################
run_variant() {
  local gpu="$1" tag="$2" lr="$3" max_k="$4" min_steps_per_k="$5"
  local out=${SWEEP_ROOT}/${tag}
  mkdir -p "${out}"
  local log=${out}/train.log
  : > "${log}" # start from an empty log file

  (
    export CUDA_VISIBLE_DEVICES="${gpu}"
    # exec replaces the subshell with the trainer, so the PID captured via $!
    # below (and written to PIDS.txt) is the trainer's own PID rather than
    # that of a wrapper shell; signalling it reaches the training process.
    exec "${PYTHON_BIN}" -u "${SCRIPT}" \
      --train_jsonl "${TRAIN_JSONL}" \
      --eval_jsonl "${EVAL_JSONL}" \
      --output_dir "${out}" \
      --learning_rate "${lr}" \
      --max_steps 4000 \
      --per_device_train_batch_size 4 \
      --gradient_accumulation_steps 2 \
      --logging_steps 25 \
      --save_steps 500 \
      --eval_every_steps 500 \
      --eval_rows 50 \
      --max_completion_length 96 \
      --max_prompt_length 1024 \
      --lora_r 32 --lora_alpha 64 --lora_dropout 0.05 \
      --enable_gradient_checkpointing \
      --start_k 0 \
      --max_k "${max_k}" \
      --min_steps_per_k "${min_steps_per_k}" \
      --plateau_window 100 \
      --plateau_eps 0.005 \
      --converged_eps 0.001 \
      --seed 0 \
      >> "${log}" 2>&1
  ) >/dev/null 2>&1 &
  local pid=$!

  echo "$pid $gpu $tag" >> "${SWEEP_ROOT}/PIDS.txt"
  # Best effort: remove the job from this shell's job table; a failure here
  # must not abort the launcher, hence the deliberate `|| true`.
  disown "$pid" 2>/dev/null || true
  printf 'GPU %s -> %s pid=%s log=%s\n' "$gpu" "$tag" "$pid" "$log"
}

# 2 variants on idle GPUs 2,3:
#   - adaptive_a: same LR (5e-5) as strawman variant a, max_k=4, min_steps_per_k=400
#   - adaptive_b: smaller min_steps_per_k=250 to grow k more aggressively
run_variant 2 adaptive_a_lr5e5_maxk4 5e-5 4 400
run_variant 3 adaptive_b_lr5e5_fastgrow 5e-5 4 250

echo
echo "=== launched ==="
cat "${SWEEP_ROOT}/PIDS.txt"