#!/usr/bin/env bash
#
# Launch the "strawman baseline" sweep for the sudoku stage-1 data.
#
# For every configuration listed at the bottom of this file, run_pipeline
# starts one detached job on one GPU that runs the training script twice:
# first with --phase sft, then with --phase grpo initialised from the SFT
# adapter. Per-job output goes to <SWEEP_ROOT>/<tag>/pipeline.log and the
# launched PIDs are recorded in <SWEEP_ROOT>/PIDS.txt.
#
# Usage: run the script with no arguments.
set -euo pipefail

# --- Fixed locations --------------------------------------------------------
ROOT=/home/ubuntu/curriculum_cot
SCRIPT=${ROOT}/_runs/simple_baseline_sudoku_train.py
PYTHON_BIN=/opt/pytorch/bin/python

# --- Datasets shared by both phases ----------------------------------------
TRAIN_JSONL=${ROOT}/data/sudoku_t3_20empty_value_qwen_text_stage1_train.jsonl
EVAL_JSONL=${ROOT}/data/sudoku_t3_20empty_value_qwen_text_stage1_eval.jsonl

# Fail fast, before any side effect. The jobs are detached and their output is
# discarded by the launcher, so a missing interpreter or input would otherwise
# only show up inside each pipeline.log while this script reports "launched".
if [[ ! -x "${PYTHON_BIN}" ]]; then
  printf 'error: python interpreter not found or not executable: %s\n' \
    "${PYTHON_BIN}" >&2
  exit 1
fi
for required_file in "${SCRIPT}" "${TRAIN_JSONL}" "${EVAL_JSONL}"; do
  if [[ ! -f "${required_file}" || ! -r "${required_file}" ]]; then
    printf 'error: required file missing or unreadable: %s\n' \
      "${required_file}" >&2
    exit 1
  fi
done
unset required_file

# --- Sweep output directory -------------------------------------------------
# A fresh timestamped directory per invocation; its path is also written to a
# well-known pointer file so other tooling can find the most recent sweep.
SWEEP_ROOT=${ROOT}/_runs/strawman_baseline_$(date +%Y%m%d_%H%M%S)
mkdir -p "${SWEEP_ROOT}"
echo "${SWEEP_ROOT}" > "${ROOT}/_runs/current_strawman_sweep_dir"
echo "SWEEP_ROOT=${SWEEP_ROOT}"

# --- Environment inherited by every training process ------------------------
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Both cache variables point at the same project-local directory.
export HF_HOME="${ROOT}/.hf_cache"
export TRANSFORMERS_CACHE="${ROOT}/.hf_cache"
export WANDB_MODE=offline

#######################################
# Launch one detached SFT -> GRPO training pipeline pinned to a single GPU.
#
# The pipeline runs in a background subshell that is disowned, so this
# function returns immediately after the launch. GRPO starts only if SFT
# exits with status 0; a failing phase writes a FAILED line (with the exit
# status) to the log and the subshell exits with that status.
#
# Globals (read):
#   SWEEP_ROOT   directory that receives <tag>/ and PIDS.txt
#   PYTHON_BIN   interpreter used to run SCRIPT
#   SCRIPT       training script invoked with --phase sft / --phase grpo
#   TRAIN_JSONL  passed as --train_jsonl to both phases
#   EVAL_JSONL   passed as --eval_jsonl to both phases
# Arguments:
#   $1 gpu       value exported as CUDA_VISIBLE_DEVICES for the job
#   $2 tag       run name; outputs go under ${SWEEP_ROOT}/${tag}
#   $3 sft_lr    --learning_rate for the SFT phase
#   $4 grpo_lr   --learning_rate for the GRPO phase
#   $5 sft_max   --max_steps for the SFT phase
#   $6 grpo_max  --max_steps for the GRPO phase
# Outputs:
#   stdout: one "GPU <gpu> -> <tag> pid=<pid> log=<log>" line
#   files:  ${SWEEP_ROOT}/${tag}/pipeline.log (truncated, then appended to)
#           ${SWEEP_ROOT}/PIDS.txt           ("<pid> <gpu> <tag>" appended)
# Returns:
#   0 once the job is launched; 2 on too few arguments; non-zero if the
#   output directory or log file cannot be created.
#######################################
run_pipeline() {
  if (( $# < 6 )); then
    printf 'usage: run_pipeline GPU TAG SFT_LR GRPO_LR SFT_MAX_STEPS GRPO_MAX_STEPS\n' >&2
    return 2
  fi

  local gpu="$1" tag="$2" sft_lr="$3" grpo_lr="$4" sft_max="$5" grpo_max="$6"
  local out="${SWEEP_ROOT}/${tag}"
  local log="${out}/pipeline.log"

  mkdir -p "${out}" || return
  : > "${log}" || return

  # Flags that must stay identical across the two phases live in one place so
  # they cannot drift apart.
  local -a shared_args=(
    --train_jsonl "${TRAIN_JSONL}"
    --eval_jsonl "${EVAL_JSONL}"
    --logging_steps 25
    --save_steps 200
    --eval_rows 100
    --max_completion_length 96
    --max_prompt_length 1024
    --lora_r 32 --lora_alpha 64 --lora_dropout 0.05
    --seed 0
  )

  (
    export CUDA_VISIBLE_DEVICES="${gpu}"

    # Append a timestamped "=== ... ===" marker line to the pipeline log.
    note() {
      printf '[%s] === %s ===\n' "$(date +%H:%M:%S)" "$1" >> "${log}"
    }

    # Run one training phase. The status is checked explicitly instead of
    # depending on the caller's `set -e`, so a failed phase is recorded in the
    # log and never feeds the next one.
    run_phase() {
      local label="$1" rc=0
      shift
      "${PYTHON_BIN}" -u "${SCRIPT}" "$@" "${shared_args[@]}" \
        >> "${log}" 2>&1 || rc=$?
      if (( rc != 0 )); then
        note "${tag} FAILED during ${label} (exit ${rc})"
        exit "${rc}"
      fi
    }

    note "${tag} on GPU ${gpu}: SFT lr=${sft_lr} max_steps=${sft_max}"
    run_phase SFT \
      --phase sft \
      --output_dir "${out}/sft" \
      --learning_rate "${sft_lr}" \
      --max_steps "${sft_max}" \
      --per_device_train_batch_size 8 \
      --gradient_accumulation_steps 2 \
      --num_epochs 8

    note "${tag} on GPU ${gpu}: GRPO lr=${grpo_lr} max_steps=${grpo_max}"
    run_phase GRPO \
      --phase grpo \
      --init_adapter_dir "${out}/sft/final" \
      --output_dir "${out}/grpo" \
      --learning_rate "${grpo_lr}" \
      --max_steps "${grpo_max}" \
      --per_device_train_batch_size 4 \
      --gradient_accumulation_steps 2 \
      --num_generations 8 \
      --beta 0.0 \
      --temperature 1.0 \
      --num_epochs 50

    note "${tag} DONE"
  ) >/dev/null 2>&1 &

  local pid=$!
  echo "${pid} ${gpu} ${tag}" >> "${SWEEP_ROOT}/PIDS.txt"
  # Best effort: drop the job from the shell's job table; not an error if the
  # shell has no job entry for it.
  disown "${pid}" 2>/dev/null || true
  printf 'GPU %s -> %s pid=%s log=%s\n' "${gpu}" "${tag}" "${pid}" "${log}"
}

# Sweep table, one pipeline per row:
#   GPU  tag  SFT-lr  GRPO-lr  SFT-max-steps  GRPO-max-steps
sweep_rows=(
  '0 strawman_a_sft5e5_grpo5e6 5e-5 5e-6 2000 1500'
  '1 strawman_b_sft1e4_grpo5e6 1e-4 5e-6 2000 1500'
)

for sweep_row in "${sweep_rows[@]}"; do
  # Split the row on whitespace into the six positional arguments.
  read -r -a sweep_fields <<< "${sweep_row}"
  run_pipeline "${sweep_fields[@]}"
done

# Summary: blank line, banner, then the "<pid> <gpu> <tag>" records.
printf '\n%s\n' '=== launched ==='
cat "${SWEEP_ROOT}/PIDS.txt"