curriculum-cot-code / small_model_20empty /launch_small_baseline_pipeline.sh
Avra98's picture
Initial code dump (rebuttal-ready snapshot)
76de008 verified
#!/usr/bin/env bash
#
# Launcher for run_small_baseline_pipeline.py (20-empty, small-model baseline).
#
# Every setting below can be overridden from the environment; the value after
# ':-' is the default used when the variable is unset or empty.
#
#   ROOT             project checkout that all default paths are derived from
#   PYTHON_BIN       Python interpreter used to run the pipeline
#   PIPELINE         pipeline entry-point script
#   TRAIN_JSONL      training data file
#   CACHE_DIR        cache directory handed to the pipeline
#   MODEL_NAME       model identifier handed to the pipeline
#   GPU_IDS          value for --distributed_gpu_ids
#   NUM_PROCESSES    value for both --sft_num_processes and --grpo_num_processes
#   MIN_STAGE        value for --min_stage
#   MAX_STAGE        value for --max_stage
#   RUN_TAG          run identifier; also a path component of OUTPUT_ROOT
#   CHECKPOINT_ROOT  base directory for checkpoints
#   OUTPUT_ROOT      output directory for this run
set -euo pipefail

# ROOT and PIPELINE follow the same override convention as the other settings,
# so the script can be used from a checkout that is not at the default location.
ROOT="${ROOT:-/home/ubuntu/curriculum-CoT}"
PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
PIPELINE="${PIPELINE:-${ROOT}/sudoku/llm_policy_icon/small_model_20empty/run_small_baseline_pipeline.py}"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/sudoku/llm_policy_icon/data/sudoku_t3_20empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"

GPU_IDS="${GPU_IDS:-0}"
NUM_PROCESSES="${NUM_PROCESSES:-1}"
MIN_STAGE="${MIN_STAGE:-1}"
MAX_STAGE="${MAX_STAGE:-4}"

# Default tag is a timestamp (YYYYmmdd_HHMMSS) taken when the script starts.
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"
CHECKPOINT_ROOT="${CHECKPOINT_ROOT:-${ROOT}/sudoku/llm_policy_icon/final_checkpoint/small_model_20empty/baseline}"
# By default each run gets its own directory: <CHECKPOINT_ROOT>/<RUN_TAG>/...
OUTPUT_ROOT="${OUTPUT_ROOT:-${CHECKPOINT_ROOT}/${RUN_TAG}/baseline_pipeline_20empty_4stage_small}"
# Assemble the pipeline invocation as an array so each argument is passed
# through exactly as written, whitespace included. Flags are appended in
# groups; '${VAR:-x}' supplies x when VAR is unset or empty.

# Interpreter followed by the pipeline entry point.
cmd=("${PYTHON_BIN}" "${PIPELINE}")

# Paths, model and run identity.
cmd+=(
  --python_executable "${PYTHON_BIN}"
  --train_jsonl "${TRAIN_JSONL}"
  --cache_dir "${CACHE_DIR}"
  --model_name "${MODEL_NAME}"
  --checkpoint_root "${CHECKPOINT_ROOT}"
  --output_root "${OUTPUT_ROOT}"
  --run_tag "${RUN_TAG}"
)

# Stage range and process layout; the SFT and GRPO phases receive the same
# process count.
cmd+=(
  --min_stage "${MIN_STAGE}"
  --max_stage "${MAX_STAGE}"
  --distributed_gpu_ids "${GPU_IDS}"
  --sft_num_processes "${NUM_PROCESSES}"
  --grpo_num_processes "${NUM_PROCESSES}"
)

# Training hyper-parameters, each overridable through the environment.
cmd+=(
  --total_empties_hint "${TOTAL_EMPTIES_HINT:-20}"
  --sft_num_epochs "${SFT_NUM_EPOCHS:-1.0}"
  --grpo_num_train_epochs "${GRPO_NUM_TRAIN_EPOCHS:-0.5}"
  --sft_gradient_accumulation_steps "${SFT_GRADIENT_ACCUMULATION_STEPS:-8}"
  --grpo_per_device_train_batch_size "${GRPO_PER_DEVICE_TRAIN_BATCH_SIZE:-2}"
  --grpo_gradient_accumulation_steps "${GRPO_GRADIENT_ACCUMULATION_STEPS:-4}"
  --grpo_num_generations "${GRPO_NUM_GENERATIONS:-2}"
)

# Gradient checkpointing flags are always passed for both phases.
cmd+=(--sft_enable_gradient_checkpointing)
cmd+=(--grpo_enable_gradient_checkpointing)

# Save/eval cadence, per-phase wall-clock limit, and W&B mode.
cmd+=(
  --sft_save_steps "${SFT_SAVE_STEPS:-100}"
  --sft_eval_steps "${SFT_EVAL_STEPS:-100}"
  --grpo_save_steps "${GRPO_SAVE_STEPS:-25}"
  --grpo_eval_steps "${GRPO_EVAL_STEPS:-25}"
  --phase_max_wall_clock_seconds "${PHASE_MAX_WALL_CLOCK_SECONDS:-21600}"
  --wandb_mode "${WANDB_MODE:-offline}"
)
# Append "<flag> <value>" to the global cmd array only when the value is
# non-empty; otherwise leave cmd untouched.
#   $1 - flag name
#   $2 - value (may be empty)
append_if_set() {
  [[ -n "${2}" ]] || return 0
  cmd+=("${1}" "${2}")
}

# Optional settings: each flag is passed only if its environment variable is
# set to a non-empty value.
append_if_set --bootstrap_adapter_dir "${BOOTSTRAP_ADAPTER_DIR:-}"
append_if_set --stage1_init_adapter_dir "${STAGE1_INIT_ADAPTER_DIR:-}"
append_if_set --limit_train_rows "${LIMIT_TRAIN_ROWS:-}"
append_if_set --sft_stage_max_steps "${SFT_STAGE_MAX_STEPS:-}"
append_if_set --grpo_stage_max_steps "${GRPO_STAGE_MAX_STEPS:-}"

# A non-empty WANDB_ENTITY additionally switches on --use_wandb.
[[ -z "${WANDB_ENTITY:-}" ]] || cmd+=(--use_wandb --wandb_entity "${WANDB_ENTITY}")
# Print the resolved run configuration before starting the pipeline.
printf 'Launching small baseline pipeline on GPUs %s\n' "${GPU_IDS}"
printf 'Output root: %s\n' "${OUTPUT_ROOT}"
printf 'Stages: %s -> %s, processes=%s\n' "${MIN_STAGE}" "${MAX_STAGE}" "${NUM_PROCESSES}"

# Replace this shell with the pipeline process instead of running it as a
# child. The pipeline then owns the launcher's PID, so a signal sent to the
# launcher reaches the pipeline directly rather than killing the wrapper and
# leaving the job running. The exit status seen by the caller is unchanged.
exec "${cmd[@]}"