# curriculum-cot-code / large_baseline_extension /launch_nonlocation_sft.sh
# Avra98's picture
# Initial code dump (rebuttal-ready snapshot)
# 76de008 verified
#!/usr/bin/env bash
#
# Launcher for multi_output_cell_policy/sft_multi_output_train.py.
#
# Every setting written below as VAR="${VAR:-default}" can be overridden by
# exporting VAR before running this script; an unset or empty VAR falls back to
# the default shown.
set -euo pipefail

#######################################
# Validate a non-negative decimal integer and print it in canonical base-10
# form (leading zeros removed).
# Arguments: $1 - setting name, used only in the error message
#            $2 - value to validate
# Outputs:   canonical value on stdout; error message on stderr
# Returns:   0 when the value is valid, 1 otherwise
#######################################
normalize_uint() {
  local name=$1 value=$2
  if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
    printf 'error: %s must be a non-negative integer, got %q\n' "${name}" "${value}" >&2
    return 1
  fi
  # 10# forces base 10 so that "08" is read as eight rather than as bad octal.
  printf '%d\n' "$((10#${value}))"
}

# --- Locations --------------------------------------------------------------
# ROOT is the parent of the directory that holds this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)"
PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
TRAINER="${ROOT}/multi_output_cell_policy/sft_multi_output_train.py"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/data/sudoku_t3_30empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"

# --- Model and devices ------------------------------------------------------
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"
# GPU_ID is used when NUM_PROCESSES is not greater than 1, GPU_IDS otherwise.
GPU_ID="${GPU_ID:-0}"
GPU_IDS="${GPU_IDS:-0,1,2,3,4,5,6,7}"
# NUM_PROCESSES is compared with `[[ ... -gt 1 ]]` further down, and that test
# evaluates its operands as arithmetic. Normalizing here makes a malformed
# value (e.g. "1.5") stop the script with a clear message instead of making the
# test fail and silently selecting the single-process path. 0 is still
# accepted and, like 1, selects the single-process path.
NUM_PROCESSES="$(normalize_uint NUM_PROCESSES "${NUM_PROCESSES:-1}")"

# --- Run identity and output ------------------------------------------------
STAGE_I="${STAGE_I:-2}"
# Default tag is the launch time, e.g. 20240131_235959.
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"
OUTPUT_ROOT="${OUTPUT_ROOT:-${ROOT}/final_checkpoint/large_baseline_extension/hard_9x9_qwen05b/baseline/sft}"
OUTPUT_DIR="${OUTPUT_DIR:-${OUTPUT_ROOT}/i${STAGE_I}_${RUN_TAG}}"

# --- Weights & Biases -------------------------------------------------------
WANDB_PROJECT="${WANDB_PROJECT:-sudoku-multi-output-sft}"
WANDB_RUN_NAME="${WANDB_RUN_NAME:-large_baseline_noloc_sft_i${STAGE_I}_${RUN_TAG}}"
# Make sure the directory for this run's outputs exists.
mkdir -p "$OUTPUT_DIR"

# CUDA / PyTorch environment inherited by the trainer process.
CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only GPU_ID by default; when more than one process is requested,
# expose the whole GPU_IDS list instead.
CUDA_VISIBLE_DEVICES="$GPU_ID"
if [[ "$NUM_PROCESSES" -gt 1 ]]; then
  CUDA_VISIBLE_DEVICES="$GPU_IDS"
fi
# A caller-supplied allocator configuration wins over the default.
PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"
export CUDA_DEVICE_ORDER CUDA_VISIBLE_DEVICES PYTORCH_CUDA_ALLOC_CONF
# Assemble the trainer invocation in the array `cmd`.

# Launcher prefix: the interpreter, then either the torch.distributed.run
# module (more than one process) or the -u flag (single process), then the
# trainer script itself.
cmd=("$PYTHON_BIN")
if [[ "$NUM_PROCESSES" -gt 1 ]]; then
  cmd+=(-m torch.distributed.run --standalone --nproc_per_node "$NUM_PROCESSES")
else
  cmd+=(-u)
fi
cmd+=("$TRAINER")

# Arguments that are always passed. Values written as ${VAR:-default} can be
# overridden through the environment.
cmd+=(
  # Model, data and output locations.
  --model_name "$MODEL_NAME"
  --train_jsonl "$TRAIN_JSONL"
  --output_dir "$OUTPUT_DIR"
  --cache_dir "$CACHE_DIR"
  # Fixed at 0; this script exports CUDA_VISIBLE_DEVICES before launching.
  --gpu_id 0
  --stage_i "$STAGE_I"
  --total_empties_hint "${TOTAL_EMPTIES_HINT:-30}"
  # Optimisation settings.
  --gradient_accumulation_steps "${GRADIENT_ACCUMULATION_STEPS:-4}"
  --num_epochs "${NUM_EPOCHS:-1.0}"
  --learning_rate "${LEARNING_RATE:-2e-4}"
  --weight_decay "${WEIGHT_DECAY:-0.0}"
  --enable_gradient_checkpointing
  # Logging, checkpoint and evaluation cadence.
  --logging_steps "${LOGGING_STEPS:-10}"
  --save_steps "${SAVE_STEPS:-100}"
  --eval_steps "${EVAL_STEPS:-100}"
  --eval_rows "${EVAL_ROWS:-20}"
  --max_completion_length "${MAX_COMPLETION_LENGTH:-24}"
  # Weights & Biases identification.
  --wandb_project "$WANDB_PROJECT"
  --wandb_run_name "$WANDB_RUN_NAME"
  --wandb_mode "${WANDB_MODE:-offline}"
)

# Append "<flag> <value>" to cmd, but only when the value is non-empty.
#   $1 - flag name, $2 - value (may be empty)
add_optional() {
  if [[ -n "$2" ]]; then
    cmd+=("$1" "$2")
  fi
}

# Optional arguments, in the order the trainer receives them.
add_optional --init_adapter_dir "${INIT_ADAPTER_DIR:-}"
# --use_wandb is added for every WANDB_MODE other than the default "offline".
case "${WANDB_MODE:-offline}" in
  offline) ;;
  *) cmd+=(--use_wandb) ;;
esac
add_optional --wandb_entity "${WANDB_ENTITY:-}"
add_optional --limit_train_rows "${LIMIT_TRAIN_ROWS:-}"
add_optional --max_steps "${MAX_STEPS:-}"
# Print a short summary of the run that is about to start.
printf 'Launching hard 9x9 baseline SFT on GPUs %s\n' "${CUDA_VISIBLE_DEVICES}"
printf 'Output dir: %s\n' "${OUTPUT_DIR}"
printf 'Stage=%s processes=%s\n' "${STAGE_I}" "${NUM_PROCESSES}"

# Replace this shell with the trainer instead of running it as a child.
# Signals sent to the launcher's PID (Ctrl-C, kill, a job scheduler's timeout)
# then reach the trainer directly rather than stopping only the wrapper and
# leaving the trainer running, and the trainer's exit status becomes the exit
# status of this script. Nothing follows this line, so no work is skipped.
exec "${cmd[@]}"