#!/usr/bin/env bash
#
# Launcher for the "hard 9x9 baseline SFT" run.
#
# Assembles the command line for
#   <root>/multi_output_cell_policy/sft_multi_output_train.py
# (where <root> is the parent of this script's directory) and runs it either
# directly (NUM_PROCESSES <= 1) or through `python -m torch.distributed.run`
# (NUM_PROCESSES > 1).
#
# Every setting is taken from the environment and falls back to a default:
#   PYTHON_BIN                   interpreter        (<root>/.venv/bin/python)
#   TRAIN_JSONL                  training data      (<root>/data/sudoku_t3_30empty_value_qwen_text.jsonl)
#   CACHE_DIR                    --cache_dir        (<root>/.hf_cache)
#   MODEL_NAME                   --model_name       (Qwen/Qwen2.5-0.5B-Instruct)
#   GPU_ID                       device mask when single-process   (0)
#   GPU_IDS                      device mask when multi-process    (0,1,2,3,4,5,6,7)
#   NUM_PROCESSES                worker count       (1)
#   STAGE_I                      --stage_i          (2)
#   RUN_TAG                      run suffix         (current date as YYYYmmdd_HHMMSS)
#   OUTPUT_ROOT / OUTPUT_DIR     checkpoint location (OUTPUT_DIR = OUTPUT_ROOT/i<STAGE_I>_<RUN_TAG>)
#   WANDB_PROJECT / WANDB_RUN_NAME / WANDB_MODE (offline) / WANDB_ENTITY
#   TOTAL_EMPTIES_HINT (30), GRADIENT_ACCUMULATION_STEPS (4), NUM_EPOCHS (1.0),
#   LEARNING_RATE (2e-4), WEIGHT_DECAY (0.0), LOGGING_STEPS (10),
#   SAVE_STEPS (100), EVAL_STEPS (100), EVAL_ROWS (20),
#   MAX_COMPLETION_LENGTH (24)
#   PYTORCH_CUDA_ALLOC_CONF      (expandable_segments:True)
# Optional, only forwarded when non-empty:
#   INIT_ADAPTER_DIR, LIMIT_TRAIN_ROWS, MAX_STEPS
set -euo pipefail

# Print an error message to stderr and abort the launch.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
TRAINER="${ROOT}/multi_output_cell_policy/sft_multi_output_train.py"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/data/sudoku_t3_30empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"

GPU_ID="${GPU_ID:-0}"
GPU_IDS="${GPU_IDS:-0,1,2,3,4,5,6,7}"
NUM_PROCESSES="${NUM_PROCESSES:-1}"
STAGE_I="${STAGE_I:-2}"
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"

OUTPUT_ROOT="${OUTPUT_ROOT:-${ROOT}/final_checkpoint/large_baseline_extension/hard_9x9_qwen05b/baseline/sft}"
OUTPUT_DIR="${OUTPUT_DIR:-${OUTPUT_ROOT}/i${STAGE_I}_${RUN_TAG}}"

WANDB_PROJECT="${WANDB_PROJECT:-sudoku-multi-output-sft}"
WANDB_RUN_NAME="${WANDB_RUN_NAME:-large_baseline_noloc_sft_i${STAGE_I}_${RUN_TAG}}"
# Resolved once so the --wandb_mode value and the --use_wandb decision agree.
wandb_mode="${WANDB_MODE:-offline}"

# Validate before touching the filesystem so a misconfigured launch does not
# leave an empty timestamped run directory behind.
[[ "${NUM_PROCESSES}" =~ ^[0-9]+$ ]] \
  || die "NUM_PROCESSES must be a non-negative integer, got '${NUM_PROCESSES}'"
# command -v accepts both an explicit path and a bare command name on PATH.
command -v "${PYTHON_BIN}" >/dev/null 2>&1 \
  || die "python interpreter not found or not executable: ${PYTHON_BIN}"
[[ -f "${TRAINER}" ]] || die "trainer script not found: ${TRAINER}"
if [[ ! -e "${TRAIN_JSONL}" ]]; then
  # Only a warning: the trainer is the authority on how this path is used.
  printf 'warning: training data not found: %s\n' "${TRAIN_JSONL}" >&2
fi

mkdir -p "${OUTPUT_DIR}"

export CUDA_DEVICE_ORDER=PCI_BUS_ID
export PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"

# Pick the device mask and the launcher prefix together; 10# forces base 10 so
# a zero-padded count such as "08" is not parsed as octal.
if (( 10#${NUM_PROCESSES} > 1 )); then
  export CUDA_VISIBLE_DEVICES="${GPU_IDS}"
  cmd=(
    "${PYTHON_BIN}" -m torch.distributed.run
    --standalone
    --nproc_per_node "${NUM_PROCESSES}"
    "${TRAINER}"
  )
else
  export CUDA_VISIBLE_DEVICES="${GPU_ID}"
  cmd=("${PYTHON_BIN}" -u "${TRAINER}")
fi

cmd+=(
  --model_name "${MODEL_NAME}"
  --train_jsonl "${TRAIN_JSONL}"
  --output_dir "${OUTPUT_DIR}"
  --cache_dir "${CACHE_DIR}"
  # Always 0: device selection is done via CUDA_VISIBLE_DEVICES above.
  # NOTE(review): presumably the trainer indexes into the visible devices — confirm.
  --gpu_id 0
  --stage_i "${STAGE_I}"
  --total_empties_hint "${TOTAL_EMPTIES_HINT:-30}"
  --gradient_accumulation_steps "${GRADIENT_ACCUMULATION_STEPS:-4}"
  --num_epochs "${NUM_EPOCHS:-1.0}"
  --learning_rate "${LEARNING_RATE:-2e-4}"
  --weight_decay "${WEIGHT_DECAY:-0.0}"
  --enable_gradient_checkpointing
  --logging_steps "${LOGGING_STEPS:-10}"
  --save_steps "${SAVE_STEPS:-100}"
  --eval_steps "${EVAL_STEPS:-100}"
  --eval_rows "${EVAL_ROWS:-20}"
  --max_completion_length "${MAX_COMPLETION_LENGTH:-24}"
  --wandb_project "${WANDB_PROJECT}"
  --wandb_run_name "${WANDB_RUN_NAME}"
  --wandb_mode "${wandb_mode}"
)

# Optional flags are appended only when the corresponding variable is set to a
# non-empty value.
if [[ -n "${INIT_ADAPTER_DIR:-}" ]]; then
  cmd+=(--init_adapter_dir "${INIT_ADAPTER_DIR}")
fi
# --use_wandb is passed for every mode except "offline".
if [[ "${wandb_mode}" != "offline" ]]; then
  cmd+=(--use_wandb)
fi
if [[ -n "${WANDB_ENTITY:-}" ]]; then
  cmd+=(--wandb_entity "${WANDB_ENTITY}")
fi
if [[ -n "${LIMIT_TRAIN_ROWS:-}" ]]; then
  cmd+=(--limit_train_rows "${LIMIT_TRAIN_ROWS}")
fi
if [[ -n "${MAX_STEPS:-}" ]]; then
  cmd+=(--max_steps "${MAX_STEPS}")
fi

printf 'Launching hard 9x9 baseline SFT on GPUs %s\n' "${CUDA_VISIBLE_DEVICES}"
printf 'Output dir: %s\n' "${OUTPUT_DIR}"
printf 'Stage=%s processes=%s\n' "${STAGE_I}" "${NUM_PROCESSES}"

# exec replaces this shell with the trainer so signals sent to the launcher's
# PID reach the training process directly instead of orphaning it; the exit
# status seen by the caller is the trainer's, as before.
exec "${cmd[@]}"