#!/usr/bin/env bash
#
# Launch multi_output_cell_policy/sft_multi_output_train.py for the
# "hard 9x9" baseline SFT run, either as one plain Python process or through
# torch.distributed.run when NUM_PROCESSES is greater than 1.
#
# All settings come from the environment; unset/empty values fall back to the
# defaults assigned below.
#
#   PYTHON_BIN       interpreter used to start the trainer
#   TRAIN_JSONL      value passed as --train_jsonl
#   CACHE_DIR        value passed as --cache_dir
#   MODEL_NAME       value passed as --model_name
#   GPU_ID           CUDA_VISIBLE_DEVICES when NUM_PROCESSES <= 1
#   GPU_IDS          CUDA_VISIBLE_DEVICES when NUM_PROCESSES > 1
#   NUM_PROCESSES    --nproc_per_node for torch.distributed.run
#   STAGE_I          value passed as --stage_i; also part of the run names
#   RUN_TAG          suffix for the output dir / W&B run name (default: now)
#   OUTPUT_ROOT      parent directory for the default OUTPUT_DIR
#   OUTPUT_DIR       value passed as --output_dir (created if missing)
#   WANDB_PROJECT, WANDB_RUN_NAME, WANDB_MODE, WANDB_ENTITY
#                    W&B settings; --use_wandb is only passed when
#                    WANDB_MODE is something other than "offline"
#   INIT_ADAPTER_DIR, LIMIT_TRAIN_ROWS, MAX_STEPS
#                    optional; the matching flag is passed only when set
#   TOTAL_EMPTIES_HINT, GRADIENT_ACCUMULATION_STEPS, NUM_EPOCHS,
#   LEARNING_RATE, WEIGHT_DECAY, LOGGING_STEPS, SAVE_STEPS, EVAL_STEPS,
#   EVAL_ROWS, MAX_COMPLETION_LENGTH
#                    forwarded to the trainer flags of the same name
set -euo pipefail

# Print an error to stderr and abort the launcher.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal diagnostic to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# --- Configuration -----------------------------------------------------------

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
TRAINER="${ROOT}/multi_output_cell_policy/sft_multi_output_train.py"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/data/sudoku_t3_30empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"
GPU_ID="${GPU_ID:-0}"
GPU_IDS="${GPU_IDS:-0,1,2,3,4,5,6,7}"
NUM_PROCESSES="${NUM_PROCESSES:-1}"
STAGE_I="${STAGE_I:-2}"
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"
OUTPUT_ROOT="${OUTPUT_ROOT:-${ROOT}/final_checkpoint/large_baseline_extension/hard_9x9_qwen05b/baseline/sft}"
OUTPUT_DIR="${OUTPUT_DIR:-${OUTPUT_ROOT}/i${STAGE_I}_${RUN_TAG}}"
WANDB_PROJECT="${WANDB_PROJECT:-sudoku-multi-output-sft}"
WANDB_RUN_NAME="${WANDB_RUN_NAME:-large_baseline_noloc_sft_i${STAGE_I}_${RUN_TAG}}"
# Resolved once so the --wandb_mode flag and the --use_wandb decision agree.
WANDB_MODE="${WANDB_MODE:-offline}"

# --- Validation (before anything is created on disk) -------------------------

# Numeric test operators evaluate their operands as arithmetic expressions, so
# only plain digits are allowed through; 10# keeps "08" from being read as
# (invalid) octal.
[[ "${NUM_PROCESSES}" =~ ^[0-9]+$ ]] \
  || die "NUM_PROCESSES must be a non-negative integer, got '${NUM_PROCESSES}'"
NUM_PROCESSES=$((10#${NUM_PROCESSES}))

# command -v accepts both a path and a bare command name found on PATH.
command -v "${PYTHON_BIN}" >/dev/null 2>&1 \
  || die "Python interpreter not found or not executable: ${PYTHON_BIN}"
[[ -f "${TRAINER}" ]] || die "trainer script not found: ${TRAINER}"

# Only a warning: the launcher does not know how the trainer resolves
# --train_jsonl, so a missing local path is reported but not treated as fatal.
[[ -e "${TRAIN_JSONL}" ]] || warn "TRAIN_JSONL does not exist: ${TRAIN_JSONL}"

# --- Environment and launcher selection --------------------------------------

mkdir -p "${OUTPUT_DIR}"
export CUDA_DEVICE_ORDER=PCI_BUS_ID
export PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"

if ((NUM_PROCESSES > 1)); then
  export CUDA_VISIBLE_DEVICES="${GPU_IDS}"

  IFS=',' read -r -a visible_gpus <<<"${GPU_IDS}"
  if ((NUM_PROCESSES > ${#visible_gpus[@]})); then
    warn "NUM_PROCESSES (${NUM_PROCESSES}) exceeds the number of entries in GPU_IDS (${#visible_gpus[@]})"
  fi

  cmd=(
    "${PYTHON_BIN}" -m torch.distributed.run
    --standalone
    --nproc_per_node "${NUM_PROCESSES}"
    "${TRAINER}"
  )
else
  export CUDA_VISIBLE_DEVICES="${GPU_ID}"
  cmd=("${PYTHON_BIN}" -u "${TRAINER}")
fi

# --- Trainer arguments -------------------------------------------------------

cmd+=(
  --model_name "${MODEL_NAME}"
  --train_jsonl "${TRAIN_JSONL}"
  --output_dir "${OUTPUT_DIR}"
  --cache_dir "${CACHE_DIR}"
  # Always 0: CUDA_VISIBLE_DEVICES above already narrows the visible devices.
  # NOTE(review): presumably the trainer treats this as an index into the
  # visible set -- confirm against sft_multi_output_train.py.
  --gpu_id 0
  --stage_i "${STAGE_I}"
  --total_empties_hint "${TOTAL_EMPTIES_HINT:-30}"
  --gradient_accumulation_steps "${GRADIENT_ACCUMULATION_STEPS:-4}"
  --num_epochs "${NUM_EPOCHS:-1.0}"
  --learning_rate "${LEARNING_RATE:-2e-4}"
  --weight_decay "${WEIGHT_DECAY:-0.0}"
  --enable_gradient_checkpointing
  --logging_steps "${LOGGING_STEPS:-10}"
  --save_steps "${SAVE_STEPS:-100}"
  --eval_steps "${EVAL_STEPS:-100}"
  --eval_rows "${EVAL_ROWS:-20}"
  --max_completion_length "${MAX_COMPLETION_LENGTH:-24}"
  --wandb_project "${WANDB_PROJECT}"
  --wandb_run_name "${WANDB_RUN_NAME}"
  --wandb_mode "${WANDB_MODE}"
)

# Optional flags are appended only when their variable is set and non-empty.
if [[ -n "${INIT_ADAPTER_DIR:-}" ]]; then
  cmd+=(--init_adapter_dir "${INIT_ADAPTER_DIR}")
fi

if [[ "${WANDB_MODE}" != "offline" ]]; then
  cmd+=(--use_wandb)
fi

if [[ -n "${WANDB_ENTITY:-}" ]]; then
  cmd+=(--wandb_entity "${WANDB_ENTITY}")
fi

if [[ -n "${LIMIT_TRAIN_ROWS:-}" ]]; then
  cmd+=(--limit_train_rows "${LIMIT_TRAIN_ROWS}")
fi

if [[ -n "${MAX_STEPS:-}" ]]; then
  cmd+=(--max_steps "${MAX_STEPS}")
fi

# --- Launch ------------------------------------------------------------------

printf 'Launching hard 9x9 baseline SFT on GPUs %s\n' "${CUDA_VISIBLE_DEVICES}"
printf 'Output dir: %s\n' "${OUTPUT_DIR}"
printf 'Stage=%s processes=%s\n' "${STAGE_I}" "${NUM_PROCESSES}"

# Log the exact, shell-quoted command line so the run can be reproduced.
printf 'Command:'
printf ' %q' "${cmd[@]}"
printf '\n'

# exec replaces this shell with the trainer, so signals sent to the launcher's
# PID (e.g. SIGTERM from a job scheduler) reach the trainer directly and its
# exit status becomes the script's exit status.
exec "${cmd[@]}"