#!/usr/bin/env bash
# Usage: bash scripts/run_pretrain.sh [additional pretrain.py args]
#
# Launches DDP pretraining via torchrun, with 8 worker processes on this
# node by default.
#
# Any extra arguments are placed after the defaults below and forwarded
# verbatim to pretrain.py.
#
# Optional environment overrides:
#   NPROC_PER_NODE  value for torchrun --nproc_per_node (default: 8)
#   MASTER_PORT     value for torchrun --master_port    (default: 29500)
#
# Examples:
#   bash scripts/run_pretrain.sh --max_steps 200000
#   bash scripts/run_pretrain.sh --resume checkpoints/checkpoint-0010000
#   NPROC_PER_NODE=4 MASTER_PORT=29501 bash scripts/run_pretrain.sh
set -euo pipefail

# Resolve paths relative to this script so it can be invoked from any
# working directory; the project root is the parent of scripts/.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Launch settings: defaults match the previously hard-coded values.
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
MASTER_PORT="${MASTER_PORT:-29500}"

# Fail with a clear message instead of a bare "command not found".
if ! command -v torchrun >/dev/null 2>&1; then
  printf 'error: torchrun not found on PATH\n' >&2
  exit 127
fi

# Arguments consumed by torchrun itself.
launcher_args=(
  --nproc_per_node="$NPROC_PER_NODE"
  --master_port="$MASTER_PORT"
)

# Default arguments for pretrain.py. Caller-supplied "$@" comes last.
# NOTE(review): whether a repeated flag in "$@" overrides a default here
# depends on pretrain.py's argument parser — confirm.
train_args=(
  --config "$PROJECT_DIR/configs/small.yaml"
  --train_data "$PROJECT_DIR/data/train.bin"
  --val_data "$PROJECT_DIR/data/val.bin"
  --checkpoint_dir "$PROJECT_DIR/checkpoints"
  --batch_size 8
  --grad_accum 4
  --warmup_steps 2000
)

torchrun \
  "${launcher_args[@]}" \
  "$PROJECT_DIR/train/pretrain.py" \
  "${train_args[@]}" \
  "$@"