File size: 788 Bytes
#!/bin/bash
# Usage: [NPROC_PER_NODE=N] [MASTER_PORT=P] bash scripts/run_pretrain.sh [additional pretrain.py args]
# Runs DDP pretraining via torchrun (8 GPUs by default).
#
# Any extra arguments are forwarded verbatim to pretrain.py.
# Examples:
#   bash scripts/run_pretrain.sh --max_steps 200000
#   bash scripts/run_pretrain.sh --resume checkpoints/checkpoint-0010000
#   NPROC_PER_NODE=4 MASTER_PORT=29501 bash scripts/run_pretrain.sh
#
# Optional environment variables:
#   NPROC_PER_NODE  value passed to torchrun --nproc_per_node (default: 8)
#   MASTER_PORT     value passed to torchrun --master_port (default: 29500);
#                   pick another port when a second job runs on the same host
set -euo pipefail

# Resolve paths from this script's own location so the launcher works from any
# working directory. PROJECT_DIR is the parent of the directory holding this
# script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Launch settings: overridable from the environment, defaulting to the values
# this script has always used.
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
MASTER_PORT="${MASTER_PORT:-29500}"

# exec replaces this shell with torchrun, so signals sent to the script's PID
# reach torchrun directly and torchrun's exit status becomes the script's.
# "$@" stays last so caller-supplied flags follow the defaults below.
# NOTE(review): whether a repeated flag overrides its default depends on
# pretrain.py's argument parser -- confirm.
exec torchrun \
  --nproc_per_node="$NPROC_PER_NODE" \
  --master_port="$MASTER_PORT" \
  "$PROJECT_DIR/train/pretrain.py" \
  --config "$PROJECT_DIR/configs/small.yaml" \
  --train_data "$PROJECT_DIR/data/train.bin" \
  --val_data "$PROJECT_DIR/data/val.bin" \
  --checkpoint_dir "$PROJECT_DIR/checkpoints" \
  --batch_size 8 \
  --grad_accum 4 \
  --warmup_steps 2000 \
  "$@"