File size: 788 Bytes
48ecd01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
# Usage: bash scripts/run_pretrain.sh [additional pretrain.py args]
# Runs 8-GPU DDP pretraining via torchrun.
#
# Any extra arguments are forwarded verbatim to pretrain.py.
# Examples:
#   bash scripts/run_pretrain.sh --max_steps 200000
#   bash scripts/run_pretrain.sh --resume checkpoints/checkpoint-0010000

set -euo pipefail

# Resolve the project root from this script's own location so the launcher
# works regardless of the caller's current working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Launch settings. Both default to the previously hard-coded values and may be
# overridden from the environment, e.g.:
#   NPROC_PER_NODE=4 MASTER_PORT=29501 bash scripts/run_pretrain.sh
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
MASTER_PORT="${MASTER_PORT:-29500}"

# Fail early with a clear message if the launcher is not on PATH.
# 127 is the status the shell itself uses for "command not found".
if ! command -v torchrun >/dev/null 2>&1; then
  printf 'error: torchrun not found in PATH\n' >&2
  exit 127
fi

# exec replaces this shell with torchrun, so a signal sent to the script's PID
# is delivered to torchrun itself instead of a bash wrapper, and torchrun's
# exit status becomes the script's exit status.
#
# "$@" is appended after the default flags; whether a repeated flag overrides
# an earlier one depends on pretrain.py's argument parser.
exec torchrun \
  --nproc_per_node="$NPROC_PER_NODE" \
  --master_port="$MASTER_PORT" \
  "$PROJECT_DIR/train/pretrain.py" \
  --config "$PROJECT_DIR/configs/small.yaml" \
  --train_data "$PROJECT_DIR/data/train.bin" \
  --val_data "$PROJECT_DIR/data/val.bin" \
  --checkpoint_dir "$PROJECT_DIR/checkpoints" \
  --batch_size 8 \
  --grad_accum 4 \
  --warmup_steps 2000 \
  "$@"