| set -x | |
| export TOKENIZERS_PARALLELISM=false | |
| if [ -z "$CUDA_VISIBLE_DEVICES" ]; then | |
| NPROC_PER_NODE=$(nvidia-smi -L | wc -l) | |
| else | |
| # 可见 GPU 数量 | |
| NPROC_PER_NODE=$(echo $CUDA_VISIBLE_DEVICES | tr ',' '\n' | wc -l) | |
| fi | |
| echo "Using NPROC_PER_NODE=$NPROC_PER_NODE GPUs" | |
| NNODES=${NNODES:=1} | |
| NPROC_PER_NODE=${NPROC_PER_NODE:=$NPROC_PER_NODE} | |
| NODE_RANK=${NODE_RANK:=0} | |
| MASTER_ADDR=${MASTER_ADDR:=0.0.0.0} | |
| MASTER_PORT=${MASTER_PORT:=62500} | |
| torchrun --nnodes=$NNODES --nproc-per-node $NPROC_PER_NODE --node-rank $NODE_RANK \ | |
| --master-addr=$MASTER_ADDR --master-port=$MASTER_PORT $@ 2>&1 | tee log.txt | |