File size: 333 Bytes
7e56b55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/bin/bash
# Launch train_sft.py through torchrun with 4 worker processes and mirror the
# combined stdout/stderr to sft_training_v5.log.
#
# Exit status: non-zero if environment setup fails or if any stage of the
# training pipeline (torchrun or tee) fails.

# Trace every command so the console/log shows exactly what was executed.
set -x
# Without pipefail the pipeline below would report tee's status, so a crashed
# training run would still make this script exit 0.
set -o pipefail

# Setup steps are checked explicitly instead of using `set -eu`: the conda
# activation hooks are third-party shell code and may not tolerate those modes.
source ~/miniconda3/etc/profile.d/conda.sh || exit 1
conda activate verl_new || exit 1

# train_sft.py and the log file are resolved relative to this directory, so
# abort rather than run from wherever the caller happened to be.
cd ~/RL4Phyx/SFT || exit 1

# Use GPUs 4,5,6,7
export CUDA_VISIBLE_DEVICES=4,5,6,7
# Keep Python output unbuffered so lines reach tee (and the log) promptly.
export PYTHONUNBUFFERED=1

# Full fine-tuning with DeepSpeed ZeRO-2
# NOTE(review): no DeepSpeed/ZeRO option is passed on this command line;
# presumably it is configured inside train_sft.py — confirm.
torchrun --nproc_per_node=4 --master_port=29501 \
    train_sft.py \
    2>&1 | tee sft_training_v5.log