File size: 507 Bytes
9645783
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
#!/bin/bash
#
# Launch the fullft_phyx_f SFT training run with torchrun and capture all of
# its output in a timestamped log file.
#
# Usage: run directly; takes no arguments.
#
# Side effects:
#   - changes into the SFT source directory before launching
#   - creates the log directory if it does not exist
#   - writes <log dir>/train_fullft_phyx_f_<YYYYmmdd_HHMMSS>.log
#
# Exit status: the exit status of torchrun (0 on success), or 1 if the
# working directory or log directory cannot be prepared.
#
# The last log line is "TRAINING_COMPLETE at <date>" only when torchrun
# succeeded; otherwise it is "TRAINING_FAILED (exit code N) at <date>", so a
# crashed run is never reported as complete.

# -u: fail on unset variables; pipefail: surface failures inside pipelines.
# -e is intentionally not used: the torchrun status is captured and handled
# explicitly below so the final log line can always be written.
set -uo pipefail

readonly WORK_DIR="/workspace/rl4phyx/RL4Phyx/SFT"
readonly LOG_DIR="/workspace/rl4phyx/logs"

# Abort early instead of launching torchrun from the wrong directory, where
# the relative training script path would not resolve.
cd "$WORK_DIR" || {
  printf 'error: cannot cd to %s\n' "$WORK_DIR" >&2
  exit 1
}

# Every redirect below depends on this directory existing.
mkdir -p "$LOG_DIR" || {
  printf 'error: cannot create log directory %s\n' "$LOG_DIR" >&2
  exit 1
}

# Restrict the run to four specific GPUs; must agree with --nproc_per_node.
export CUDA_VISIBLE_DEVICES=2,5,6,7
# Unbuffered Python stdout/stderr so the log file is updated promptly.
export PYTHONUNBUFFERED=1

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG="${LOG_DIR}/train_fullft_phyx_f_${TIMESTAMP}.log"

echo "Starting fullft_phyx_f training at $(date)" > "$LOG"
# Descriptive banner only: none of these values are passed to the training
# script from here. NOTE(review): keep it in sync with the script's config.
echo "4 GPUs (2,5,6,7), lr=1e-5, warmup=0.03, grad_accum=16, DeepSpeed ZeRO-2, batch=64" >> "$LOG"

# Capture the launcher's status rather than discarding it, so both the log
# and this script's exit code reflect what actually happened.
status=0
torchrun --nproc_per_node=4 --master_port=29501 train_sft_phyx_fullft_freeze.py >> "$LOG" 2>&1 || status=$?

if (( status == 0 )); then
  echo "TRAINING_COMPLETE at $(date)" >> "$LOG"
else
  echo "TRAINING_FAILED (exit code ${status}) at $(date)" >> "$LOG"
fi

exit "$status"