# NOTE(review): the three lines below were Hugging Face web-page scrape residue,
# not shell code — commented out so the script can run. The shebang on the next
# line should ideally be the very first line of the file; remove these when
# cleaning up.
# Yuhan123's picture
# Upload folder using huggingface_hub
# a454c23 verified
#!/bin/bash
# Script to run PPO training with proper distributed setup.
#
# Activates the `trl` conda env, pins the visible GPUs, disables NCCL
# shared-memory/P2P transports (they were hanging on this host), then
# launches TRL PPO training in the background, logging to ppo_training.log.

# Fail fast if activation or any setup step breaks; catch pipeline failures.
# -u is enabled only after conda activation, because conda's activate
# scripts reference variables that may be unset.
set -eo pipefail

# Activate environment
source ~/miniconda3/bin/activate trl
set -u

# Set CUDA devices to avoid problematic GPU3
export CUDA_VISIBLE_DEVICES=0,1,2

# Disable NCCL shared memory and P2P to avoid hanging
export NCCL_SHM_DISABLE=1
export NCCL_P2P_DISABLE=1

# Remove CUDA debugging flag that was causing initialization issues
# export CUDA_LAUNCH_BLOCKING=1

# Run PPO training with accelerate following official TRL script format.
# Runs in the background; stdout+stderr go to ppo_training.log.
accelerate launch --config_file accelerate_config.yaml \
  train_ppo.py \
  --dataset_name ./data/multipref_train.hf \
  --model_name_or_path allenai/OLMo-2-0425-1B-SFT \
  --sft_model_path allenai/OLMo-2-0425-1B-SFT \
  --reward_model_path ./results/reward_model_qwen_single \
  --output_dir ./results/ppo_model \
  --learning_rate 1.4e-5 \
  --per_device_train_batch_size 3 \
  --gradient_accumulation_steps 14 \
  --total_episodes 500 \
  --num_ppo_epochs 4 \
  --response_length 128 \
  --local_rollout_forward_batch_size 1 \
  > ppo_training.log 2>&1 &

# Capture the background PID so the run can be monitored or killed later.
pid=$!
echo "PPO training started (PID ${pid}). Monitor progress with: tail -f ppo_training.log"