Spaces:
Sleeping
Sleeping
| # launch_rlhf.sh - 启动PPO RLHF训练 | |
| echo "🚀 Starting PPO RLHF Training..." | |
| # 检查前置条件 | |
| echo "📋 Checking prerequisites..." | |
| # 检查Teacher模型是否存在 | |
| if [ ! -d "./merged_model" ]; then | |
| echo "❌ Error: Teacher model not found at ./merged_model" | |
| echo " Please run SFT training first and merge the model" | |
| exit 1 | |
| fi | |
| # 检查GPU资源 | |
| echo "📊 GPU Resources:" | |
| nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv | |
| # 检查可用显存(建议至少80GB用于RLHF) | |
| AVAILABLE_MEMORY=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits | awk '{sum+=$1} END {print sum}') | |
| echo "Available GPU Memory: ${AVAILABLE_MEMORY} MB" | |
| if [ "$AVAILABLE_MEMORY" -lt 80000 ]; then | |
| echo "⚠️ Warning: RLHF training requires significant GPU memory (>80GB recommended)" | |
| echo " Consider using gradient checkpointing or smaller batch sizes" | |
| fi | |
| # 设置环境变量 | |
| export CUDA_VISIBLE_DEVICES=0,1,2,3 # 根据可用GPU调整 | |
| export TOKENIZERS_PARALLELISM=false | |
| export WANDB_PROJECT="rlhf-teacher-training" | |
| export WANDB_RUN_NAME="ppo-rlhf-$(date +%Y%m%d_%H%M%S)" | |
| # 创建输出目录 | |
| mkdir -p ./rlhf_teacher_model | |
| mkdir -p ./rlhf_logs | |
| # 安装额外依赖 | |
| echo "📦 Installing RLHF dependencies..." | |
| pip install -r rlhf_requirements.txt | |
| # 启动训练 | |
| echo "🔥 Starting PPO RLHF training..." | |
| # 单GPU训练 | |
| if [ "$1" = "single" ]; then | |
| CUDA_VISIBLE_DEVICES=0 python ppo_rlhf_teacher.py 2>&1 | tee ./rlhf_logs/rlhf_$(date +%Y%m%d_%H%M%S).log | |
| # 多GPU训练(推荐) | |
| else | |
| accelerate launch \ | |
| --config_file accelerate_config.yaml \ | |
| --num_processes 4 \ | |
| --main_process_port 29500 \ | |
| ppo_rlhf_teacher.py 2>&1 | tee ./rlhf_logs/rlhf_$(date +%Y%m%d_%H%M%S).log | |
| fi | |
| echo "✅ RLHF training completed. Check logs for details." | |
| # 训练后评估 | |
| echo "🧪 Running post-training evaluation..." | |
| python evaluate_rlhf_model.py --model_path ./rlhf_teacher_model |