YUNTA88 committed on
Commit
37a7702
·
verified ·
1 Parent(s): 2ff6c5a

Upload root_scripts/run_rlvr.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. root_scripts/run_rlvr.sh +100 -0
root_scripts/run_rlvr.sh ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# One-Shot RLVR: Physics (VLM) - Corrected for absolute paths
# Based on training_physics_mechanics_oe.sh (canonical VLM adaptation)
#
# Usage: bash run_rlvr.sh <MODEL_PATH> <EXPERIMENT_NAME>
#   MODEL_PATH       optional; defaults to the Qwen2.5-VL-3B-Instruct path below
#   EXPERIMENT_NAME  optional; defaults to "rlvr_baseline". Used for the
#                    checkpoint directory, the log file name and the trainer's
#                    experiment name.
#
# Environment:
#   WANDB_PROJECT  project name passed to the trainer
#                  (default: physics_one_shot_rlvr)
#   WANDB_API_KEY  re-exported for the trainer; empty string when unset
#
# Exit status: non-zero if setup fails or if the training command fails.

# -e / -u: stop on failed setup steps and on unset variables.
# pipefail: without it the final `python3 ... | tee` pipeline would report
# tee's status, so a crashed training run would still exit 0.
set -euo pipefail

# Export the API key BEFORE enabling xtrace so the secret is never echoed
# into the trace output.
export WANDB_API_KEY="${WANDB_API_KEY:-}"

set -x

# ==================== Accept CLI args ====================
MODEL_PATH="${1:-/workspace/rl4phyx/models/Qwen2.5-VL-3B-Instruct}"
EXPERIMENT_NAME="${2:-rlvr_baseline}"

# ==================== Configuration ====================
WANDB_PROJECT="${WANDB_PROJECT:-physics_one_shot_rlvr}"
WORK_DIR="/workspace/rl4phyx"
BASE="${WORK_DIR}/RL4Phyx/ZeroSearch/One-Shot-RLVR"
DATA_FILE="${BASE}/data/train/physics_vlm/mechanics/mechanics_1_rl_numerical.parquet"
VAL_FILE="${WORK_DIR}/RL4Phyx/oneshot/validation_data/metaphyx_oe_1533.parquet"
IMAGE_DIR="${BASE}/data/train/physics_vlm/mechanics"

# GPU
N_GPUS=4
GPUS="0,1,2,3"

# Training Hyperparameters (aligned with original One-Shot RLVR)
BATCH_SIZE="128"
LEARNING_RATE="1e-6"
ROLLOUT_N="8"
TEMPERATURE="0.7"
TENSOR_PARALLEL="2"

# Checkpoints and logs
CHECKPOINT_DIR="${WORK_DIR}/checkpoints/${EXPERIMENT_NAME}"
LOG_DIR="${WORK_DIR}/logs"
LOG_FILE="${LOG_DIR}/${EXPERIMENT_NAME}.log"

echo "=========================================="
echo "One-Shot RLVR: Physics (VLM)"
echo "Model: ${MODEL_PATH}"
echo "Experiment: ${EXPERIMENT_NAME}"
echo "Data: ${DATA_FILE}"
echo "Checkpoint: ${CHECKPOINT_DIR}"
echo "=========================================="

mkdir -p -- "${CHECKPOINT_DIR}"
mkdir -p -- "${LOG_DIR}"

# IMPORTANT: Run from /workspace/rl4phyx so Python imports pip veRL v0.7
# NOT from One-Shot-RLVR dir (which has local verl/ v0.2)
# A failed cd must abort: continuing from another directory could pick up
# the wrong verl package.
cd -- "${WORK_DIR}" || { echo "cannot cd to ${WORK_DIR}" >&2; exit 1; }

# vLLM backend
export VLLM_ATTENTION_BACKEND=XFORMERS
export VLLM_USE_TRITON_FLASH_ATTN=0

# ==================== Training ====================
# Overrides are collected in an array so each one reaches the trainer as
# exactly one argument, even if a path or experiment name contains spaces.
train_args=(
  algorithm.adv_estimator=grpo
  +data_domain=physics
  "data.train_files=${DATA_FILE}"
  "data.val_files=${VAL_FILE}"
  "data.train_batch_size=${BATCH_SIZE}"
  data.val_batch_size=1533
  data.max_prompt_length=2048
  data.max_response_length=3072
  +data.is_multimodal=True
  "+data.vlm_model=${MODEL_PATH}"
  "+data.image_dir=${IMAGE_DIR}"
  reward_model.reward_manager=naive
  "actor_rollout_ref.model.path=${MODEL_PATH}"
  "actor_rollout_ref.actor.optim.lr=${LEARNING_RATE}"
  actor_rollout_ref.model.use_remove_padding=False
  "actor_rollout_ref.actor.ppo_mini_batch_size=${BATCH_SIZE}"
  actor_rollout_ref.actor.use_dynamic_bsz=True
  actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000
  actor_rollout_ref.actor.use_kl_loss=True
  actor_rollout_ref.actor.kl_loss_coef=0.001
  actor_rollout_ref.actor.kl_loss_type=low_var_kl
  actor_rollout_ref.model.enable_gradient_checkpointing=True
  actor_rollout_ref.actor.fsdp_config.param_offload=False
  +actor_rollout_ref.actor.fsdp_config.grad_offload=False
  actor_rollout_ref.actor.fsdp_config.optimizer_offload=False
  "actor_rollout_ref.rollout.tensor_model_parallel_size=${TENSOR_PARALLEL}"
  actor_rollout_ref.rollout.name=vllm
  "actor_rollout_ref.rollout.temperature=${TEMPERATURE}"
  "+actor_rollout_ref.rollout.val_temperature=${TEMPERATURE}"
  actor_rollout_ref.rollout.gpu_memory_utilization=0.7
  "actor_rollout_ref.rollout.n=${ROLLOUT_N}"
  +actor_rollout_ref.rollout.n_val=1
  actor_rollout_ref.ref.fsdp_config.param_offload=True
  algorithm.kl_ctrl.kl_coef=0.001
  trainer.critic_warmup=0
  # Single-quoted so the shell never treats [...] as a glob pattern; the
  # trainer receives the same bytes the unquoted form produced.
  'trainer.logger=[console,wandb]'
  "trainer.project_name=${WANDB_PROJECT}"
  "trainer.experiment_name=${EXPERIMENT_NAME}"
  "trainer.checkpoints_dir=${CHECKPOINT_DIR}"
  +trainer.val_before_train=True
  "trainer.n_gpus_per_node=${N_GPUS}"
  trainer.nnodes=1
  trainer.save_freq=20
  trainer.test_freq=20
  trainer.default_hdfs_dir=null
  trainer.total_epochs=2000
)

# stdout and stderr of the trainer are both shown and copied to the log file.
CUDA_VISIBLE_DEVICES="${GPUS}" python3 -m verl.trainer.main_ppo "${train_args[@]}" 2>&1 \
  | tee "${LOG_FILE}"