Update SofT-GRPO-deepscaler-8k-qwen7.sh
Browse files
SofT-GRPO-deepscaler-8k-qwen7.sh
CHANGED
|
@@ -50,7 +50,7 @@ actor_rollout_ref.ref.strategy=fsdp2 \
|
|
| 50 |
algorithm.use_kl_in_reward=False \
|
| 51 |
trainer.critic_warmup=0 \
|
| 52 |
trainer.logger=['console','tensorboard'] \
|
| 53 |
-
trainer.project_name='verl_soft-thinking-
|
| 54 |
trainer.experiment_name='8k-train' \
|
| 55 |
trainer.val_before_train=True \
|
| 56 |
trainer.n_gpus_per_node=8 \
|
|
@@ -58,7 +58,7 @@ trainer.nnodes=1 \
|
|
| 58 |
trainer.save_freq=200 \
|
| 59 |
trainer.test_freq=20 \
|
| 60 |
trainer.default_hdfs_dir=null \
|
| 61 |
-
trainer.default_local_dir=********************************/verl-save/verl_soft-thinking-grpo_deepscaler-0-1-0-
|
| 62 |
trainer.total_epochs=1
|
| 63 |
|
| 64 |
EOF
|
|
|
|
| 50 |
algorithm.use_kl_in_reward=False \
|
| 51 |
trainer.critic_warmup=0 \
|
| 52 |
trainer.logger=['console','tensorboard'] \
|
| 53 |
+
trainer.project_name='verl_soft-thinking-grpo_example_deepscaler_0_1_0_95_qwen7b' \
|
| 54 |
trainer.experiment_name='8k-train' \
|
| 55 |
trainer.val_before_train=True \
|
| 56 |
trainer.n_gpus_per_node=8 \
|
|
|
|
| 58 |
trainer.save_freq=200 \
|
| 59 |
trainer.test_freq=20 \
|
| 60 |
trainer.default_hdfs_dir=null \
|
| 61 |
+
trainer.default_local_dir=********************************/verl-save/verl_soft-thinking-grpo_deepscaler-0-1-0-95_qwen7b/8k-train \
|
| 62 |
trainer.total_epochs=1
|
| 63 |
|
| 64 |
EOF
|