Update SofT-GRPO-deepscaler-8k-llama3.sh
Browse files
SofT-GRPO-deepscaler-8k-llama3.sh
CHANGED
|
@@ -51,7 +51,7 @@ actor_rollout_ref.ref.strategy=fsdp2 \
 algorithm.use_kl_in_reward=False \
 trainer.critic_warmup=0 \
 trainer.logger=['console','tensorboard'] \
-trainer.project_name='verl_soft-thinking-
+trainer.project_name='verl_soft-thinking-grpo_example_deepscaler_0_1_0_95_llama3b' \
 trainer.experiment_name='8k-train' \
 trainer.val_before_train=True \
 trainer.n_gpus_per_node=8 \