zz1358m committed on
Commit
eca69d7
·
verified ·
1 Parent(s): 160479a

Update SofT-GRPO-deepscaler-8k-llama3.sh

Browse files
Files changed (1) hide show
  1. SofT-GRPO-deepscaler-8k-llama3.sh +1 -1
SofT-GRPO-deepscaler-8k-llama3.sh CHANGED
@@ -51,7 +51,7 @@ actor_rollout_ref.ref.strategy=fsdp2 \
51
  algorithm.use_kl_in_reward=False \
52
  trainer.critic_warmup=0 \
53
  trainer.logger=['console','tensorboard'] \
54
- trainer.project_name='verl_soft-thinking-grpo_example_gsm8k_0_1_0_95_llama3b' \
55
  trainer.experiment_name='8k-train' \
56
  trainer.val_before_train=True \
57
  trainer.n_gpus_per_node=8 \
 
51
  algorithm.use_kl_in_reward=False \
52
  trainer.critic_warmup=0 \
53
  trainer.logger=['console','tensorboard'] \
54
+ trainer.project_name='verl_soft-thinking-grpo_example_deepscaler_0_1_0_95_llama3b' \
55
  trainer.experiment_name='8k-train' \
56
  trainer.val_before_train=True \
57
  trainer.n_gpus_per_node=8 \