ligaments-dev committed on
Commit
8a1d274
·
verified ·
1 Parent(s): d21a432

Upload grpo_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. grpo_training.py +1 -1
grpo_training.py CHANGED
@@ -33,7 +33,7 @@ config = GRPOConfig(
33
  output_dir=output_model,
34
  num_train_epochs=3,
35
  per_device_train_batch_size=1,
36
- per_device_eval_batch_size=1,
37
  gradient_accumulation_steps=8, # Effective batch size = 8
38
  learning_rate=1e-6,
39
 
 
33
  output_dir=output_model,
34
  num_train_epochs=3,
35
  per_device_train_batch_size=1,
36
+ per_device_eval_batch_size=8, # Must be divisible by num_generations (default 8)
37
  gradient_accumulation_steps=8, # Effective batch size = 8
38
  learning_rate=1e-6,
39