Rayugacodes committed on
Commit
278a0ec
·
verified ·
1 Parent(s): beef760

Fix: set per_device_train_batch_size=4 so the batch size is evenly divisible by num_generations=4

Browse files
Files changed (1) hide show
  1. train_on_hf.py +2 -2
train_on_hf.py CHANGED
@@ -299,8 +299,8 @@ def train_strategist(data_dir: Path, max_samples: int = 10000):
299
  grpo_config = GRPOConfig(
300
  output_dir="./strategist_grpo",
301
  num_train_epochs=1,
302
- per_device_train_batch_size=2,
303
- gradient_accumulation_steps=8,
304
  learning_rate=5e-6,
305
  num_generations=4,
306
  max_completion_length=16,
 
299
  grpo_config = GRPOConfig(
300
  output_dir="./strategist_grpo",
301
  num_train_epochs=1,
302
+ per_device_train_batch_size=4,
303
+ gradient_accumulation_steps=4,
304
  learning_rate=5e-6,
305
  num_generations=4,
306
  max_completion_length=16,