oxdev committed on
Commit
0ee8b77
·
verified ·
1 Parent(s): eac5c9b

Upload train_grpo_job.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_grpo_job.py +4 -3
train_grpo_job.py CHANGED
@@ -174,9 +174,10 @@ def main():
174
  config = GRPOConfig(
175
  output_dir=OUTPUT_DIR,
176
  num_train_epochs=2,
177
- per_device_train_batch_size=4,
178
- num_generations=4,
179
- max_completion_length=1024,
 
180
  learning_rate=5e-7,
181
  beta=0.0,
182
  scale_rewards=True,
 
174
  config = GRPOConfig(
175
  output_dir=OUTPUT_DIR,
176
  num_train_epochs=2,
177
+ per_device_train_batch_size=2,
178
+ gradient_accumulation_steps=2,
179
+ num_generations=2,
180
+ max_completion_length=512,
181
  learning_rate=5e-7,
182
  beta=0.0,
183
  scale_rewards=True,