oxdev committed on
Commit
7168e35
·
verified ·
1 Parent(s): 74022f8

Upload train_grpo_job.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_grpo_job.py +2 -2
train_grpo_job.py CHANGED
@@ -174,9 +174,9 @@ def main():
174
  config = GRPOConfig(
175
  output_dir=OUTPUT_DIR,
176
  num_train_epochs=2,
177
- per_device_train_batch_size=2,
178
  num_generations=4,
179
- max_completion_length=1536,
180
  learning_rate=5e-7,
181
  beta=0.0,
182
  scale_rewards=True,
 
174
  config = GRPOConfig(
175
  output_dir=OUTPUT_DIR,
176
  num_train_epochs=2,
177
+ per_device_train_batch_size=4,
178
  num_generations=4,
179
+ max_completion_length=1024,
180
  learning_rate=5e-7,
181
  beta=0.0,
182
  scale_rewards=True,