tobil committed on
Commit
4ffd0d6
·
verified ·
1 Parent(s): a91fb36

Upload train_grpo.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_grpo.py +1 -2
train_grpo.py CHANGED
@@ -247,8 +247,7 @@ def main():
247
 
248
  # GRPO specific
249
  num_generations=4, # Generate 4 completions per prompt
250
- max_new_tokens=256,
251
- temperature=0.8,
252
 
253
  # Training
254
  num_train_epochs=args.epochs,
 
247
 
248
  # GRPO specific
249
  num_generations=4, # Generate 4 completions per prompt
250
+ max_completion_length=256,
 
251
 
252
  # Training
253
  num_train_epochs=args.epochs,