Upload train_grpo.py with huggingface_hub
Browse files
- train_grpo.py +1 -2
train_grpo.py
CHANGED
|
@@ -247,8 +247,7 @@ def main():
|
|
| 247 |
|
| 248 |
# GRPO specific
|
| 249 |
num_generations=4, # Generate 4 completions per prompt
|
| 250 |
-
|
| 251 |
-
temperature=0.8,
|
| 252 |
|
| 253 |
# Training
|
| 254 |
num_train_epochs=args.epochs,
|
|
|
|
| 247 |
|
| 248 |
# GRPO specific
|
| 249 |
num_generations=4, # Generate 4 completions per prompt
|
| 250 |
+
max_completion_length=256,
|
|
|
|
| 251 |
|
| 252 |
# Training
|
| 253 |
num_train_epochs=args.epochs,
|