Spaces:
Sleeping
Sleeping
Fix: set per_device_train_batch_size=4 so that num_generations=4 divides the batch size evenly
Browse files - train_on_hf.py +2 -2
train_on_hf.py
CHANGED
|
@@ -299,8 +299,8 @@ def train_strategist(data_dir: Path, max_samples: int = 10000):
|
|
| 299 |
grpo_config = GRPOConfig(
|
| 300 |
output_dir="./strategist_grpo",
|
| 301 |
num_train_epochs=1,
|
| 302 |
-
per_device_train_batch_size=
|
| 303 |
-
gradient_accumulation_steps=
|
| 304 |
learning_rate=5e-6,
|
| 305 |
num_generations=4,
|
| 306 |
max_completion_length=16,
|
|
|
|
| 299 |
grpo_config = GRPOConfig(
|
| 300 |
output_dir="./strategist_grpo",
|
| 301 |
num_train_epochs=1,
|
| 302 |
+
per_device_train_batch_size=4,
|
| 303 |
+
gradient_accumulation_steps=4,
|
| 304 |
learning_rate=5e-6,
|
| 305 |
num_generations=4,
|
| 306 |
max_completion_length=16,
|