Spaces:
Sleeping
Sleeping
add log
Browse files
- training/grpo_train.py +9 -0
training/grpo_train.py
CHANGED
|
@@ -383,6 +383,15 @@ def _collect_group(
|
|
| 383 |
turns_per_ep: List[List[str]] = []
|
| 384 |
|
| 385 |
for k in range(group_size):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
policy.reset(task_name)
|
| 387 |
seed = stage * 100_000 + group_idx * group_size + k
|
| 388 |
|
|
|
|
| 383 |
turns_per_ep: List[List[str]] = []
|
| 384 |
|
| 385 |
for k in range(group_size):
|
| 386 |
+
# An apparent "hang" of an HF Job / nohup run is usually just the first group: tqdm only
|
| 387 |
+
# advances *between* groups, while each rollout is many full
|
| 388 |
+
# ``model.generate`` calls (see ``_InlinePolicy``). Log + flush so
|
| 389 |
+
# logs appear before the first group finishes.
|
| 390 |
+
print(
|
| 391 |
+
f" group {group_idx} rollout {k + 1}/{group_size} "
|
| 392 |
+
f"({task_name}, ≤{max_steps} steps) …",
|
| 393 |
+
flush=True,
|
| 394 |
+
)
|
| 395 |
policy.reset(task_name)
|
| 396 |
seed = stage * 100_000 + group_idx * group_size + k
|
| 397 |
|