srinjoyd committed on
Commit
47ef65c
·
1 Parent(s): e205960
Files changed (1) hide show
  1. training/grpo_train.py +9 -0
training/grpo_train.py CHANGED
@@ -383,6 +383,15 @@ def _collect_group(
383
  turns_per_ep: List[List[str]] = []
384
 
385
  for k in range(group_size):
 
 
 
 
 
 
 
 
 
386
  policy.reset(task_name)
387
  seed = stage * 100_000 + group_idx * group_size + k
388
 
 
383
  turns_per_ep: List[List[str]] = []
384
 
385
  for k in range(group_size):
386
+ # An apparent HF Job / nohup "hang" is usually the first group: tqdm only
387
+ # advances *between* groups, while each rollout is many full
388
+ # ``model.generate`` calls (see ``_InlinePolicy``). Log + flush so
389
+ # logs appear before the first group finishes.
390
+ print(
391
+ f" group {group_idx} rollout {k + 1}/{group_size} "
392
+ f"({task_name}, ≤{max_steps} steps) …",
393
+ flush=True,
394
+ )
395
  policy.reset(task_name)
396
  seed = stage * 100_000 + group_idx * group_size + k
397