agentic-rl-main / scripts /analyze_trimode_log.py

Upload folder using huggingface_hub

7451c3f verified 4 days ago

12.2 kB

	"""Analyze TriMode training log for routing, rewards, generation quality."""
	import json
	import re
	import sys
	from collections import defaultdict

	LOG = sys.argv[1] if len(sys.argv) > 1 else "train_trimode_4gpu_20260609_175823.log"

	re_step = re.compile(r"\[step=(\d+)\]\[every=10\]")
	re_routing = re.compile(r"\[routing\] mode routing summary \\| (.+)")
	re_reward = re.compile(r"\[reward\] aggregate reward stats \\| (.+)")
	re_gen = re.compile(r"\[generation\] completion mask summary \\| (.+)")
	re_probe = re.compile(
	r"global_step=(\d+) \\| .*?"
	r"effective_tokens_mean=([\d.]+) \\| .*?"
	r"paren_then_eos_count=(\d+) \\| .*?"
	r"one_token_count=(\d+) \\| .*?"
	r"eos_terminated_rate=([\d.]+)"
	)
	re_loss = re.compile(r"\[loss\] GRPO / OPSD loss breakdown \\| (.+)")
	re_per_sample = re.compile(
	r"\[reward\] per_sample\[(\d+)\] \\| group=(\d+) \\| format=([\d.]+) \\| acc=([\d.]+)"
	)
	re_completion = re.compile(
	r"\[generation\] sample\[(\d+)\] \\| group=(\d+) \\| effective_tokens=(\d+) \\| has_eos=(\w+) \\| text='([^']*)'"
	)


	def parse_kv(s: str) -> dict:
	d = {}
	for m in re.finditer(r"(\w+)=([^\|]+?)(?=\s\\|\|\s$)", s):
	k, v = m.group(1), m.group(2).strip()
	try:
	if v.startswith("{") or v.startswith("["):
	d[k] = json.loads(v.replace("'", '"'))
	elif re.match(r"^-?\d+\.\d+([eE][+-]?\d+)?$", v) or re.match(r"^-?\d+\.\d+$", v):
	d[k] = float(v)
	elif re.match(r"^-?\d+$", v):
	d[k] = int(v)
	else:
	d[k] = v
	except (json.JSONDecodeError, ValueError):
	d[k] = v
	return d


	def main():
	routing = {}
	rewards = {}
	gens = {}
	losses_by_step = defaultdict(list)
	probes = []
	per_sample_rewards = defaultdict(list)
	sample_completions = defaultdict(list)

	first_ts = last_ts = None
	grpo_routes = sft_routes = 0
	total_debug_routes = 0

	with open(LOG, "r", encoding="utf-8", errors="replace") as f:
	for line in f:
	m = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]", line)
	if m:
	if first_ts is None:
	first_ts = m.group(1)
	last_ts = m.group(1)

	if "completion_mode_counts=" in line and "rank=0/4" in line:
	cm = re.search(r'completion_mode_counts=(\{[^}]+\})', line)
	if cm:
	total_debug_routes += 1
	c = json.loads(cm.group(1).replace("'", '"'))
	grpo_routes += c.get("GRPO", 0)
	sft_routes += c.get("SFT", 0)

	if "[OPSD-PROBE]" in line and "rank=0/4" in line and "[generate] raw generate summary" in line:
	pm = re_probe.search(line)
	if pm:
	probes.append(
	(
	int(pm.group(1)),
	float(pm.group(2)),
	int(pm.group(3)),
	int(pm.group(4)),
	float(pm.group(5)),
	)
	)
	continue

	if "[OPSD-DETAIL]" not in line or "rank=0/4" not in line:
	continue

	sm = re_step.search(line)
	if not sm:
	continue
	step = int(sm.group(1))

	if "[routing]" in line:
	rm = re_routing.search(line)
	if rm:
	routing[step] = parse_kv(rm.group(1))
	elif "[reward] aggregate" in line:
	rm = re_reward.search(line)
	if rm:
	rewards[step] = parse_kv(rm.group(1))
	elif "[reward] per_sample" in line:
	rm = re_per_sample.search(line)
	if rm:
	per_sample_rewards[step].append(
	(int(rm.group(1)), float(rm.group(3)), float(rm.group(4)))
	)
	elif "[generation] completion mask" in line:
	rm = re_gen.search(line)
	if rm:
	gens[step] = parse_kv(rm.group(1))
	elif "[generation] sample[" in line:
	rm = re_completion.search(line)
	if rm and len(sample_completions[step]) < 4:
	sample_completions[step].append(rm.group(5)[:120])
	elif "[loss] GRPO / OPSD loss breakdown" in line:
	rm = re_loss.search(line)
	if rm:
	losses_by_step[step].append(parse_kv(rm.group(1)))

	print("=== TRAINING OVERVIEW ===")
	print(f"Time range: {first_ts} -> {last_ts}")
	print(f"OPSD-DETAIL routing snapshots (rank0): {len(routing)}")
	if routing:
	print(f"Global steps: {min(routing)} - {max(routing)}")
	print(f"OPSD-PROBE generate summaries (rank0): {len(probes)}")
	if probes:
	print(f"Probe global steps: {min(p[0] for p in probes)} - {max(p[0] for p in probes)}")

	print("\n=== ROUTING RATIOS (rank0 DETAIL, every 10 steps) ===")
	print(f"{'step':>6} {'OPSD':>7} {'GRPO':>7} {'SFT':>7} {'has_correct':>16} {'mask_ratio':>10}")
	steps_sorted = sorted(routing.keys())
	sample_idx = list(range(0, len(steps_sorted), max(1, len(steps_sorted) // 12)))
	if steps_sorted and steps_sorted[-1] not in [steps_sorted[i] for i in sample_idx]:
	sample_idx.append(len(steps_sorted) - 1)

	tot_opsd = tot_grpo = tot_sft = 0
	any_grpo_steps = []
	any_sft_steps = []
	for step in steps_sorted:
	r = routing[step]
	counts = r.get("completion_mode_counts", {})
	if isinstance(counts, str):
	counts = json.loads(counts.replace("'", '"'))
	total = sum(counts.values()) or 32
	opsd = counts.get("OPSD", 0)
	grpo = counts.get("GRPO", 0)
	sft = counts.get("SFT", 0)
	tot_opsd += opsd
	tot_grpo += grpo
	tot_sft += sft
	if grpo > 0:
	any_grpo_steps.append(step)
	if sft > 0:
	any_sft_steps.append(step)

	for i in sample_idx:
	step = steps_sorted[i]
	r = routing[step]
	counts = r.get("completion_mode_counts", {})
	if isinstance(counts, str):
	counts = json.loads(counts.replace("'", '"'))
	total = sum(counts.values()) or 32
	print(
	f"{step:>6} {counts.get('OPSD', 0) / total * 100:>6.1f}%"
	f" {counts.get('GRPO', 0) / total * 100:>6.1f}%"
	f" {counts.get('SFT', 0) / total * 100:>6.1f}%"
	f" {str(r.get('has_correct', '?')):>16}"
	f" {r.get('opsd_mask_ratio', 0) * 100:>9.1f}%"
	)

	tot = tot_opsd + tot_grpo + tot_sft
	print(
	f"\nAggregate ({len(routing)} snapshots x 32 samples):"
	f" OPSD={tot_opsd / tot * 100:.1f}%"
	f" GRPO={tot_grpo / tot * 100:.1f}%"
	f" SFT={tot_sft / tot * 100:.1f}%"
	)
	print(f"Steps with any GRPO on rank0: {len(any_grpo_steps)} ({any_grpo_steps[:5]}...)")
	print(f"Steps with any SFT on rank0: {len(any_sft_steps)}")

	# all-rank debug routing (rank0 only in DETAIL but DEBUG has all ranks)
	opsd_all = total_debug_routes * 32 - grpo_routes - sft_routes
	tot_all = total_debug_routes * 32
	if tot_all:
	print(
	f"\nAll-rank DEBUG routing lines (rank0 only in file grep):"
	f" GRPO samples={grpo_routes} SFT={sft_routes}"
	f" (from rank=0 completion_mode_counts lines: {total_debug_routes})"
	)

	print("\n=== REWARD / FORMAT LEARNING (rank0) ===")
	print(f"{'step':>6} {'fmt_zero':>9} {'acc_zero':>9} {'acc_sum':>8} {'fmt_sum':>8} {'w_mean':>8}")
	reward_steps = sorted(rewards.keys())
	r_sample = list(range(0, len(reward_steps), max(1, len(reward_steps) // 12)))
	if reward_steps and reward_steps[-1] not in [reward_steps[i] for i in r_sample]:
	r_sample.append(len(reward_steps) - 1)
	for i in r_sample:
	step = reward_steps[i]
	r = rewards[step]
	print(
	f"{step:>6} {r.get('format_zero_rate', 0) * 100:>8.1f}%"
	f" {r.get('acc_zero_rate', 0) * 100:>8.1f}%"
	f" {r.get('acc_sum', 0):>8.2f}"
	f" {r.get('format_sum', 0):>8.2f}"
	f" {r.get('weighted_mean', 0):>8.4f}"
	)
	if rewards:
	s0, sL = rewards[min(rewards)], rewards[max(rewards)]
	print(f"\nStep {min(rewards)} -> {max(rewards)} delta:")
	print(f" format_zero_rate: {s0.get('format_zero_rate', 0) * 100:.1f}% -> {sL.get('format_zero_rate', 0) * 100:.1f}%")
	print(f" acc_zero_rate: {s0.get('acc_zero_rate', 0) * 100:.1f}% -> {sL.get('acc_zero_rate', 0) * 100:.1f}%")
	print(f" acc_sum: {s0.get('acc_sum', 0):.2f} -> {sL.get('acc_sum', 0):.2f}")
	print(f" format_sum: {s0.get('format_sum', 0):.2f} -> {sL.get('format_sum', 0):.2f}")

	# format reward rate from per-sample
	print("\n=== FORMAT REWARD RATE (per-sample, rank0) ===")
	for step in [0, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 850]:
	if step in per_sample_rewards:
	samples = per_sample_rewards[step]
	fmt_ok = sum(1 for _, f, _ in samples if f > 0)
	acc_ok = sum(1 for _, _, a in samples if a > 0)
	print(f"step {step:>4}: format_reward>0: {fmt_ok}/{len(samples)} ({fmt_ok/len(samples)100:.1f}%) acc>0: {acc_ok}/{len(samples)} ({acc_ok/len(samples)100:.1f}%)")

	print("\n=== SAMPLE COMPLETIONS (first 4 per step, rank0) ===")
	for step in [0, 10, 100, 300, 500, max(sample_completions) if sample_completions else 0]:
	if step in sample_completions:
	print(f"\n--- step {step} ---")
	for i, t in enumerate(sample_completions[step][:4]):
	print(f" [{i}] {t}")

	print("\n=== GENERATION LENGTH (rank0 DETAIL) ===")
	print(f"{'step':>6} {'eff_tok':>10} {'eos_term':>9} {'at_max':>8}")
	gen_steps = sorted(gens.keys())
	g_sample = list(range(0, len(gen_steps), max(1, len(gen_steps) // 12)))
	if gen_steps and gen_steps[-1] not in [gen_steps[i] for i in g_sample]:
	g_sample.append(len(gen_steps) - 1)
	for i in g_sample:
	step = gen_steps[i]
	g = gens[step]
	print(
	f"{step:>6} {g.get('effective_tokens_mean', 0):>10.1f}"
	f" {g.get('eos_terminated_rate', 0) * 100:>8.1f}%"
	f" {g.get('at_max_length_rate', 0) * 100:>7.1f}%"
	)

	print("\n=== GENERATION DEGENERATION (rank0 PROBE) ===")
	if probes:
	print(f"{'gstep':>6} {'eff_tok':>9} {'paren_eos':>10} {'one_tok':>8} {'eos_rate':>8}")
	pick = [0, 1, 2, 3, 5, 10, 20, 50, 100, 200, 400, 600, 800, 850]
	probe_by_gs = {p[0]: p for p in probes}
	for gs in pick:
	if gs in probe_by_gs:
	p = probe_by_gs[gs]
	print(f"{p[0]:>6} {p[1]:>9.1f} {p[2]:>10} {p[3]:>8} {p[4]:>7.2f}")
	print(f"\nTotal paren_then_eos: {sum(p[2] for p in probes)} / {len(probes)} regenerates")
	print(f"Total one_token: {sum(p[3] for p in probes)}")
	early = [p for p in probes if p[0] <= 10]
	late = [p for p in probes if p[0] >= 800]
	if early:
	print(f"Early (gs<=10) avg eff_tok={sum(p[1] for p in early)/len(early):.1f}")
	if late:
	print(f"Late (gs>=800) avg eff_tok={sum(p[1] for p in late)/len(late):.1f}, eos_rate={sum(p[4] for p in late)/len(late):.2f}")

	print("\n=== LOSS (rank0, last micro-batch per step) ===")
	loss_keys = ["grpo_loss_scalar", "opsd_loss_scalar", "opsd_loss", "opsd_active_samples", "opsd_samples"]
	loss_steps = sorted(losses_by_step.keys())
	l_sample = list(range(0, len(loss_steps), max(1, len(loss_steps) // 12)))
	if loss_steps and loss_steps[-1] not in [loss_steps[i] for i in l_sample]:
	l_sample.append(len(loss_steps) - 1)
	print(f"{'step':>6} {'grpo':>10} {'opsd':>10}")
	for i in l_sample:
	step = loss_steps[i]
	d = losses_by_step[step][-1]
	opsd_l = d.get("opsd_loss_scalar", d.get("opsd_loss", 0))
	print(f"{step:>6} {d.get('grpo_loss_scalar', 0):>10.4f} {opsd_l:>10.4f}")


	if __name__ == "__main__":
	main()