agentic-rl-main / scripts /parse_trimode_log.py

Upload folder using huggingface_hub

7451c3f verified 4 days ago

3.95 kB

	#!/usr/bin/env python3
	"""Quick parser for trimode training logs."""
	import ast
	import re
	import sys
	from collections import defaultdict

	path = sys.argv[1] if len(sys.argv) > 1 else "train_trimode_4gpu_20260610_125224.log"
	text = open(path, encoding="utf-8", errors="replace").read()

	metrics = []
	for m in re.finditer(r"\{'loss':[^\n]+\}", text):
	try:
	metrics.append(ast.literal_eval(m.group()))
	except (SyntaxError, ValueError):
	pass

	print(f"Total metric lines: {len(metrics)}")
	print("\n=== Metrics timeline ===")
	for i, d in enumerate(metrics):
	if i % 25 == 0 or i >= len(metrics) - 3:
	print(
	f" idx={i:4d} format={d.get('rewards/format/mean', 0):.3f} "
	f"acc={d.get('rewards/accuracy/mean', 0):.3f} "
	f"clip={d.get('completions/clipped_ratio', 0):.3f} "
	f"degen={d.get('completions/degenerate_rate', 'NA')} "
	f"len={d.get('completions/mean_length', 0):.1f} "
	f"opsd={d.get('loss/opsd', 0):.4f} "
	f"alerts={d.get('health/alert_count', 'NA')}"
	)

	health_gen = re.findall(
	r"\[global_step=(\d+)\]\[generate\] batch health \\| .?degenerate_rate=([\d.]+).?"
	r"clipped_rate=([\d.]+).*?eos_rate=([\d.]+)",
	text,
	)
	print(f"\n=== OPSD-HEALTH generate lines (n={len(health_gen)}) ===")
	for gs, deg, clip, eos in health_gen:
	gs = int(gs)
	if gs in (0, 1, 2, 5, 10, 14, 20, 50) or gs % 50 == 0:
	print(f" step {gs:3d}: degen={float(deg):.2f} clip={float(clip):.2f} eos={float(eos):.2f}")

	health_alerts = re.findall(
	r"\[global_step=(\d+)\]\[ALERT\] (\w+)",
	text,
	)
	print(f"\n=== OPSD-HEALTH alerts (n={len(health_alerts)}) ===")
	alert_counts = defaultdict(int)
	for step, code in health_alerts:
	alert_counts[code] += 1
	for code, cnt in sorted(alert_counts.items(), key=lambda x: -x[1]):
	print(f" {code}: {cnt}x")

	probe = re.findall(
	r"global_step=(\d+).*opsd_mask_true=(\d+) \\| opsd_mask_false=(\d+)", text
	)
	ratios = [int(t) / 32 for _, t, _ in probe] if probe else [0]
	print(f"\n=== OPSD mask (rank0 probes, n={len(ratios)}) ===")
	if ratios:
	print(f" mean={sum(ratios)/len(ratios):.3f} min={min(ratios):.3f} max={max(ratios):.3f}")
	print(f" zero_mask_steps={sum(1 for r in ratios if r == 0)}/{len(ratios)}")

	gen = re.findall(
	r"global_step=(\d+).effective_tokens_mean=([\d.]+)."
	r"repeat_loop_count=(\d+).*eos_terminated_rate=([\d.]+)",
	text,
	)
	print(f"\n=== Generation stats (n={len(gen)}) ===")
	for gs, et, rl, eos in gen:
	gs = int(gs)
	if gs in (0, 1, 2, 5, 10, 14, 20, 50, 100, 150, 180, 190, 195, 196) or gs % 50 == 0:
	print(
	f" step {gs:3d}: eff={float(et):5.0f} repeat={rl:2s} eos={float(eos):.2f}"
	)

	for i, d in enumerate(metrics):
	if d.get("rewards/format/mean", 1) < 0.3 and i > 3:
	print(f"\n=== Format collapse first at idx {i} ===")
	print(f" format={d.get('rewards/format/mean')} clip={d.get('completions/clipped_ratio')}")
	break

	if len(metrics) >= 2:
	d0, d1 = metrics[0], metrics[1]
	if d0.get("completions/clipped_ratio", 0) < 0.2 and d1.get("completions/clipped_ratio", 0) > 0.8:
	print("\n=== Step 1 clip collapse detected ===")
	print(f" step0 clip={d0.get('completions/clipped_ratio')} -> step1 clip={d1.get('completions/clipped_ratio')}")

	nz = [i for i, d in enumerate(metrics) if d.get("loss/opsd", 0) > 1e-6]
	print(f"\n=== loss/opsd > 0: {len(nz)}/{len(metrics)} steps ===")
	if nz:
	print(f" first idx={nz[0]}, last idx={nz[-1]}")
	print(f" sample values: {[round(metrics[i]['loss/opsd'], 4) for i in nz[:8]]}")

	qi_steps = re.findall(r"global_step=(\d+).decode_skip_special='[^']其其其", text)
	print(f"\n=== Steps with CJK repeat in probe decode: {len(qi_steps)} ===")
	if qi_steps:
	print(f" first={qi_steps[0]} last={qi_steps[-1]}")

	print("\n(Tip: run scripts/degeneration_report.py for full markdown report)")