"""Parse Run 6 SSE logs and surface reward_std / frac_reward_zero_std distribution.

Usage:
    TOKEN=$(hf auth token)
    curl -sN --max-time 20 -H "Authorization: Bearer $TOKEN" \\
        "https://huggingface.co/api/spaces/chane335/permanence-training/logs/run" \\
        > /tmp/run6_logs.txt
    python tools/parse_run6_logs.py /tmp/run6_logs.txt
"""
from __future__ import annotations

import re
import sys
from collections import Counter

# Histogram edges for the per-batch environmental reward mean; each bucket is
# half-open, [lo, hi). Values below the first edge or at/above the last edge
# are reported separately as "negative" and ">=2.0".
_ENV_MEAN_BUCKETS = (0, 0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0)


def _to_float(raw: str) -> float | None:
    """Return *raw* as a float, or None when the captured token is not a number."""
    try:
        return float(raw)
    except ValueError:
        return None


def _parse_floats(raw_values: list[str]) -> list[float]:
    """Convert regex captures to floats, dropping tokens that fail to parse."""
    return [v for v in map(_to_float, raw_values) if v is not None]


def _numeric_sort_key(raw: str) -> tuple[bool, float, str]:
    """Order captured strings by numeric value; unparsable tokens sort last."""
    value = _to_float(raw)
    return (value is None, 0.0 if value is None else value, raw)


def _print_min_max_mean(label: str, vals: list[float]) -> None:
    """Print a header line and a min/max/mean line for a non-empty list."""
    print(f"\n{label} across {len(vals)} batches:")
    print(f" min={min(vals):.3f} max={max(vals):.3f} mean={sum(vals)/len(vals):.3f}")


def main(log_path: str = "/tmp/run6_logs.txt") -> None:
    """Print a summary of the training metrics found in a captured log file.

    The file is scanned with regular expressions for ``frac_reward_zero_std``,
    ``rewards/reward_environmental/mean``, ``rewards/reward_environmental/std``
    and ``<step>/600 [`` progress markers. Every section is skipped when the
    log contains no matching entries.

    Args:
        log_path: Path of the captured log text.

    Raises:
        OSError: If ``log_path`` cannot be opened.
    """
    # The capture is produced by `curl --max-time`, so it can be cut off in the
    # middle of a multi-byte character; replace undecodable bytes instead of
    # failing before any metric has been read.
    with open(log_path, encoding="utf-8", errors="replace") as f:
        text = f.read()

    zero_stds = re.findall(r"'frac_reward_zero_std': '([0-9.]+)'", text)
    env_means = re.findall(r"'rewards/reward_environmental/mean': '([-0-9.e+]+)'", text)
    env_stds = re.findall(r"'rewards/reward_environmental/std': '([-0-9.e+]+)'", text)
    latest_step = re.findall(r"(\d+)/600 \[", text)

    print(f"Total logged generation batches: {len(zero_stds)}")
    if latest_step:
        print(f"Latest step reached: {max(int(s) for s in latest_step)}/600")

    if zero_stds:
        total = len(zero_stds)
        counts = Counter(zero_stds)
        print("\nfrac_reward_zero_std distribution:")
        for key in sorted(counts, key=_numeric_sort_key):
            pct = counts[key] * 100 / total
            print(f" {key}: {counts[key]} batches ({pct:.1f}%)")
        # Compare numerically so that "1", "1.0" and "1.00" all count as a
        # batch whose zero-std fraction is 1.
        n_all_zero = sum(n for key, n in counts.items() if _to_float(key) == 1.0)
        zero_pct = n_all_zero * 100 / total
        print(f"\n>>> {zero_pct:.1f}% of batches had zero std (Run 5 was ~70%)")

    mean_vals = _parse_floats(env_means)
    if mean_vals:
        _print_min_max_mean("Env reward mean", mean_vals)
        for lo, hi in zip(_ENV_MEAN_BUCKETS, _ENV_MEAN_BUCKETS[1:]):
            n = sum(1 for v in mean_vals if lo <= v < hi)
            print(f" [{lo:.1f}, {hi:.1f}): {n}")
        n_neg = sum(1 for v in mean_vals if v < 0)
        n_high = sum(1 for v in mean_vals if v >= 2.0)
        print(f" negative: {n_neg}, >=2.0: {n_high}")

    std_vals = _parse_floats(env_stds)
    if std_vals:
        _print_min_max_mean("Env reward std", std_vals)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else "/tmp/run6_logs.txt")