permanence / tools /parse_run6_logs.py
chane335's picture
Run 7: R4/R5 calibration + env precondition fix — forced variants preserved, git_push_force → R2 when no overwrite
8867e44 verified
"""Parse Run 6 SSE logs and surface reward_std / frac_reward_zero_std distribution.
Usage:
TOKEN=$(hf auth token)
curl -sN --max-time 20 -H "Authorization: Bearer $TOKEN" \\
"https://huggingface.co/api/spaces/chane335/permanence-training/logs/run" \\
> /tmp/run6_logs.txt
python tools/parse_run6_logs.py /tmp/run6_logs.txt
"""
from __future__ import annotations
import re
import sys
from collections import Counter
def _is_fraction_one(token: str) -> bool:
    """Return True when ``token`` parses to exactly 1.0 ("1", "1.0", "1.00", ...)."""
    try:
        return float(token) == 1.0
    except ValueError:
        # The capture pattern is permissive (digits and dots), so a token
        # such as ".." can slip through; it is simply not a full fraction.
        return False


def main(log_path: str = "/tmp/run6_logs.txt", *, total_steps: int = 600) -> None:
    """Print a summary of reward statistics scraped from a saved training log.

    The log text is scanned for ``'frac_reward_zero_std': '<value>'`` and
    ``'rewards/reward_environmental/mean': '<value>'`` entries and for
    ``<step>/<total_steps> [`` progress counters. The function prints:

    * the number of ``frac_reward_zero_std`` entries found,
    * the highest step counter seen (if any),
    * a histogram of the distinct ``frac_reward_zero_std`` values and the
      share of entries whose value equals 1,
    * min / max / mean of the environmental reward means, a bucketed count
      over [0, 2.0), and the counts of negative and >= 2.0 values.

    Args:
        log_path: Path to the saved log text.
        total_steps: Denominator of the progress counter to look for.
            Defaults to 600, the value this script was written against.
    """
    # The documented capture uses a hard curl timeout, so the dump can end in
    # the middle of a multi-byte character; replace undecodable bytes rather
    # than aborting the whole report.
    with open(log_path, encoding="utf-8", errors="replace") as f:
        text = f.read()

    zero_stds = re.findall(r"'frac_reward_zero_std': '([0-9.]+)'", text)
    env_means = re.findall(
        r"'rewards/reward_environmental/mean': '([-0-9.e+]+)'", text
    )
    step_pattern = rf"(\d+)/{re.escape(str(total_steps))} \["
    latest_step = re.findall(step_pattern, text)

    print(f"Total logged generation batches: {len(zero_stds)}")
    if latest_step:
        print(f"Latest step reached: {max(map(int, latest_step))}/{total_steps}")

    if zero_stds:
        c = Counter(zero_stds)
        print("\nfrac_reward_zero_std distribution:")
        for k in sorted(c):
            pct = c[k] * 100 / len(zero_stds)
            print(f" {k}: {c[k]} batches ({pct:.1f}%)")
        # Compare numerically: the same fraction may be logged as "1" or
        # "1.0", and an exact-string lookup would miss one of the spellings.
        n_all_zero = sum(n for k, n in c.items() if _is_fraction_one(k))
        zero_pct = n_all_zero * 100 / len(zero_stds)
        print(f"\n>>> {zero_pct:.1f}% of batches had zero std (Run 5 was ~70%)")

    if env_means:
        vals = [float(x) for x in env_means]
        print(f"\nEnv reward mean across {len(vals)} batches:")
        print(
            f" min={min(vals):.3f} max={max(vals):.3f} "
            f"mean={sum(vals)/len(vals):.3f}"
        )
        # Half-open buckets [lo, hi); values outside [0, 2.0) are reported
        # separately on the final line.
        buckets = [0, 0.1, 0.3, 0.5, 0.7, 1.0, 1.5, 2.0]
        for lo, hi in zip(buckets, buckets[1:]):
            n = sum(1 for v in vals if lo <= v < hi)
            print(f" [{lo:.1f}, {hi:.1f}): {n}")
        n_neg = sum(1 for v in vals if v < 0)
        n_high = sum(1 for v in vals if v >= 2.0)
        print(f" negative: {n_neg}, >=2.0: {n_high}")
if __name__ == "__main__":
    # An optional first command-line argument overrides the default log path.
    cli_args = sys.argv[1:]
    main(cli_args[0] if cli_args else "/tmp/run6_logs.txt")