"""Parse train.py run.log → (bpb, tps_avg, factual).

bpb priority order:
  1. val_bpb from [VAL] line (cleanest signal, but OOMs on 6GB cards)
  2. train_bpb from the LAST step= line (proxy when val fails — not held-out
     but monotone with model capability over a 5-min budget)
"""
import re, sys
# Patterns are compiled once at import; the pattern text itself defines the
# log format this script understands.
_VAL_BPB_RE = re.compile(r'val_bpb:\s+([\d\.]+)')
_STEP_BPB_RE = re.compile(r'^step=\d+\s+loss=[\d\.]+\s+bpb=([\d\.]+)', re.M)
_TPS_RE = re.compile(r'tps=(\d+)')
_FACTUAL_RE = re.compile(r'factual_english_hits:\s+(\d+/\d+)')


def parse_log(txt):
    """Extract ``(bpb, tps_avg, factual)`` from the text of a run log.

    Args:
        txt: Full contents of the log as a single string.

    Returns:
        A 3-tuple of strings:

        * ``bpb`` -- the number following the first ``val_bpb:`` occurrence;
          otherwise the ``bpb=`` value of the last line starting with
          ``step=<n> loss=<x> bpb=<y>``, prefixed with ``~`` to flag it as a
          training-set proxy; otherwise ``'NA'``.
        * ``tps_avg`` -- arithmetic mean of every integer ``tps=<n>`` value
          in the log, formatted with no decimals, or ``'NA'`` if none exist.
        * ``factual`` -- the ``<hits>/<total>`` token following
          ``factual_english_hits:``, or ``'NA'`` if absent.
    """
    val_match = _VAL_BPB_RE.search(txt)
    if val_match:
        bpb = val_match.group(1)
    else:
        # No validation figure: fall back to the most recent training step.
        step_bpbs = _STEP_BPB_RE.findall(txt)
        bpb = f'~{step_bpbs[-1]}' if step_bpbs else 'NA'

    tps_vals = [int(v) for v in _TPS_RE.findall(txt)]
    tps_avg = f'{sum(tps_vals) / len(tps_vals):.0f}' if tps_vals else 'NA'

    hits_match = _FACTUAL_RE.search(txt)
    factual = hits_match.group(1) if hits_match else 'NA'

    return bpb, tps_avg, factual


def main(argv=None):
    """Read the log named by the first argument and print the summary line.

    Args:
        argv: Argument list excluding the program name. Defaults to
            ``sys.argv[1:]``. Arguments after the first are ignored.

    Raises:
        SystemExit: If no log path is given (usage message on stderr,
            nothing on stdout).
        OSError: If the log file cannot be opened or read.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f'usage: {sys.argv[0]} <run.log>')

    # Every pattern is pure ASCII, so an undecodable byte elsewhere in the
    # log is replaced rather than allowed to abort the parse. The context
    # manager guarantees the handle is closed.
    with open(args[0], encoding='utf-8', errors='replace') as log_file:
        txt = log_file.read()

    print('\t'.join(parse_log(txt)))


if __name__ == '__main__':
    main()