"""Parse train.py run.log → (bpb, tps_avg, factual). bpb priority order: 1. val_bpb from [VAL] line (cleanest signal, but OOMs on 6GB cards) 2. train_bpb from the LAST step= line (proxy when val fails — not held-out but monotone with model capability over a 5-min budget) """ import re, sys txt = open(sys.argv[1]).read() m = re.search(r'val_bpb:\s+([\d\.]+)', txt) if m: bpb = m.group(1) else: step_lines = re.findall(r'^step=\d+\s+loss=[\d\.]+\s+bpb=([\d\.]+)', txt, re.M) bpb = f'~{step_lines[-1]}' if step_lines else 'NA' tps_vals = [int(m.group(1)) for m in re.finditer(r'tps=(\d+)', txt)] tps_avg = f'{sum(tps_vals)/len(tps_vals):.0f}' if tps_vals else 'NA' m = re.search(r'factual_english_hits:\s+(\d+/\d+)', txt) factual = m.group(1) if m else 'NA' print(f"{bpb}\t{tps_avg}\t{factual}")