# feather-runtime/overlay/scripts/parse_metrics.py
# Jackoatmon's picture
# Update Feather h200 training runtime image
# e317e25 verified
"""Parse train.py run.log → (bpb, tps_avg, factual).
bpb priority order:
1. val_bpb from [VAL] line (cleanest signal, but OOMs on 6GB cards)
2. train_bpb from the LAST step= line, reported with a '~' prefix (proxy
   when val fails — not held-out, but monotone with model capability over
   a 5-min budget)
Any metric that cannot be found in the log is reported as 'NA'.
"""
import re, sys
# Patterns are compiled once at module level; they are the exact expressions
# the log format is matched against.
_VAL_BPB_RE = re.compile(r'val_bpb:\s+([\d\.]+)')
_STEP_BPB_RE = re.compile(r'^step=\d+\s+loss=[\d\.]+\s+bpb=([\d\.]+)', re.M)
_TPS_RE = re.compile(r'tps=(\d+)')
_FACTUAL_RE = re.compile(r'factual_english_hits:\s+(\d+/\d+)')


def parse_metrics(txt: str) -> tuple:
    """Extract ``(bpb, tps_avg, factual)`` from the text of a run log.

    Args:
        txt: Full contents of the log file.

    Returns:
        A 3-tuple of strings:
        * bpb -- the first ``val_bpb:`` value if present; otherwise the bpb
          of the last ``step=`` line prefixed with ``~`` to mark it as a
          train-set proxy; otherwise ``'NA'``.
        * tps_avg -- mean of every ``tps=<int>`` value, rounded to a whole
          number, or ``'NA'`` when the log has none.
        * factual -- the ``factual_english_hits:`` fraction (``n/m``), or
          ``'NA'`` when absent.
    """
    val_match = _VAL_BPB_RE.search(txt)
    if val_match:
        bpb = val_match.group(1)
    else:
        # No validation figure: fall back to the most recent training step.
        step_bpbs = _STEP_BPB_RE.findall(txt)
        bpb = f'~{step_bpbs[-1]}' if step_bpbs else 'NA'

    tps_vals = [int(v) for v in _TPS_RE.findall(txt)]
    tps_avg = f'{sum(tps_vals)/len(tps_vals):.0f}' if tps_vals else 'NA'

    factual_match = _FACTUAL_RE.search(txt)
    factual = factual_match.group(1) if factual_match else 'NA'

    return bpb, tps_avg, factual


def main(argv=None):
    """Read the log named by the first CLI argument and print its metrics.

    Args:
        argv: Argument list excluding the program name; defaults to
            ``sys.argv[1:]``.

    Prints one line, ``bpb<TAB>tps_avg<TAB>factual``, to stdout. Exits with a
    usage message on stderr when no log path is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: parse_metrics.py RUN_LOG")

    # `with` closes the handle deterministically; errors="replace" keeps a
    # stray undecodable byte in the log from aborting the whole parse.
    with open(args[0], encoding="utf-8", errors="replace") as fh:
        txt = fh.read()

    bpb, tps_avg, factual = parse_metrics(txt)
    print(f"{bpb}\t{tps_avg}\t{factual}")


if __name__ == "__main__":
    main()