recon / eval /check_progress.py
MukulRay's picture
Phase 1.3: smoke test scripts, eval runner, check_progress, fix unicode in run_eval.py
f95672f
import csv, os, sys
# Default location of the results CSV that this script summarises.
path = 'eval/results/recon_linear_v2.csv'
# Denominator shown in the "Progress: N / TOTAL" line.
TOTAL_QUESTIONS = 130


def _text(row, key, default=''):
    """Return ``row[key]`` when it is a string, otherwise *default*.

    ``csv.DictReader`` fills fields that are missing from a short row with
    ``None`` (e.g. a row that is only partially written), so a plain
    ``row.get(key, '')`` can still hand back ``None``.  Normalising here
    keeps ``.strip()``, slicing and sorting from raising on such rows.
    """
    value = row.get(key)
    return value if isinstance(value, str) else default


def load_rows(csv_path):
    """Read *csv_path* as UTF-8 CSV and return its rows as a list of dicts."""
    with open(csv_path, newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))


def build_report(rows, total=TOTAL_QUESTIONS):
    """Build the progress report for *rows* as a list of printable lines.

    Args:
        rows: Non-empty list of dicts as produced by ``csv.DictReader``.
            The columns read are ``critic_verdict``, ``position_accuracy``,
            ``question_id`` and ``question``; missing or ``None`` values are
            treated as empty strings.
        total: Number shown as the denominator of the progress line.

    Returns:
        The report lines, in the order they should be printed.

    Raises:
        ValueError: If *rows* is empty (percentages would divide by zero).
    """
    from collections import Counter

    if not rows:
        raise ValueError('rows must not be empty')

    done = len(rows)
    verdicts = [_text(r, 'critic_verdict') for r in rows]
    counts = Counter(verdicts)
    # Sorted plain dict so the printed distribution is stable and readable.
    distribution = dict(sorted(counts.items()))

    contra = [r for r, v in zip(rows, verdicts) if v == 'CONTRADICTED']
    n_contra = len(contra)
    n_stale = counts['STALE']
    # position_accuracy is matched case-insensitively, ignoring padding.
    n_correct = sum(
        _text(r, 'position_accuracy').strip().upper() == 'MATCH' for r in rows
    )

    lines = [
        f"Progress: {done} / {total} questions done",
        f"Verdict distribution: {distribution}",
        f"CONTRADICTED: {n_contra} ({n_contra/done*100:.1f}%)",
        f"STALE: {n_stale} ({n_stale/done*100:.1f}%)",
        f"Position accuracy: {n_correct}/{done} = {n_correct/done*100:.1f}%",
    ]
    if contra:
        lines.append("\nFirst CONTRADICTED hits:")
        # Only the first five hits, with the question text cut to 70 chars.
        lines.extend(
            f" [{_text(r, 'question_id', '?')}] {_text(r, 'question')[:70]}"
            for r in contra[:5]
        )
    return lines


def main(csv_path=None):
    """Print a progress summary for the results CSV and return exit code 0.

    Args:
        csv_path: CSV file to summarise; defaults to the module-level ``path``.

    Returns:
        Always ``0``: a missing or empty results file is reported with a
        message rather than treated as an error.
    """
    csv_path = path if csv_path is None else csv_path
    if not os.path.exists(csv_path):
        print('No results file yet — eval may still be starting up')
        return 0

    rows = load_rows(csv_path)
    if not rows:
        print('File exists but is empty — eval just started')
        return 0

    for line in build_report(rows):
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())