File size: 1,509 Bytes
1f36481 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
"""Quick progress checker for validation run."""
import json
from pathlib import Path
# Location of the JSONL checkpoint: one JSON object per line.
checkpoint = Path("validation/results/medqa_checkpoint.jsonl")

# Denominator shown on the "Completed" line.
EXPECTED_TOTAL = 50
# How many trailing records are listed under "Recent cases".
RECENT_COUNT = 5
# Maximum number of characters shown for the answer / diagnosis fields.
FIELD_WIDTH = 45


def parse_records(text: str) -> list:
    """Decode JSONL text into a list of records.

    Blank and whitespace-only lines are skipped, so an empty checkpoint
    yields an empty list instead of failing on ``json.loads("")``.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Split on "\n" only: str.splitlines() would also break on separators
    # such as U+2028 that may legally appear inside a JSON string.
    return [json.loads(row) for row in text.split("\n") if row.strip()]


def percent(count: int, total: int) -> float:
    """Return ``count`` as a percentage of ``total`` (0.0 when ``total`` is 0)."""
    return 100 * count / total if total else 0.0


def summarize(records: list) -> dict:
    """Tally outcome counters over the decoded checkpoint records.

    Returns a dict with the keys ``total``, ``failures``, ``matches``,
    ``differential`` and ``top3``.  Missing or null ``details``, ``scores``,
    ``match_location`` and ``top3_accuracy`` values are treated like absent
    keys rather than raising.
    """
    stats = {
        "total": len(records),
        "failures": 0,
        "matches": 0,
        "differential": 0,
        "top3": 0,
    }
    for record in records:
        details = record.get("details") or {}
        scores = record.get("scores") or {}
        location = details.get("match_location") or "not_found"
        if not record.get("success"):
            stats["failures"] += 1
        if location != "not_found":
            stats["matches"] += 1
        if location == "differential":
            stats["differential"] += 1
        if (scores.get("top3_accuracy") or 0) > 0:
            stats["top3"] += 1
    return stats


def _clip(value, width: int = FIELD_WIDTH) -> str:
    """Render ``value`` as text cut to ``width`` chars; ``None`` becomes "?"."""
    return ("?" if value is None else str(value))[:width]


def format_case(record: dict) -> str:
    """Build the one-line "Recent cases" entry for a single record.

    Absent or null fields fall back to placeholders ("?", "not_found", 0)
    so a partially populated record still produces a line.
    """
    details = record.get("details") or {}
    correct = _clip(details.get("correct_answer"))
    top = _clip(details.get("top_diagnosis"))
    location = details.get("match_location") or "not_found"
    # pipeline_time_ms is in milliseconds; the line shows whole seconds.
    seconds = (record.get("pipeline_time_ms") or 0) / 1000
    case_id = record.get("case_id", "?")
    return f" {case_id}: [{location}] {seconds:.0f}s | correct={correct} | top={top}"


def main() -> None:
    """Print a progress summary for the checkpoint file, if it exists."""
    if not checkpoint.exists():
        print("No checkpoint file found")
        # Returning ends the script with status 0 without relying on the
        # site-provided exit() helper.
        return

    records = parse_records(checkpoint.read_text(encoding="utf-8"))
    stats = summarize(records)
    total = stats["total"]
    succeeded = total - stats["failures"]

    print(f"Completed: {total}/{EXPECTED_TOTAL}")
    print(f"Pipeline success: {succeeded}/{total}")
    for label, key in (
        ("Mentioned", "matches"),
        ("Differential", "differential"),
        ("Top-3", "top3"),
    ):
        count = stats[key]
        print(f"{label} matches: {count}/{total} ({percent(count, total):.0f}%)")

    # Show the most recent cases, reusing the already-decoded records.
    print("\nRecent cases:")
    for record in records[-RECENT_COUNT:]:
        print(format_case(record))


if __name__ == "__main__":
    main()
|