File size: 1,509 Bytes
1f36481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""Quick progress checker for validation run."""
import json
from pathlib import Path

# Location of the JSONL checkpoint appended to by the validation run.
checkpoint = Path("validation/results/medqa_checkpoint.jsonl")

# Number of cases the run is reported against ("Completed: N/50").
EXPECTED_CASES = 50
# How many trailing records are listed under "Recent cases".
RECENT_CASES = 5
# Maximum characters shown for the answer / diagnosis columns.
FIELD_WIDTH = 45


def as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict.

    Records may carry ``null`` (or be missing) for nested sections such as
    ``details`` and ``scores``; treating those as empty keeps the report
    running instead of crashing on ``None.get``.
    """
    return value if isinstance(value, dict) else {}


def parse_records(raw_lines):
    """Parse an iterable of JSONL lines into record dicts.

    Blank lines are ignored. Lines that are not valid JSON objects (for
    example a last line that is still being written by the running
    pipeline) are counted rather than aborting the whole report.

    Returns:
        A ``(records, skipped)`` tuple: the list of parsed dicts in input
        order and the number of non-blank lines that could not be used.
    """
    records = []
    skipped = 0
    for raw in raw_lines:
        text = raw.strip()
        if not text:
            continue
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            skipped += 1
            continue
        if isinstance(parsed, dict):
            records.append(parsed)
        else:
            skipped += 1
    return records, skipped


def match_location(record):
    """Return the record's ``details.match_location``, or ``"not_found"``.

    A missing or ``null`` location is reported as ``"not_found"`` so it is
    never counted as a match.
    """
    location = as_dict(record.get("details")).get("match_location")
    return "not_found" if location is None else location


def summarize(records):
    """Tally pipeline failures and match counts over *records*.

    Returns:
        A dict with the keys ``total``, ``failures``, ``matches``,
        ``diff_matches`` and ``top3_matches``.
    """
    stats = {
        "total": len(records),
        "failures": 0,
        "matches": 0,
        "diff_matches": 0,
        "top3_matches": 0,
    }
    for record in records:
        location = match_location(record)
        scores = as_dict(record.get("scores"))

        if not record.get("success"):
            stats["failures"] += 1
        if location != "not_found":
            stats["matches"] += 1
        if location == "differential":
            stats["diff_matches"] += 1
        # ``or 0`` guards against an explicit null score.
        if (scores.get("top3_accuracy") or 0) > 0:
            stats["top3_matches"] += 1
    return stats


def percent(count, total):
    """Return ``count`` as a percentage of ``total`` (0.0 when total is 0)."""
    return 100 * count / total if total else 0.0


def format_summary(stats):
    """Render the tallies from :func:`summarize` as report lines."""
    total = stats["total"]
    lines = [f"Pipeline success: {total - stats['failures']}/{total}"]
    for label, key in (
        ("Mentioned matches", "matches"),
        ("Differential matches", "diff_matches"),
        ("Top-3 matches", "top3_matches"),
    ):
        count = stats[key]
        lines.append(f"{label}: {count}/{total} ({percent(count, total):.0f}%)")
    return lines


def clip(value, default="?"):
    """Return *value* as text truncated to ``FIELD_WIDTH`` characters.

    ``None`` becomes *default*; non-string values are stringified first so
    they can be truncated safely.
    """
    if value is None:
        return default
    return str(value)[:FIELD_WIDTH]


def format_recent(record):
    """Render one record as a line of the "Recent cases" listing."""
    details = as_dict(record.get("details"))
    correct = clip(details.get("correct_answer"))
    top = clip(details.get("top_diagnosis"))
    location = match_location(record)
    elapsed_ms = record.get("pipeline_time_ms") or 0
    case_id = record.get("case_id", "?")
    return (
        f"  {case_id}: [{location}] {elapsed_ms/1000:.0f}s"
        f" | correct={correct} | top={top}"
    )


def main():
    """Print a progress report for the validation checkpoint file."""
    if not checkpoint.exists():
        print("No checkpoint file found")
        return

    with checkpoint.open(encoding="utf-8") as handle:
        records, skipped = parse_records(handle)

    print(f"Completed: {len(records)}/{EXPECTED_CASES}")
    if skipped:
        print(f"Skipped {skipped} unparseable line(s)")

    for line in format_summary(summarize(records)):
        print(line)

    # Show last 5 cases
    print("\nRecent cases:")
    for record in records[-RECENT_CASES:]:
        print(format_recent(record))


# Called unconditionally so that executing this file keeps producing the
# report exactly as the original flat script did.
main()