File size: 2,608 Bytes
3f702bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e33fa0e
 
3f702bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
"""
Verify DALI Prefiltering Results (Paper Tables 4-6)

Expected results:
- TPR (True Positive Rate): ~82.8%
- Database Reduction: ~31.5%

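This script analyzes pre-computed DALI results from the backup data, read from
results/dali_thresholds.csv under the repository root (the parent of this
script's directory).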
"""

import numpy as np
import pandas as pd
import sys
from pathlib import Path


def main(csv_path=None) -> int:
    """Compare pre-computed DALI prefiltering metrics with the paper's values.

    Reads the DALI threshold CSV, averages the per-trial columns, prints a
    report, and checks the mean TPR and the database reduction against the
    paper values hard-coded below (82.8% and 31.5%).

    Args:
        csv_path: Optional path to the results CSV. When omitted, the file
            ``results/dali_thresholds.csv`` under the parent of this
            script's directory is used.

    Returns:
        0 when the mean TPR is within 2.0 and the database reduction is
        within 1.0 of the paper values (both expressed in percent),
        otherwise 1.

    Raises:
        SystemExit: With status 1 when the CSV does not exist, is empty,
            contains no rows, or lacks one of the required columns.
    """
    print("=" * 60)
    print("DALI Prefiltering Verification (Paper Tables 4-6)")
    print("=" * 60)
    print()

    # Load DALI results
    if csv_path is None:
        repo_root = Path(__file__).parent.parent
        dali_csv = repo_root / "results" / "dali_thresholds.csv"
    else:
        dali_csv = Path(csv_path)

    if not dali_csv.exists():
        print(f"ERROR: DALI results not found at {dali_csv}")
        sys.exit(1)

    try:
        df = pd.read_csv(dali_csv)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header line for pandas to parse.
        print(f"ERROR: DALI results file {dali_csv} is empty")
        sys.exit(1)

    print(f"Loaded {len(df)} trials from {dali_csv.name}")
    print()

    # Fail with a readable message instead of a KeyError traceback when the
    # CSV does not have the columns read below.
    required_columns = (
        "TPR_elbow",
        "frac_samples_above_lambda",
        "FNR_elbow",
        "FDR_elbow",
        "elbow_z",
    )
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        print(f"ERROR: {dali_csv.name} is missing column(s): {', '.join(missing)}")
        sys.exit(1)

    # Means over zero rows are NaN, which would be reported as a mismatch
    # with the paper rather than as a data problem.
    if df.empty:
        print(f"ERROR: {dali_csv.name} contains no trials")
        sys.exit(1)

    # Compute key metrics (rates are scaled by 100 to print as percentages)
    tpr_mean = df["TPR_elbow"].mean() * 100
    tpr_std = df["TPR_elbow"].std() * 100

    frac_kept = df["frac_samples_above_lambda"].mean()
    db_reduction = (1 - frac_kept) * 100

    fnr_mean = df["FNR_elbow"].mean() * 100
    # FDR is printed as the raw column mean, not scaled to a percentage.
    fdr_mean = df["FDR_elbow"].mean()
    elbow_z_mean = df["elbow_z"].mean()
    elbow_z_std = df["elbow_z"].std()

    # Paper claims
    paper_tpr = 82.8
    paper_db_reduction = 31.5

    print("Results:")
    print("-" * 40)
    print(f"TPR (True Positive Rate): {tpr_mean:.1f}% ± {tpr_std:.1f}%")
    print(f"  Paper claims: {paper_tpr}%")
    print(f"  Difference: {abs(tpr_mean - paper_tpr):.1f}%")
    print()
    print(f"Database Reduction: {db_reduction:.1f}%")
    print(f"  Paper claims: {paper_db_reduction}%")
    print(f"  Difference: {abs(db_reduction - paper_db_reduction):.1f}%")
    print()
    print(f"FNR (Miss Rate): {fnr_mean:.1f}%")
    print(f"FDR at elbow: {fdr_mean:.6f}")
    print(f"Elbow z-score: {elbow_z_mean:.1f} ± {elbow_z_std:.1f}")
    print()

    # Verification
    tpr_ok = abs(tpr_mean - paper_tpr) < 2.0  # Within 2%
    db_ok = abs(db_reduction - paper_db_reduction) < 1.0  # Within 1%
    passed = tpr_ok and db_ok

    print("=" * 60)
    if passed:
        print("✓ VERIFICATION PASSED")
        print(f"  TPR {tpr_mean:.1f}% matches paper ({paper_tpr}%)")
        print(f"  DB reduction {db_reduction:.1f}% matches paper ({paper_db_reduction}%)")
    else:
        print("⚠ VERIFICATION WARNING")
        if not tpr_ok:
            print(f"  TPR {tpr_mean:.1f}% differs from paper ({paper_tpr}%)")
        if not db_ok:
            print(f"  DB reduction {db_reduction:.1f}% differs from paper ({paper_db_reduction}%)")
    print("=" * 60)

    return 0 if passed else 1


if __name__ == "__main__":
    # Run the verification and hand its 0/1 result to the shell as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)