cpr / scripts /verify_dali.py
ronboger's picture
fix: FDR computation bug, clean up CLEAN/DALI notebooks
e33fa0e
#!/usr/bin/env python
"""
Verify DALI Prefiltering Results (Paper Tables 4-6)
Expected results:
- TPR (True Positive Rate): ~82.8%
- Database Reduction: ~31.5%
This script analyzes pre-computed DALI results from the backup data.
"""
import numpy as np
import pandas as pd
import sys
from pathlib import Path
def main():
print("=" * 60)
print("DALI Prefiltering Verification (Paper Tables 4-6)")
print("=" * 60)
print()
# Load DALI results
repo_root = Path(__file__).parent.parent
dali_csv = repo_root / "results" / "dali_thresholds.csv"
if not dali_csv.exists():
print(f"ERROR: DALI results not found at {dali_csv}")
sys.exit(1)
df = pd.read_csv(dali_csv)
print(f"Loaded {len(df)} trials from {dali_csv.name}")
print()
# Compute key metrics
tpr_mean = df["TPR_elbow"].mean() * 100
tpr_std = df["TPR_elbow"].std() * 100
frac_kept = df["frac_samples_above_lambda"].mean()
db_reduction = (1 - frac_kept) * 100
fnr_mean = df["FNR_elbow"].mean() * 100
fdr_mean = df["FDR_elbow"].mean()
elbow_z_mean = df["elbow_z"].mean()
elbow_z_std = df["elbow_z"].std()
# Paper claims
paper_tpr = 82.8
paper_db_reduction = 31.5
print("Results:")
print("-" * 40)
print(f"TPR (True Positive Rate): {tpr_mean:.1f}% ± {tpr_std:.1f}%")
print(f" Paper claims: {paper_tpr}%")
print(f" Difference: {abs(tpr_mean - paper_tpr):.1f}%")
print()
print(f"Database Reduction: {db_reduction:.1f}%")
print(f" Paper claims: {paper_db_reduction}%")
print(f" Difference: {abs(db_reduction - paper_db_reduction):.1f}%")
print()
print(f"FNR (Miss Rate): {fnr_mean:.1f}%")
print(f"FDR at elbow: {fdr_mean:.6f}")
print(f"Elbow z-score: {elbow_z_mean:.1f} ± {elbow_z_std:.1f}")
print()
# Verification
tpr_ok = abs(tpr_mean - paper_tpr) < 2.0 # Within 2%
db_ok = abs(db_reduction - paper_db_reduction) < 1.0 # Within 1%
print("=" * 60)
if tpr_ok and db_ok:
print("✓ VERIFICATION PASSED")
print(f" TPR {tpr_mean:.1f}% matches paper ({paper_tpr}%)")
print(f" DB reduction {db_reduction:.1f}% matches paper ({paper_db_reduction}%)")
else:
print("⚠ VERIFICATION WARNING")
if not tpr_ok:
print(f" TPR {tpr_mean:.1f}% differs from paper ({paper_tpr}%)")
if not db_ok:
print(f" DB reduction {db_reduction:.1f}% differs from paper ({paper_db_reduction}%)")
print("=" * 60)
return 0 if (tpr_ok and db_ok) else 1
if __name__ == "__main__":
sys.exit(main())