Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Verify Aeon test results against expected ground truth. | |
| This script reads the test results and compares them against the ground truth | |
| values in test_samples.json to validate the Aeon model predictions. | |
| Usage: | |
| python verify_aeon_results.py \ | |
| --test-samples test_slides/test_samples.json \ | |
| --results-dir test_slides/results | |
| """ | |
| import argparse | |
| import json | |
| from pathlib import Path | |
| import pandas as pd | |
| from typing import Dict, List, Tuple | |
def load_test_samples(test_samples_file: Path) -> List[Dict]:
    """Read the test-sample definitions from a JSON file.

    Args:
        test_samples_file: Location of test_samples.json.

    Returns:
        The decoded list of test-sample dictionaries.
    """
    raw_text = test_samples_file.read_text()
    return json.loads(raw_text)
def load_aeon_results(slide_id: str, results_dir: Path) -> Tuple[str, float]:
    """Load the top Aeon prediction for a slide.

    Args:
        slide_id: Slide identifier; used both as the result subdirectory name
            and as the CSV filename prefix.
        results_dir: Directory containing per-slide result subdirectories.

    Returns:
        Tuple of (predicted_subtype, confidence) for the first CSV row.

    Raises:
        FileNotFoundError: If the per-slide results CSV does not exist.
        ValueError: If the results CSV contains no rows.
    """
    results_file = results_dir / slide_id / f"{slide_id}_aeon_results.csv"
    if not results_file.exists():
        raise FileNotFoundError(f"Results file not found: {results_file}")

    df = pd.read_csv(results_file)
    if df.empty:
        raise ValueError(f"Empty results file: {results_file}")

    # First row is taken as the top prediction — assumes the CSV is already
    # sorted by confidence (TODO confirm against the result writer).
    top_prediction = df.iloc[0]

    # Coerce the pandas/numpy scalars to builtin str/float; np.float64 is not
    # JSON-serializable, which would break saving the verification report.
    return (
        str(top_prediction["Cancer Subtype"]),
        float(top_prediction["Confidence"]),
    )
def verify_results(test_samples: List[Dict], results_dir: Path) -> Dict:
    """Verify all test results against ground truth.

    Prints a per-slide report plus a summary to stdout.

    Args:
        test_samples: List of test sample dictionaries.
        results_dir: Directory containing results.

    Returns:
        Dictionary with keys ``total``, ``passed``, ``failed``, ``accuracy``,
        and ``results`` (one per-slide record each, including an ``error``
        message for slides whose results could not be loaded).
    """
    total = len(test_samples)
    passed = 0
    failed = 0
    results = []

    print("=" * 80)
    print("Aeon Model Verification Report")
    print("=" * 80)
    print()

    for sample in test_samples:
        # Sample files use either naming convention for the id / label keys.
        slide_id = sample.get("slide_id") or sample.get("image_id")
        ground_truth = sample.get("cancer_subtype") or sample.get("cancer_type")
        site_type = sample["site_type"]
        sex = sample["sex"]
        tissue_site = sample["tissue_site"]

        print(f"Slide: {slide_id}")
        print(f"  Ground Truth: {ground_truth}")
        print(f"  Site Type: {site_type}")
        print(f"  Sex: {sex}")
        print(f"  Tissue Site: {tissue_site}")

        try:
            predicted, confidence = load_aeon_results(slide_id, results_dir)
            # Builtin float so the report stays JSON-serializable even if the
            # loader hands back a numpy scalar.
            confidence = float(confidence)
            print(f"  Predicted: {predicted}")
            print(f"  Confidence: {confidence:.4f} ({confidence * 100:.2f}%)")

            # Check if prediction matches
            if predicted == ground_truth:
                print("  Status: ✓ PASS")
                passed += 1
                status = "PASS"
            else:
                print(f"  Status: ✗ FAIL (expected {ground_truth}, got {predicted})")
                failed += 1
                status = "FAIL"

            results.append({
                "slide_id": slide_id,
                "ground_truth": ground_truth,
                "predicted": predicted,
                "confidence": confidence,
                "site_type": site_type,
                "sex": sex,
                "tissue_site": tissue_site,
                "status": status
            })
        except Exception as e:
            # Best-effort: a missing/corrupt result file counts as a failure
            # but does not abort verification of the remaining slides.
            print(f"  Status: ✗ ERROR - {e}")
            failed += 1
            results.append({
                "slide_id": slide_id,
                "ground_truth": ground_truth,
                "predicted": None,
                "confidence": None,
                "site_type": site_type,
                "sex": sex,
                "tissue_site": tissue_site,
                "status": "ERROR",
                "error": str(e)
            })
        print()

    # Print summary
    print("=" * 80)
    print("Summary")
    print("=" * 80)
    print(f"Total slides: {total}")
    # Guard the percentage prints: an empty sample list must not raise
    # ZeroDivisionError.
    if total > 0:
        print(f"Passed: {passed} ({passed / total * 100:.1f}%)")
        print(f"Failed: {failed} ({failed / total * 100:.1f}%)")
    else:
        print(f"Passed: {passed}")
        print(f"Failed: {failed}")
    print()

    if passed == total:
        print("✓ All tests passed!")
    else:
        print(f"✗ {failed} test(s) failed")

    # Calculate statistics for passed tests
    if passed > 0:
        confidences = [r["confidence"] for r in results if r["status"] == "PASS"]
        avg_confidence = sum(confidences) / len(confidences)
        min_confidence = min(confidences)
        max_confidence = max(confidences)
        print()
        print("Confidence Statistics (for passed tests):")
        print(f"  Average: {avg_confidence:.4f} ({avg_confidence * 100:.2f}%)")
        print(f"  Minimum: {min_confidence:.4f} ({min_confidence * 100:.2f}%)")
        print(f"  Maximum: {max_confidence:.4f} ({max_confidence * 100:.2f}%)")

    return {
        "total": total,
        "passed": passed,
        "failed": failed,
        "accuracy": passed / total if total > 0 else 0,
        "results": results
    }
def main():
    """Command-line entry point.

    Parses arguments, verifies all test results, optionally saves a JSON
    report, and exits with status 0 on full success or 1 if any slide failed.

    Raises:
        FileNotFoundError: If the samples file or results directory is missing.
        SystemExit: Always, with the verification exit code.
    """
    parser = argparse.ArgumentParser(
        description="Verify Aeon test results against ground truth"
    )
    parser.add_argument(
        "--test-samples",
        type=Path,
        default=Path("test_slides/test_samples.json"),
        help="Path to test_samples.json (default: test_slides/test_samples.json)"
    )
    parser.add_argument(
        "--results-dir",
        type=Path,
        default=Path("test_slides/results"),
        help="Directory containing results (default: test_slides/results)"
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Optional path to save verification report as JSON"
    )
    args = parser.parse_args()

    # Validate inputs up front so the failure message is clear.
    if not args.test_samples.exists():
        raise FileNotFoundError(f"Test samples file not found: {args.test_samples}")
    if not args.results_dir.exists():
        raise FileNotFoundError(f"Results directory not found: {args.results_dir}")

    # Load test samples
    test_samples = load_test_samples(args.test_samples)

    # Verify results
    verification_report = verify_results(test_samples, args.results_dir)

    # Save report if requested
    if args.output:
        with open(args.output, "w") as f:
            json.dump(verification_report, f, indent=2)
        print()
        print(f"Verification report saved to: {args.output}")

    # Exit with the appropriate code. Use the SystemExit builtin rather than
    # the site-module exit() helper, which is not guaranteed to exist (e.g.
    # under `python -S` or in frozen/embedded interpreters).
    raise SystemExit(1 if verification_report["failed"] > 0 else 0)


if __name__ == "__main__":
    main()