Spaces:
Sleeping
Sleeping
| """ | |
| Evaluation script for PaDiM anomaly detection model | |
| """ | |
| import torch | |
| import numpy as np | |
| from tqdm import tqdm | |
| from pathlib import Path | |
| from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve | |
| import sys | |
| import json | |
| sys.path.append(str(Path(__file__).parent)) | |
| import config | |
| from src.data_loader import get_dataloader | |
| from src.feature_extractor import FeatureExtractor, extract_embeddings | |
| from src.padim import PaDiM | |
| from src.visualize import plot_roc_curve, save_prediction | |
| from PIL import Image | |
def evaluate_padim():
    """Evaluate a trained PaDiM model on the held-out test set.

    Loads the pickled PaDiM model, extracts backbone embeddings for every
    test image ("good" plus each configured defect type), computes an
    image-level anomaly score per image, and reports ROC-AUC along with
    precision/recall/F1/accuracy at the Youden-optimal threshold. Example
    prediction visualisations, the ROC curve plot, and a JSON metrics
    summary are written to ``config.RESULTS_DIR``.

    Returns:
        dict: Metrics with keys ``roc_auc``, ``optimal_threshold``,
        ``precision``, ``recall``, ``f1_score``, ``accuracy``,
        ``confusion_matrix``.

    Raises:
        FileNotFoundError: If the trained model pickle is missing.
        RuntimeError: If no test images were found under ``config.TEST_DIR``.
    """
    print("=" * 60)
    print("AUTOMATED TABLET DEFECT DETECTION - EVALUATION")
    print("=" * 60)

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    padim_model, extractor = _load_model_and_extractor(device)

    scores, labels = _score_test_set(padim_model, extractor, device)
    if not scores:
        # Fail loudly here rather than letting roc_auc_score raise an
        # opaque error (or accuracy divide by zero) on empty arrays.
        raise RuntimeError(f"No test images found under {config.TEST_DIR}.")

    results = _compute_metrics(np.array(scores), np.array(labels))
    _save_results(results)
    return results


def _load_model_and_extractor(device):
    """Load the trained PaDiM model and build the feature extractor on *device*."""
    print("\nLoading trained model...")
    model_path = config.MODEL_DIR / "padim_model.pkl"
    if not model_path.exists():
        raise FileNotFoundError(f"Model not found at {model_path}. Run train.py first.")
    padim_model = PaDiM()
    padim_model.load(model_path)

    print("Initializing feature extractor...")
    extractor = FeatureExtractor(
        backbone=config.BACKBONE,
        layers=config.FEATURE_LAYERS
    ).to(device)
    return padim_model, extractor


def _score_test_set(padim_model, extractor, device, max_examples=20):
    """Score every test image and return parallel (scores, labels) lists.

    Labels are 0 for the "good" class and 1 for any defect type. As a side
    effect, saves up to *max_examples* prediction visualisations into
    ``config.RESULTS_DIR``.
    """
    print("\nEvaluating on test set...")
    all_scores = []
    all_labels = []
    examples_saved = 0  # replaces the original unused all_predictions list,
                        # which only ever served as a saved-example counter
    # Ensure the output directory exists before writing example images.
    config.RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    for defect_type in ["good"] + config.DEFECT_TYPES:
        test_dir = config.TEST_DIR / defect_type
        if not test_dir.exists():
            print(f"Skipping {defect_type} (directory not found)")
            continue
        print(f"\nProcessing {defect_type}...")

        # Ground truth: 0 for good, 1 for defect
        is_defect = 0 if defect_type == "good" else 1

        test_loader = get_dataloader(test_dir, batch_size=1, shuffle=False)
        for images, paths, _ in tqdm(test_loader):
            images = images.to(device)
            # Extract embeddings without tracking gradients (inference only).
            with torch.no_grad():
                embeddings = extract_embeddings(extractor, images)

            anomaly_score, anomaly_map = padim_model.predict(embeddings.cpu().numpy())
            all_scores.append(anomaly_score)
            all_labels.append(is_defect)

            # Save the first max_examples predictions as visualisations.
            if examples_saved < max_examples:
                img_path = paths[0]
                # Context manager closes the file handle the original leaked.
                with Image.open(img_path) as img:
                    save_path = config.RESULTS_DIR / f"{defect_type}_{Path(img_path).name}"
                    save_prediction(img, anomaly_score, anomaly_map, str(save_path))
                examples_saved += 1

    return all_scores, all_labels


def _compute_metrics(all_scores, all_labels):
    """Compute ROC-AUC and threshold metrics; plot the ROC curve.

    The operating threshold is chosen by Youden's J statistic
    (argmax of TPR - FPR on the ROC curve).
    """
    roc_auc = roc_auc_score(all_labels, all_scores)
    print(f"\n{'=' * 60}")
    print(f"IMAGE-LEVEL ROC-AUC: {roc_auc:.4f}")
    print(f"{'=' * 60}")

    # Find optimal threshold using Youden's J statistic
    fpr, tpr, thresholds = roc_curve(all_labels, all_scores)
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    print(f"\nOptimal threshold: {optimal_threshold:.4f}")

    # Confusion matrix at the optimal threshold.
    predictions = (all_scores >= optimal_threshold).astype(int)
    tp = np.sum((predictions == 1) & (all_labels == 1))
    fp = np.sum((predictions == 1) & (all_labels == 0))
    fn = np.sum((predictions == 0) & (all_labels == 1))
    tn = np.sum((predictions == 0) & (all_labels == 0))

    # Guard every ratio against a zero denominator.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    accuracy = (tp + tn) / len(all_labels)

    print(f"\nMetrics at optimal threshold:")
    print(f" Precision: {precision:.4f}")
    print(f" Recall: {recall:.4f}")
    print(f" F1-Score: {f1:.4f}")
    print(f" Accuracy: {accuracy:.4f}")
    print(f"\nConfusion Matrix:")
    print(f" TP: {tp}, FP: {fp}")
    print(f" FN: {fn}, TN: {tn}")

    # Plot ROC curve
    roc_path = config.RESULTS_DIR / "roc_curve.png"
    plot_roc_curve(fpr, tpr, roc_auc, str(roc_path))

    return {
        'roc_auc': float(roc_auc),
        'optimal_threshold': float(optimal_threshold),
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1),
        'accuracy': float(accuracy),
        'confusion_matrix': {
            'tp': int(tp), 'fp': int(fp),
            'fn': int(fn), 'tn': int(tn)
        }
    }


def _save_results(results):
    """Write the metrics dict to evaluation_results.json in RESULTS_DIR."""
    results_path = config.RESULTS_DIR / "evaluation_results.json"
    with open(results_path, 'w') as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {results_path}")
    print(f"Example predictions saved to {config.RESULTS_DIR}")
| if __name__ == "__main__": | |
| evaluate_padim() | |