# Ameya729's picture
# Upload 7 files
# b67cb70 verified
"""
Evaluation script for PaDiM anomaly detection model
"""
import torch
import numpy as np
from tqdm import tqdm
from pathlib import Path
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve
import sys
import json
sys.path.append(str(Path(__file__).parent))
import config
from src.data_loader import get_dataloader
from src.feature_extractor import FeatureExtractor, extract_embeddings
from src.padim import PaDiM
from src.visualize import plot_roc_curve, save_prediction
from PIL import Image
def evaluate_padim():
    """Evaluate a trained PaDiM model on the test set.

    Loads the pickled PaDiM model, embeds every test image with the
    configured CNN backbone, scores each image, and reports the
    image-level ROC-AUC plus precision / recall / F1 / accuracy at the
    Youden-optimal threshold.  Visualizations for the first 20 images
    are written to ``config.RESULTS_DIR`` along with an
    ``evaluation_results.json`` metric summary.

    Returns:
        dict: the metric summary that was also written to JSON.

    Raises:
        FileNotFoundError: if no trained model exists in
            ``config.MODEL_DIR`` (run ``train.py`` first).
        RuntimeError: if the test set contains only one class, so
            ROC-AUC is undefined.
    """
    print("=" * 60)
    print("AUTOMATED TABLET DEFECT DETECTION - EVALUATION")
    print("=" * 60)

    # Prefer GPU when available; feature extraction dominates runtime.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the fitted PaDiM statistics saved by train.py.
    print("\nLoading trained model...")
    model_path = config.MODEL_DIR / "padim_model.pkl"
    if not model_path.exists():
        raise FileNotFoundError(f"Model not found at {model_path}. Run train.py first.")
    padim_model = PaDiM()
    padim_model.load(model_path)

    # Backbone CNN used to embed each image.  eval() freezes dropout /
    # batch-norm layers so inference is deterministic.
    print("Initializing feature extractor...")
    extractor = FeatureExtractor(
        backbone=config.BACKBONE,
        layers=config.FEATURE_LAYERS
    ).to(device)
    extractor.eval()

    # Make sure the output directory exists before any file is written.
    config.RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    print("\nEvaluating on test set...")
    all_scores = []
    all_labels = []
    all_predictions = []

    # "good" images are negatives (label 0); every defect type is a positive.
    defect_types = ["good"] + config.DEFECT_TYPES
    for defect_type in defect_types:
        test_dir = config.TEST_DIR / defect_type
        if not test_dir.exists():
            print(f"Skipping {defect_type} (directory not found)")
            continue
        print(f"\nProcessing {defect_type}...")
        # Ground truth: 0 for good, 1 for defect.
        is_defect = 1 if defect_type != "good" else 0

        # batch_size=1 keeps a one-to-one mapping between scores and paths.
        test_loader = get_dataloader(test_dir, batch_size=1, shuffle=False)
        for images, paths, _ in tqdm(test_loader):
            images = images.to(device)
            # Gradients are not needed for inference.
            with torch.no_grad():
                embeddings = extract_embeddings(extractor, images)
            embeddings_np = embeddings.cpu().numpy()
            anomaly_score, anomaly_map = padim_model.predict(embeddings_np)
            all_scores.append(anomaly_score)
            all_labels.append(is_defect)

            # Persist visualizations for the first 20 processed images only.
            if len(all_predictions) < 20:
                img_path = paths[0]
                save_path = config.RESULTS_DIR / f"{defect_type}_{Path(img_path).name}"
                # Context manager closes the image file (original leaked it).
                with Image.open(img_path) as img:
                    save_prediction(img, anomaly_score, anomaly_map, str(save_path))
                all_predictions.append({
                    'image': img_path,
                    'score': float(anomaly_score),
                    'label': is_defect
                })

    all_scores = np.array(all_scores)
    all_labels = np.array(all_labels)

    # roc_auc_score raises an opaque ValueError when only one class is
    # present; fail with an actionable message instead.
    if np.unique(all_labels).size < 2:
        raise RuntimeError(
            "Test set must contain both good and defective images to compute ROC-AUC."
        )

    roc_auc = roc_auc_score(all_labels, all_scores)
    print(f"\n{'=' * 60}")
    print(f"IMAGE-LEVEL ROC-AUC: {roc_auc:.4f}")
    print(f"{'=' * 60}")

    # Youden's J statistic (max of TPR - FPR) selects the operating point.
    fpr, tpr, thresholds = roc_curve(all_labels, all_scores)
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]
    print(f"\nOptimal threshold: {optimal_threshold:.4f}")

    # Confusion matrix + derived metrics at the chosen threshold.
    predictions = (all_scores >= optimal_threshold).astype(int)
    tp = np.sum((predictions == 1) & (all_labels == 1))
    fp = np.sum((predictions == 1) & (all_labels == 0))
    fn = np.sum((predictions == 0) & (all_labels == 1))
    tn = np.sum((predictions == 0) & (all_labels == 0))
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    accuracy = (tp + tn) / len(all_labels)

    print(f"\nMetrics at optimal threshold:")
    print(f" Precision: {precision:.4f}")
    print(f" Recall: {recall:.4f}")
    print(f" F1-Score: {f1:.4f}")
    print(f" Accuracy: {accuracy:.4f}")
    print(f"\nConfusion Matrix:")
    print(f" TP: {tp}, FP: {fp}")
    print(f" FN: {fn}, TN: {tn}")

    # ROC curve figure for the report.
    roc_path = config.RESULTS_DIR / "roc_curve.png"
    plot_roc_curve(fpr, tpr, roc_auc, str(roc_path))

    # JSON-serializable summary (numpy scalars cast to builtins).
    results = {
        'roc_auc': float(roc_auc),
        'optimal_threshold': float(optimal_threshold),
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1),
        'accuracy': float(accuracy),
        'confusion_matrix': {
            'tp': int(tp), 'fp': int(fp),
            'fn': int(fn), 'tn': int(tn)
        }
    }
    results_path = config.RESULTS_DIR / "evaluation_results.json"
    with open(results_path, 'w') as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {results_path}")
    print(f"Example predictions saved to {config.RESULTS_DIR}")
    return results
# Script entry point: run the full evaluation when executed directly.
if __name__ == "__main__":
    evaluate_padim()