"""
Model evaluation script for crop disease detection
"""
import torch
import torch.nn as nn
import numpy as np
import json
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from .dataset import create_data_loaders
from .model import create_model, load_checkpoint
class ModelEvaluator:
"""Evaluate trained model performance"""
def __init__(self, model, test_loader, class_names, device='cpu'):
self.model = model
self.test_loader = test_loader
self.class_names = class_names
self.device = device
def evaluate(self):
"""Evaluate model on test dataset"""
self.model.eval()
all_preds = []
all_labels = []
all_probs = []
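        # Run inference with gradient tracking disabled to save memory and time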
with torch.no_grad():
for inputs, labels in self.test_loader:
inputs = inputs.to(self.device)
labels = labels.to(self.device)
outputs = self.model(inputs)
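                # Softmax turns the raw logits into class probabilities; the argmax of the logits is the predicted class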
probs = torch.softmax(outputs, dim=1)
_, preds = torch.max(outputs, 1)
all_preds.extend(preds.cpu().numpy())
all_labels.extend(labels.cpu().numpy())
all_probs.extend(probs.cpu().numpy())
return np.array(all_preds), np.array(all_labels), np.array(all_probs)
def calculate_metrics(self, y_true, y_pred, y_probs):
"""Calculate comprehensive evaluation metrics"""
# Basic metrics
accuracy = accuracy_score(y_true, y_pred)
# Per-class metrics
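        # Passing labels explicitly keeps the per-class arrays aligned with class_names, even if a class is absent from the test set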
precision, recall, f1, support = precision_recall_fscore_support(
y_true, y_pred, average=None, labels=range(len(self.class_names))
)
# Macro and weighted averages
precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
y_true, y_pred, average='macro'
)
precision_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support(
y_true, y_pred, average='weighted'
)
# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Classification report
class_report = classification_report(
y_true, y_pred,
target_names=self.class_names,
output_dict=True
)
metrics = {
'accuracy': float(accuracy),
'precision_macro': float(precision_macro),
'recall_macro': float(recall_macro),
'f1_macro': float(f1_macro),
'precision_weighted': float(precision_weighted),
'recall_weighted': float(recall_weighted),
'f1_weighted': float(f1_weighted),
'per_class_metrics': {
'precision': precision.tolist(),
'recall': recall.tolist(),
'f1_score': f1.tolist(),
'support': support.tolist()
},
'confusion_matrix': cm.tolist(),
'classification_report': class_report
}
return metrics
    def plot_confusion_matrix(self, cm, save_path='outputs/confusion_matrix.png'):
        """Plot and save confusion matrix"""
        # The metrics dict stores the matrix as a nested list, so convert it back to an array first
        cm = np.asarray(cm)
        plt.figure(figsize=(12, 10))
        # Normalize each row so cells show the fraction of samples per true class
        cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
# Create heatmap
sns.heatmap(
cm_normalized,
annot=True,
fmt='.2f',
cmap='Blues',
xticklabels=[name.replace('___', '\n') for name in self.class_names],
yticklabels=[name.replace('___', '\n') for name in self.class_names],
cbar_kws={'label': 'Normalized Frequency'}
)
plt.title('Confusion Matrix (Normalized)', fontsize=16, pad=20)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
# Save plot
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(save_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Confusion matrix saved to: {save_path}")
def plot_per_class_metrics(self, metrics, save_path='outputs/per_class_metrics.png'):
"""Plot per-class performance metrics"""
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
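        # Break long class names (e.g. 'Crop___Disease') onto two lines for readable axis labels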
class_names_short = [name.replace('___', '\n') for name in self.class_names]
x_pos = np.arange(len(self.class_names))
# Precision
ax1.bar(x_pos, metrics['per_class_metrics']['precision'], color='skyblue', alpha=0.7)
ax1.set_title('Precision per Class')
ax1.set_ylabel('Precision')
ax1.set_xticks(x_pos)
ax1.set_xticklabels(class_names_short, rotation=45, ha='right')
ax1.set_ylim(0, 1)
ax1.grid(True, alpha=0.3)
# Recall
ax2.bar(x_pos, metrics['per_class_metrics']['recall'], color='lightcoral', alpha=0.7)
ax2.set_title('Recall per Class')
ax2.set_ylabel('Recall')
ax2.set_xticks(x_pos)
ax2.set_xticklabels(class_names_short, rotation=45, ha='right')
ax2.set_ylim(0, 1)
ax2.grid(True, alpha=0.3)
# F1-Score
ax3.bar(x_pos, metrics['per_class_metrics']['f1_score'], color='lightgreen', alpha=0.7)
ax3.set_title('F1-Score per Class')
ax3.set_ylabel('F1-Score')
ax3.set_xticks(x_pos)
ax3.set_xticklabels(class_names_short, rotation=45, ha='right')
ax3.set_ylim(0, 1)
ax3.grid(True, alpha=0.3)
        plt.tight_layout()
        # Ensure the output directory exists before saving
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Per-class metrics plot saved to: {save_path}")
def save_results(self, metrics, save_path='outputs/results.json'):
"""Save evaluation results to JSON file"""
# Add class names to results
results = {
'class_names': self.class_names,
'num_classes': len(self.class_names),
'test_samples': len(self.test_loader.dataset),
'metrics': metrics,
'model_info': {
'architecture': 'ResNet50',
'pretrained': True,
'transfer_learning': True
}
}
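        # Note: model_info is hardcoded; keep it in sync with the actual training configuration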
# Save to file
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
with open(save_path, 'w') as f:
json.dump(results, f, indent=2)
print(f"Results saved to: {save_path}")
return results
def print_summary(self, metrics):
"""Print evaluation summary"""
print("\n" + "="*60)
print("MODEL EVALUATION SUMMARY")
print("="*60)
print(f"Test Accuracy: {metrics['accuracy']:.4f}")
print(f"Precision (Macro): {metrics['precision_macro']:.4f}")
print(f"Recall (Macro): {metrics['recall_macro']:.4f}")
print(f"F1-Score (Macro): {metrics['f1_macro']:.4f}")
print(f"F1-Score (Weighted): {metrics['f1_weighted']:.4f}")
print("\nPer-Class Performance:")
print("-" * 60)
for i, class_name in enumerate(self.class_names):
precision = metrics['per_class_metrics']['precision'][i]
recall = metrics['per_class_metrics']['recall'][i]
f1 = metrics['per_class_metrics']['f1_score'][i]
support = metrics['per_class_metrics']['support'][i]
print(f"{class_name:40} | P: {precision:.3f} | R: {recall:.3f} | F1: {f1:.3f} | N: {support:2d}")
print("="*60)
def evaluate_model(checkpoint_path, data_dir='data', batch_size=32):
"""Main evaluation function"""
# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# Load data
print("Loading test dataset...")
_, _, test_loader, class_names = create_data_loaders(
data_dir=data_dir,
batch_size=batch_size,
num_workers=0
)
print(f"Test dataset loaded: {len(test_loader.dataset)} samples")
# Create and load model
print("Loading trained model...")
model = create_model(num_classes=len(class_names), device=device)
try:
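        # load_checkpoint is expected to return a 4-tuple; only the model and the epoch number are used here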
model, _, epoch, _ = load_checkpoint(checkpoint_path, model, device=device)
print(f"Model loaded successfully from epoch {epoch}")
except Exception as e:
print(f"Error loading checkpoint: {e}")
print("Using untrained model for testing...")
# Create evaluator
evaluator = ModelEvaluator(model, test_loader, class_names, device)
# Run evaluation
print("Evaluating model...")
y_pred, y_true, y_probs = evaluator.evaluate()
# Calculate metrics
metrics = evaluator.calculate_metrics(y_true, y_pred, y_probs)
# Print summary
evaluator.print_summary(metrics)
# Generate plots
evaluator.plot_confusion_matrix(metrics['confusion_matrix'])
evaluator.plot_per_class_metrics(metrics)
# Save results
results = evaluator.save_results(metrics)
return results
if __name__ == "__main__":
# Evaluate the trained model
results = evaluate_model(
checkpoint_path='models/crop_disease_resnet50.pth',
data_dir='data',
batch_size=16
)
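    # Note: because of the relative imports above, run this as a module from the project root,
    # e.g. `python -m src.evaluate` (assuming this file lives inside a `src` package).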