Spaces:

nirmalpratheep
/

CIFAR100_ImageClassifier

Sleeping

HF Deploy

Deploy CIFAR-100 classifier

a92663e 8 months ago

16.4 kB

	"""
	CIFAR-100 Image Classifier - Hugging Face Space
	===============================================
	Advanced ResNet-34 model trained on CIFAR-100.

	Architecture: ResNet-34
	- 100 output classes across diverse object categories
	- Deep residual learning for robust feature extraction
	- Trained with Albumentations augmentations
	"""

	import torch
	import torch.nn.functional as F
	import gradio as gr
	from PIL import Image
	from pathlib import Path
	import numpy as np
	import cv2

	# Import model architecture and preprocessing
	from model_cifar import ResNet34
	from preprocess import CIFAR100_MEAN, CIFAR100_STD
	from torchvision import transforms

	# CIFAR-100 class names (official dataset labels), listed alphabetically.
	# A name's position in this list is the class index used to look up predictions.
	CIFAR100_CLASSES = [
	    # 0-9
	    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver',
	    'bed', 'bee', 'beetle', 'bicycle', 'bottle',
	    # 10-19
	    'bowl', 'boy', 'bridge', 'bus', 'butterfly',
	    'camel', 'can', 'castle', 'caterpillar', 'cattle',
	    # 20-29
	    'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach',
	    'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
	    # 30-39
	    'dolphin', 'elephant', 'flatfish', 'forest', 'fox',
	    'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
	    # 40-49
	    'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard',
	    'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
	    # 50-59
	    'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid',
	    'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
	    # 60-69
	    'plain', 'plate', 'poppy', 'porcupine', 'possum',
	    'rabbit', 'raccoon', 'ray', 'road', 'rocket',
	    # 70-79
	    'rose', 'sea', 'seal', 'shark', 'shrew',
	    'skunk', 'skyscraper', 'snail', 'snake', 'spider',
	    # 80-89
	    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table',
	    'tank', 'telephone', 'television', 'tiger', 'tractor',
	    # 90-99
	    'train', 'trout', 'tulip', 'turtle', 'wardrobe',
	    'whale', 'willow_tree', 'wolf', 'woman', 'worm',
	]

	# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
	_device_name = "cuda" if torch.cuda.is_available() else "cpu"
	device = torch.device(_device_name)

	# ---------------------------
	# Load Model
	# ---------------------------
	@torch.no_grad()
	def load_model(checkpoint_path: str = None):
	    """Build the ResNet-34 CIFAR-100 model and optionally load trained weights.

	    Args:
	        checkpoint_path: Path to a checkpoint file, or ``None`` to skip loading.
	            The file may contain either a bare ``state_dict`` or a dict with a
	            ``'model_state_dict'`` entry (and optionally ``'epoch'``).

	    Returns:
	        The model, moved to ``device`` and switched to eval mode. If no usable
	        checkpoint is available the weights are randomly initialized; loading
	        problems are printed, never raised.
	    """
	    model = ResNet34(num_classes=100).to(device)

	    if not checkpoint_path:
	        print("ℹ️ No checkpoint provided, using randomly initialized model")
	    elif not Path(checkpoint_path).exists():
	        # A path was given but is missing: say so instead of claiming none was provided.
	        print(f"⚠️ Checkpoint not found: {checkpoint_path}")
	        print("Using randomly initialized model")
	    else:
	        try:
	            # NOTE: torch.load may unpickle arbitrary objects; only load trusted checkpoint files.
	            checkpoint = torch.load(checkpoint_path, map_location=device)
	            if 'model_state_dict' in checkpoint:
	                model.load_state_dict(checkpoint['model_state_dict'])
	                print(f"✅ Loaded checkpoint from epoch {checkpoint.get('epoch', '?')}")
	            else:
	                model.load_state_dict(checkpoint)
	                print(f"✅ Loaded model weights from {checkpoint_path}")
	        except Exception as e:
	            print(f"⚠️ Could not load checkpoint: {e}")
	            print("Using randomly initialized model")
	            # A failed load may already have copied some tensors into the model;
	            # rebuild it so the "randomly initialized" message is actually true.
	            model = ResNet34(num_classes=100).to(device)

	    model.eval()
	    return model


	print(f"Device: {device}")
	# Checkpoint candidates in priority order: best model, then epoch 99.
	# The trailing None stands for "no checkpoint" (random initialization).
	checkpoint_paths = [
	    "./best_model.pth",  # For HF Space deployment
	    "./snapshots_complete/cifar_epoch_99.pth",
	    None,  # Fallback to random initialization
	]

	# Take the first usable candidate: the None sentinel or a file that exists on disk.
	# The default of None keeps the random-init fallback even if the list has no sentinel.
	checkpoint_path = next(
	    (
	        candidate
	        for candidate in checkpoint_paths
	        if candidate is None or Path(candidate).exists()
	    ),
	    None,
	)
	model = load_model(checkpoint_path)

	# ---------------------------
	# Preprocessing pipeline
	# ---------------------------
	def _resize_to_tensor_steps():
	    """Return fresh copies of the steps both pipelines share: resize to 32x32, then ToTensor."""
	    return [transforms.Resize((32, 32)), transforms.ToTensor()]


	# Model input pipeline: shared steps followed by CIFAR-100 mean/std normalization.
	preprocess = transforms.Compose(
	    _resize_to_tensor_steps()
	    + [transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD)]
	)

	# Same pipeline without the normalization step.
	preprocess_no_norm = transforms.Compose(_resize_to_tensor_steps())


	# ---------------------------
	# Grad-CAM Implementation
	# ---------------------------
	class GradCAM:
	    """Grad-CAM: Visual Explanations from Deep Networks.

	    Registers hooks on ``target_layer`` that capture its forward output and the
	    gradient flowing back into that output, and combines the two into a
	    class-activation heatmap.
	    """

	    def __init__(self, model, target_layer):
	        """Attach the capture hooks.

	        Args:
	            model: Module that is called on the input tensor in ``generate_cam``.
	            target_layer: Sub-module of ``model`` whose output is visualized.
	        """
	        self.model = model
	        self.target_layer = target_layer
	        self.gradients = None
	        self.activations = None

	        # Keep the handles so the hooks can be detached again (see remove_hooks).
	        self._hook_handles = [
	            target_layer.register_forward_hook(self.save_activation),
	            target_layer.register_full_backward_hook(self.save_gradient),
	        ]

	    def remove_hooks(self):
	        """Detach the hooks registered by ``__init__``; safe to call more than once."""
	        for handle in self._hook_handles:
	            handle.remove()
	        self._hook_handles = []

	    def save_activation(self, module, input, output):
	        """Forward hook: remember the target layer's output (detached)."""
	        self.activations = output.detach()

	    def save_gradient(self, module, grad_input, grad_output):
	        """Backward hook: remember the gradient w.r.t. the target layer's output (detached)."""
	        self.gradients = grad_output[0].detach()

	    def generate_cam(self, input_tensor, target_class=None):
	        """Generate a Grad-CAM heatmap for the first sample of ``input_tensor``.

	        Args:
	            input_tensor: Batched model input; only index 0 of the batch is used.
	            target_class: Class index to explain. Defaults to the model's argmax
	                for the first sample.

	        Returns:
	            ``(cam, target_class)`` where ``cam`` is a NumPy array scaled so its
	            maximum is 1 (all zeros if nothing is positively activated) and
	            ``target_class`` is the explained class index as an ``int``.

	        Raises:
	            RuntimeError: If the hooks captured nothing (e.g. after ``remove_hooks``).
	        """
	        # Forget captures from earlier calls so stale data can never be reused silently.
	        self.gradients = None
	        self.activations = None

	        # Grad-CAM needs a graph even if the caller is inside a torch.no_grad() block.
	        with torch.enable_grad():
	            model_output = self.model(input_tensor)

	            if target_class is None:
	                target_class = model_output.argmax(dim=1).item()
	            target_class = int(target_class)

	            # Back-propagate a one-hot signal for the chosen class only.
	            self.model.zero_grad()
	            one_hot = torch.zeros_like(model_output)
	            one_hot[0, target_class] = 1
	            model_output.backward(gradient=one_hot)

	        if self.gradients is None or self.activations is None:
	            raise RuntimeError(
	                "Grad-CAM hooks did not capture activations/gradients for the target layer"
	            )

	        gradients = self.gradients[0]
	        activations = self.activations[0]

	        # Global average pooling of the gradients gives one weight per channel.
	        weights = gradients.mean(dim=(1, 2), keepdim=True)

	        # Weighted sum of the activation maps; ReLU keeps only positive evidence.
	        cam = F.relu((weights * activations).sum(dim=0))

	        # Rescale to [0, 1]; skip the division when the map is entirely zero.
	        cam = cam - cam.min()
	        peak = cam.max()
	        if peak > 0:
	            cam = cam / peak

	        return cam.cpu().numpy(), target_class


	def apply_gradcam(image_pil, model, gradcam, top_class_idx, output_size=200):
	    """Run Grad-CAM for one image and blend the heatmap over it.

	    Args:
	        image_pil: Input PIL image in any mode; it is converted to RGB here.
	        model: Unused; kept so existing callers keep working.
	        gradcam: Object whose ``generate_cam(tensor, target_class=...)`` returns
	            ``(cam, class_idx)``.
	        top_class_idx: Class index to explain.
	        output_size: Side length in pixels of the returned square images.

	    Returns:
	        ``(overlay, heatmap)``: RGB NumPy arrays of shape
	        ``(output_size, output_size, 3)``.
	    """
	    # Convert once and reuse it for the tensor and the overlay. Blending needs a
	    # 3-channel image, so RGBA or grayscale inputs would otherwise fail.
	    rgb_image = image_pil.convert("RGB")
	    img_tensor = preprocess(rgb_image).unsqueeze(0).to(device)

	    cam, _ = gradcam.generate_cam(img_tensor, target_class=top_class_idx)

	    # Use the model input's spatial size instead of repeating the 32x32 constant.
	    height, width = img_tensor.shape[-2:]

	    # cv2 and PIL both take sizes as (width, height).
	    cam_resized = cv2.resize(cam, (width, height))
	    img_np = np.array(rgb_image.resize((width, height)))

	    # Colorize the CAM; OpenCV colormaps are BGR, so convert to RGB.
	    heatmap = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)
	    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

	    # 60% image + 40% heatmap.
	    overlay = cv2.addWeighted(img_np, 0.6, heatmap, 0.4, 0)

	    # Upscale both images for better visibility in the UI.
	    target_size = (output_size, output_size)
	    overlay_large = cv2.resize(overlay, target_size, interpolation=cv2.INTER_LINEAR)
	    heatmap_large = cv2.resize(heatmap, target_size, interpolation=cv2.INTER_LINEAR)

	    return overlay_large, heatmap_large


	# Initialize Grad-CAM on conv2 of the final block in layer4
	# (targeted as the last convolutional layer).
	_gradcam_target_layer = model.layer4[-1].conv2
	gradcam = GradCAM(model, _gradcam_target_layer)


	# ---------------------------
	# Prediction Function
	# ---------------------------
	def _render_prediction_html(probabilities, sorted_indices):
	    """Build the results HTML: headline prediction, top-5 bars, and the Grad-CAM hint."""
	    best = sorted_indices[0]
	    parts = [
	        "<div style='padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); "
	        "border-radius: 10px; color: white; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>",
	        "<h2>🎯 Prediction Result</h2>",
	        f"<div style='font-size: 24px; font-weight: bold;'>{CIFAR100_CLASSES[best].upper()}</div>",
	        f"<div style='font-size: 18px;'>Confidence: <strong>{probabilities[best] * 100:.2f}%</strong></div>",
	        "</div>",
	        "<div style='margin-top: 20px; background: #f8f9fa; border-radius: 8px; padding: 15px;'>",
	        "<h3>📊 Top 5 Predictions</h3>",
	    ]

	    for rank, idx in enumerate(sorted_indices[:5], 1):
	        name = CIFAR100_CLASSES[idx]
	        prob = probabilities[idx]
	        bar_width = int(prob * 100)
	        # Highlight the first two ranks; the rest share a neutral grey.
	        color = "#667eea" if rank == 1 else ("#764ba2" if rank == 2 else "#95a5a6")
	        parts.extend([
	            "<div style='margin: 8px 0;'>",
	            "<div style='display: flex; justify-content: space-between;'>",
	            f"<span>{rank}. {name}</span>",
	            f"<span style='font-weight:bold; color:{color}'>{prob * 100:.2f}%</span>",
	            "</div>",
	            "<div style='background:#e9ecef; border-radius:4px; height:20px;'>",
	            f"<div style='width:{bar_width}%; background:{color}; height:100%; border-radius:4px;'></div>",
	            "</div>",
	            "</div>",
	        ])

	    parts.extend([
	        "</div>",
	        "<div style='margin-top: 15px; padding: 10px; background: #e8f4f8; "
	        "border-left: 4px solid #667eea; border-radius: 4px;'>",
	        "<p style='margin: 0; color: #333;'><strong>💡 Grad-CAM Visualization:</strong> "
	        "The heatmap shows which parts of the image the model focused on to make its prediction. "
	        "Red/yellow areas indicate high importance.</p>",
	        "</div>",
	    ])
	    return "\n".join(parts)


	def predict(image: Image.Image):
	    """Predict the class of an input image with Grad-CAM visualization.

	    Args:
	        image: Uploaded PIL image, or ``None`` when nothing has been uploaded.

	    Returns:
	        A 4-tuple ``(top3, html, overlay, heatmap)``:
	        ``top3`` maps the three most probable class names to their probability,
	        ``html`` is the formatted result (or an error message), and
	        ``overlay`` / ``heatmap`` are PIL images, or ``None`` if Grad-CAM failed.
	        Errors are reported through ``html`` and never raised to the caller.
	    """
	    # Local import: keeps this block self-contained without touching file-level imports.
	    from html import escape

	    if image is None:
	        return {}, "<p style='color: red;'>Please upload an image first!</p>", None, None

	    try:
	        # Convert once; the same RGB image feeds the classifier and Grad-CAM.
	        rgb_image = image.convert("RGB")
	        img_tensor = preprocess(rgb_image).unsqueeze(0).to(device)

	        with torch.no_grad():
	            outputs = model(img_tensor)
	            probabilities = torch.softmax(outputs, dim=1)[0].cpu().numpy()

	        # Class indices ordered from most to least probable.
	        sorted_indices = np.argsort(probabilities)[::-1]

	        top3_results = {
	            CIFAR100_CLASSES[i]: float(probabilities[i])
	            for i in sorted_indices[:3]
	        }

	        # Grad-CAM is best effort: a failure here must not hide the prediction.
	        try:
	            overlay, heatmap = apply_gradcam(rgb_image, model, gradcam, int(sorted_indices[0]))
	            gradcam_overlay = Image.fromarray(overlay.astype(np.uint8))
	            gradcam_heatmap = Image.fromarray(heatmap.astype(np.uint8))
	        except Exception as e:
	            print(f"Grad-CAM error: {e}")
	            gradcam_overlay = None
	            gradcam_heatmap = None

	        results_html = _render_prediction_html(probabilities, sorted_indices)
	        return top3_results, results_html, gradcam_overlay, gradcam_heatmap

	    except Exception as e:
	        # UI boundary: log for the operator and show an HTML-escaped message to the user.
	        print(f"Prediction error: {e}")
	        return {}, f"<p style='color: red;'>Error during prediction: {escape(str(e))}</p>", None, None


	# ---------------------------
	# Model Information Section
	# ---------------------------
	# Markdown text rendered inside the "Model Information & Performance Metrics"
	# accordion of the Gradio UI. All figures are static text, not computed here.
	# NOTE(review): the text names the model "ResNet-34" but describes 10 weight
	# layers with a [1,1,1,1] BasicBlock configuration; confirm against
	# model_cifar.ResNet34 which description is accurate.
	model_description = """
	## 🚀 About This Model
	Custom Lightweight ResNet trained on CIFAR-100 from scratch

	### 📊 Performance Metrics (100 Epochs)
	- Top-1 Accuracy: 76.68% ✅ (Target: 73%)
	- Top-3 Accuracy: 90.95%
	- Top-5 Accuracy: 94.07%
	- Best Test Accuracy: 76.79% (Epoch 99)
	- Macro F1-Score: 0.7670
	- Dataset: CIFAR-100 (50,000 train / 10,000 test)

	### 🏗️ Architecture
	- Model: Custom ResNet-34 variant (CIFAR-optimized)
	- Parameters: 4,949,412 (~5M)
	- Depth: 10 weight layers (1 stem + 8 residual + 1 FC)
	- Design: 4 BasicBlocks with [1,1,1,1] configuration
	- Key Features:
	- 3×3 initial conv (no 7×7, no MaxPool)
	- Progressive downsampling: 32×32 → 16×16 → 8×8 → 4×4
	- Channel expansion: 64 → 128 → 256 → 512
	- Receptive field: 63×63 (covers full 32×32 image)
	- Global Average Pooling + Linear(512 → 100)

	### 🎯 Training Configuration
	- Optimizer: SGD with Nesterov momentum (0.9)
	- LR Schedule: OneCycle (0.01 → 0.1 → 0.01 → 0.001)
	- Phase 1: 41 epochs warmup
	- Phase 2: 41 epochs cooldown
	- Phase 3: 18 epochs annihilation
	- Augmentations: Albumentations (Flip, ShiftScaleRotate, CoarseDropout, ColorJitter)
	- Regularization: Weight decay (1e-4), Label smoothing (0.1)
	- Mixed Precision: Enabled (AMP)
	- Batch Size: 512

	### 💡 CIFAR-100 Classes
	100 fine-grained categories across 20 superclasses:
	- 🦁 Animals: lion, tiger, elephant, whale, bear, leopard, wolf
	- 🚗 Vehicles: pickup_truck, bus, train, streetcar, motorcycle, tractor
	- 🌳 Trees: maple_tree, oak_tree, palm_tree, pine_tree, willow_tree
	- 🌺 Flowers: rose, poppy, orchid, sunflower, tulip
	- 🐠 Aquatic: aquarium_fish, flatfish, ray, shark, trout
	- 🏠 Structures: house, castle, skyscraper, bridge, road
	- 🍎 Food: apple, orange, pear, sweet_pepper, mushroom
	- 👨 People: man, woman, baby, boy, girl
	- 🪑 Furniture: bed, chair, couch, table, wardrobe
	- And many more!

	### 🏆 Best Performing Classes (F1-Score)
	1. Wardrobe - 94.58%
	2. Sunflower - 93.81%
	3. Poppy - 93.15%
	4. Can - 93.10%
	5. Skyscraper - 91.00%

	### 🚀 Deployment
	- Trained without pre-trained weights
	- Built with PyTorch and Albumentations
	- Deployed on Hugging Face Spaces
	- Inference optimized for CPU/GPU
	"""

	# Example images for the UI: at most 12 entries, each a one-element list holding a
	# file path. Curated files come first; remaining "*_1.jpg" files fill the free slots.
	examples = []
	example_dir = Path("examples")
	if example_dir.is_dir():
	    max_examples = 12  # Limit to 12 examples for clean UI

	    # One example per category (prioritize _1.jpg files for consistency)
	    priority_examples = [
	        "lion_1.jpg", "tiger_1.jpg", "elephant_1.jpg", "bear_1.jpg",
	        "pickup_truck_1.jpg", "bus_1.jpg", "train_1.jpg",
	        "rose_1.jpg", "sunflower_1.jpg", "tulip_1.jpg",
	        "apple_1.jpg", "orange_1.jpg", "pear_1.jpg",
	        "castle_1.jpg", "skyscraper_1.jpg", "house_1.jpg",
	    ]

	    # Curated files first, then every other "*_1.jpg". The glob result is sorted
	    # so the fallback selection does not depend on filesystem listing order.
	    candidates = [example_dir / filename for filename in priority_examples]
	    candidates.extend(sorted(example_dir.glob("*_1.jpg")))

	    # A set gives O(1) duplicate checks instead of rebuilding a list per candidate.
	    seen_paths = set()
	    for file_path in candidates:
	        path_str = str(file_path)
	        if path_str in seen_paths or not file_path.exists():
	            continue
	        seen_paths.add(path_str)
	        examples.append([path_str])
	        if len(examples) >= max_examples:
	            break


	# ---------------------------
	# Gradio UI
	# ---------------------------
	# Extra CSS passed to gr.Blocks below.
	custom_css = """
	.gradio-container { font-family: 'Inter', sans-serif; }
	.output-html { font-family: 'Inter', sans-serif; }
	"""

	with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
	    # Page header
	    gr.Markdown("# 🎯 CIFAR-100 Image Classifier with Grad-CAM")
	    gr.Markdown("### Deep ResNet-34 trained on 100 object categories • Explainable AI with Grad-CAM heatmaps")

	    with gr.Row():
	        # Left column: image upload and the classify button
	        with gr.Column(scale=1):
	            image_input = gr.Image(type="pil", label="Upload Image", height=500)
	            predict_btn = gr.Button("🚀 Classify Image", variant="primary", size="lg")
	            gr.Markdown("Upload an image belonging to one of the CIFAR-100 categories.")

	        # Right column: Grad-CAM images on top, predictions underneath
	        with gr.Column(scale=1):
	            # The two Grad-CAM views sit side by side
	            with gr.Row():
	                with gr.Column(scale=1):
	                    gradcam_overlay_output = gr.Image(label="🔥 Grad-CAM Overlay", type="pil", height=200)
	                    gr.Markdown("Overlay - Model attention on image")

	                with gr.Column(scale=1):
	                    gradcam_heatmap_output = gr.Image(label="🌡️ Grad-CAM Heatmap", type="pil", height=200)
	                    gr.Markdown("Heatmap - Red = high importance")

	            # Prediction widgets
	            label_output = gr.Label(num_top_classes=3, label="Top 3 Predictions")
	            html_output = gr.HTML(label="Detailed Results")

	    # predict() returns four values; they map onto these components in this order.
	    prediction_outputs = [
	        label_output,
	        html_output,
	        gradcam_overlay_output,
	        gradcam_heatmap_output,
	    ]

	    # The example gallery is only shown when example files were found.
	    if examples:
	        gr.Examples(
	            examples=examples,
	            inputs=image_input,
	            outputs=prediction_outputs,
	            fn=predict,
	            cache_examples=False,
	        )

	    with gr.Accordion("📖 Model Information & Performance Metrics", open=False):
	        gr.Markdown(model_description)

	    # Run the prediction on a button click and whenever the input image changes.
	    for register_event in (predict_btn.click, image_input.change):
	        register_event(
	            fn=predict,
	            inputs=image_input,
	            outputs=prediction_outputs,
	        )

	# ---------------------------
	# Launch
	# ---------------------------
	# Start the Gradio app only when this file is executed directly as a script.
	if __name__ == "__main__":
	demo.launch()