# NOTE: removed non-code extraction artifacts ("Spaces:" / "Sleeping") that preceded the module.
"""
CNN Model Architectures for MNIST Classification

This module provides CNN models for digit recognition:
- BaselineCNN: Simple 2-layer CNN (target: 98-99% accuracy)
- ImprovedCNN: Enhanced architecture with batch normalization
- Model utilities: parameter counting, architecture summary

Usage:
    from scripts.models import BaselineCNN
    model = BaselineCNN()
    output = model(images)  # (batch, 10) logits
"""
from typing import Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
class BaselineCNN(nn.Module):
    """
    Baseline CNN for MNIST classification.

    Architecture:
        Input: (batch, 1, 28, 28)
        Conv1: 1 -> 32 channels, 3x3 kernel, padding=1
        ReLU + MaxPool(2x2) -> (batch, 32, 14, 14)
        Conv2: 32 -> 64 channels, 3x3 kernel, padding=1
        ReLU + MaxPool(2x2) -> (batch, 64, 7, 7)
        Flatten -> (batch, 3136)
        FC1: 3136 -> 128, ReLU, Dropout(0.5)
        FC2: 128 -> 10 (output logits)

    Design Rationale:
        - 2 conv layers: Balance between simplicity and capacity
        - 32->64 filters: Standard progression, proven effective
        - Dropout 0.5: Prevent overfitting on small dataset
        - No batch norm: Keep baseline simple

    Expected Performance:
        - Parameters: ~422k (421,642; the FC1 layer 3136->128 dominates)
        - Test accuracy: 98-99%
        - Training time: ~5-10 min on GPU
    """

    def __init__(self, dropout_rate: float = 0.5):
        """
        Initialize baseline CNN.

        Args:
            dropout_rate: Dropout probability (default 0.5)
        """
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=32,
            kernel_size=3,
            padding=1
        )
        self.conv2 = nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            padding=1
        )
        # Pooling layer (shared: MaxPool2d holds no parameters, so reuse is safe)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers
        # After two pooling layers: 28->14->7, so 64*7*7 = 3136
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        # Dropout for regularization
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor of shape (batch, 1, 28, 28)

        Returns:
            Output logits of shape (batch, 10)
        """
        # Conv block 1: Conv -> ReLU -> Pool
        x = self.conv1(x)       # (batch, 32, 28, 28)
        x = F.relu(x)
        x = self.pool(x)        # (batch, 32, 14, 14)
        # Conv block 2: Conv -> ReLU -> Pool
        x = self.conv2(x)       # (batch, 64, 14, 14)
        x = F.relu(x)
        x = self.pool(x)        # (batch, 64, 7, 7)
        # Flatten everything but the batch dim. Unlike view(-1, 3136),
        # flatten raises on a mismatched input instead of silently
        # reinterpreting the batch dimension.
        x = torch.flatten(x, 1)  # (batch, 3136)
        # Fully connected layers
        x = self.fc1(x)         # (batch, 128)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)         # (batch, 10)
        return x
class ImprovedCNN(nn.Module):
    """
    Enhanced CNN with batch normalization and deeper architecture.

    Architecture:
        Conv1: 1 -> 32, BatchNorm, ReLU, MaxPool
        Conv2: 32 -> 64, BatchNorm, ReLU, MaxPool
        Conv3: 64 -> 128, BatchNorm, ReLU, MaxPool
        Flatten
        FC1: 128*3*3 -> 256, BatchNorm, ReLU, Dropout(0.5)
        FC2: 256 -> 10

    Expected Performance:
        - Parameters: ~391k (391,370)
        - Test accuracy: 99%+
        - Converges faster than baseline
    """

    def __init__(self, dropout_rate: float = 0.5):
        """
        Initialize improved CNN.

        Args:
            dropout_rate: Dropout probability (default 0.5)
        """
        super().__init__()
        # Convolutional layers with batch normalization
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        # Parameter-free, so a single instance can be reused for all blocks
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers
        # After three pooling layers: 28->14->7->3 (floor division), so 128*3*3 = 1152
        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.bn_fc = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor of shape (batch, 1, 28, 28)

        Returns:
            Output logits of shape (batch, 10)
        """
        # Conv block 1
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.pool(x)        # (batch, 32, 14, 14)
        # Conv block 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.pool(x)        # (batch, 64, 7, 7)
        # Conv block 3
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.pool(x)        # (batch, 128, 3, 3)
        # Flatten everything but the batch dim; safer than view(-1, 1152),
        # which would silently reshape a mismatched input.
        x = torch.flatten(x, 1)
        # Fully connected layers
        x = self.fc1(x)
        x = self.bn_fc(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
def count_parameters(model: nn.Module) -> Tuple[int, int]:
    """
    Count total and trainable parameters in model.

    Args:
        model: PyTorch model

    Returns:
        Tuple of (total_params, trainable_params)
    """
    total = 0
    trainable = 0
    # Single pass over the parameter list, tallying both counts at once.
    for param in model.parameters():
        n = param.numel()
        total += n
        if param.requires_grad:
            trainable += n
    return total, trainable
def get_model_summary(
    model: nn.Module, input_size: Tuple[int, ...] = (1, 1, 28, 28)
) -> str:
    """
    Generate model architecture summary.

    Args:
        model: PyTorch model
        input_size: Input tensor size (batch, channels, height, width)

    Returns:
        Formatted string with model summary
    """
    total_params, trainable_params = count_parameters(model)
    # Rough on-disk footprint assuming float32 (4 bytes per parameter).
    size_mb = total_params * 4 / (1024 ** 2)
    divider = "=" * 60
    lines = [
        divider,
        f"Model: {model.__class__.__name__}",
        divider,
        f"Input size: {input_size}",
        f"Total parameters: {total_params:,}",
        f"Trainable parameters: {trainable_params:,}",
        f"Model size (MB): {size_mb:.2f}",
        divider,
    ]
    return "\n".join(lines)
| def test_model(model: nn.Module, device: str = 'cpu') -> bool: | |
| """ | |
| Test model with dummy input. | |
| Args: | |
| model: PyTorch model | |
| device: Device to run on ('cpu' or 'cuda') | |
| Returns: | |
| True if test passes, False otherwise | |
| """ | |
| try: | |
| model = model.to(device) | |
| model.eval() | |
| # Create dummy input | |
| dummy_input = torch.randn(4, 1, 28, 28).to(device) | |
| # Forward pass | |
| with torch.no_grad(): | |
| output = model(dummy_input) | |
| # Check output shape | |
| assert output.shape == (4, 10), f"Expected shape (4, 10), got {output.shape}" | |
| # Check output is finite | |
| assert torch.isfinite(output).all(), "Output contains NaN or Inf" | |
| print("✓ Model test passed") | |
| print(f" Input shape: {dummy_input.shape}") | |
| print(f" Output shape: {output.shape}") | |
| print(f" Output range: [{output.min():.4f}, {output.max():.4f}]") | |
| return True | |
| except Exception as e: | |
| print(f"✗ Model test failed: {e}") | |
| return False | |
if __name__ == "__main__":
    # Smoke-test both architectures: print a parameter summary, then run a
    # forward pass with dummy input via test_model().
    print("Testing BaselineCNN:")
    print()
    baseline = BaselineCNN()
    print(get_model_summary(baseline))
    print()
    test_model(baseline)
    print()
    print("=" * 60)
    print("Testing ImprovedCNN:")
    print()
    improved = ImprovedCNN()
    print(get_model_summary(improved))
    print()
    test_model(improved)