# Stray commit metadata (author / message / hash) was pasted above the module
# docstring and made the file a syntax error; commented out so the file parses:
# faizan
# fix: resolve all 468 ruff linting errors (code quality enforcement complete)
# e77a25a
"""
CNN Model Architectures for MNIST Classification
This module provides CNN models for digit recognition:
- BaselineCNN: Simple 2-layer CNN (target: 98-99% accuracy)
- ImprovedCNN: Enhanced architecture with batch normalization
- Model utilities: parameter counting, architecture summary
Usage:
from scripts.models import BaselineCNN
model = BaselineCNN()
output = model(images) # (batch, 10) logits
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Tuple
class BaselineCNN(nn.Module):
    """
    Baseline CNN for MNIST classification.

    Architecture:
        Input: (batch, 1, 28, 28)
        Conv1: 1 -> 32 channels, 3x3 kernel, padding=1
        ReLU + MaxPool(2x2) -> (batch, 32, 14, 14)
        Conv2: 32 -> 64 channels, 3x3 kernel, padding=1
        ReLU + MaxPool(2x2) -> (batch, 64, 7, 7)
        Flatten -> (batch, 3136)
        FC1: 3136 -> 128, ReLU, Dropout(0.5)
        FC2: 128 -> 10 (output logits)

    Design Rationale:
        - 2 conv layers: Balance between simplicity and capacity
        - 32->64 filters: Standard progression, proven effective
        - Dropout 0.5: Prevent overfitting on small dataset
        - No batch norm: Keep baseline simple

    Expected Performance:
        - Parameters: ~422k (dominated by the 3136 -> 128 linear layer)
        - Test accuracy: 98-99%
        - Training time: ~5-10 min on GPU
    """

    def __init__(self, dropout_rate: float = 0.5):
        """
        Initialize baseline CNN.

        Args:
            dropout_rate: Dropout probability (default 0.5)
        """
        # Zero-argument super() is the modern Python 3 form.
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=32,
            kernel_size=3,
            padding=1
        )
        self.conv2 = nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            padding=1
        )
        # Pooling layer (stateless, so a single instance is shared by both blocks)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers.
        # After two 2x2 poolings: 28 -> 14 -> 7, so the flattened size is 64*7*7 = 3136.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        # Dropout for regularization (active only in train() mode)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor of shape (batch, 1, 28, 28)

        Returns:
            Output logits of shape (batch, 10)
        """
        # Conv block 1: Conv -> ReLU -> Pool
        x = self.pool(F.relu(self.conv1(x)))  # (batch, 32, 14, 14)
        # Conv block 2: Conv -> ReLU -> Pool
        x = self.pool(F.relu(self.conv2(x)))  # (batch, 64, 7, 7)
        # Flatten. Pinning the batch dimension to x.size(0) (rather than -1)
        # makes a wrongly-sized input fail loudly here instead of being
        # silently reshaped into a different batch size.
        x = x.view(x.size(0), 64 * 7 * 7)  # (batch, 3136)
        # Classifier head: FC -> ReLU -> Dropout -> FC
        x = self.dropout(F.relu(self.fc1(x)))  # (batch, 128)
        return self.fc2(x)  # (batch, 10) logits
class ImprovedCNN(nn.Module):
    """
    Enhanced CNN with batch normalization and deeper architecture.

    Architecture:
        Conv1: 1 -> 32, BatchNorm, ReLU, MaxPool
        Conv2: 32 -> 64, BatchNorm, ReLU, MaxPool
        Conv3: 64 -> 128, BatchNorm, ReLU, MaxPool
        Flatten
        FC1: 128*3*3 -> 256, BatchNorm, ReLU, Dropout(0.5)
        FC2: 256 -> 10

    Expected Performance:
        - Parameters: ~391k
        - Test accuracy: 99%+
        - Converges faster than baseline

    Note:
        The BatchNorm1d layer in the head requires batch size > 1 while
        the model is in train() mode; eval() mode works with any batch size.
    """

    def __init__(self, dropout_rate: float = 0.5):
        """
        Initialize improved CNN.

        Args:
            dropout_rate: Dropout probability (default 0.5)
        """
        # Zero-argument super() is the modern Python 3 form.
        super().__init__()
        # Convolutional layers with batch normalization
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers.
        # After three 2x2 poolings: 28 -> 14 -> 7 -> 3 (integer floor),
        # so the flattened size is 128*3*3 = 1152.
        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.bn_fc = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass.

        Args:
            x: Input tensor of shape (batch, 1, 28, 28)

        Returns:
            Output logits of shape (batch, 10)
        """
        # Conv blocks: Conv -> BatchNorm -> ReLU -> Pool
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # (batch, 32, 14, 14)
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # (batch, 64, 7, 7)
        x = self.pool(F.relu(self.bn3(self.conv3(x))))  # (batch, 128, 3, 3)
        # Flatten. Pinning the batch dimension to x.size(0) (rather than -1)
        # makes a wrongly-sized input fail loudly instead of silently
        # reshaping into a different batch size.
        x = x.view(x.size(0), 128 * 3 * 3)  # (batch, 1152)
        # Classifier head: FC -> BatchNorm -> ReLU -> Dropout -> FC
        x = self.dropout(F.relu(self.bn_fc(self.fc1(x))))  # (batch, 256)
        return self.fc2(x)  # (batch, 10) logits
def count_parameters(model: nn.Module) -> Tuple[int, int]:
    """
    Count total and trainable parameters in model.

    Args:
        model: PyTorch model

    Returns:
        Tuple of (total_params, trainable_params)
    """
    # Single pass over the parameter list, accumulating both counts.
    total = 0
    trainable = 0
    for param in model.parameters():
        n = param.numel()
        total += n
        if param.requires_grad:
            trainable += n
    return total, trainable
def get_model_summary(
    model: nn.Module, input_size: Tuple[int, ...] = (1, 1, 28, 28)
) -> str:
    """
    Generate model architecture summary.

    Args:
        model: PyTorch model
        input_size: Input tensor size (batch, channels, height, width)

    Returns:
        Formatted string with model summary
    """
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # float32 parameters occupy 4 bytes each.
    size_mb = total * 4 / (1024 ** 2)
    divider = "=" * 60
    lines = [
        divider,
        f"Model: {model.__class__.__name__}",
        divider,
        f"Input size: {input_size}",
        f"Total parameters: {total:,}",
        f"Trainable parameters: {trainable:,}",
        f"Model size (MB): {size_mb:.2f}",
        divider,
    ]
    return "\n".join(lines)
def test_model(model: nn.Module, device: str = 'cpu') -> bool:
"""
Test model with dummy input.
Args:
model: PyTorch model
device: Device to run on ('cpu' or 'cuda')
Returns:
True if test passes, False otherwise
"""
try:
model = model.to(device)
model.eval()
# Create dummy input
dummy_input = torch.randn(4, 1, 28, 28).to(device)
# Forward pass
with torch.no_grad():
output = model(dummy_input)
# Check output shape
assert output.shape == (4, 10), f"Expected shape (4, 10), got {output.shape}"
# Check output is finite
assert torch.isfinite(output).all(), "Output contains NaN or Inf"
print("✓ Model test passed")
print(f" Input shape: {dummy_input.shape}")
print(f" Output shape: {output.shape}")
print(f" Output range: [{output.min():.4f}, {output.max():.4f}]")
return True
except Exception as e:
print(f"✗ Model test failed: {e}")
return False
if __name__ == "__main__":
    # Smoke-test both architectures: print a parameter summary, then run a
    # forward pass on dummy input.
    print("Testing BaselineCNN:")
    print()
    baseline = BaselineCNN()
    print(get_model_summary(baseline))
    print()
    test_model(baseline)
    print()

    print("=" * 60)
    print("Testing ImprovedCNN:")
    print()
    improved = ImprovedCNN()
    print(get_model_summary(improved))
    print()
    test_model(improved)