|
|
""" |
|
|
CIFAR100 ResNet-34 Model Definition with Bottleneck Layers |
|
|
Contains the model architecture classes for CIFAR100 classification. |
|
|
|
|
|
This module provides: |
|
|
- ModelConfig: Configuration for model architecture |
|
|
- BottleneckBlock: 1x1 bottleneck convolution block |
|
|
- BasicBlock: Basic residual block |
|
|
- CIFAR100ResNet34: ResNet-34 architecture for CIFAR-100 |
|
|
|
|
|
Author: Krishnakanth |
|
|
Date: 2025-10-10 |
|
|
""" |
|
|
|
|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
from typing import Tuple |
|
|
from dataclasses import dataclass |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class ModelConfig:
    """Configuration for model architecture.

    Groups the hyperparameters that CIFAR100ResNet34 reads when building
    the network.
    """
    # Channels in the input images (3 = RGB CIFAR images).
    input_channels: int = 3
    # Expected spatial size (height, width) of the input; CIFAR is 32x32.
    # NOTE(review): not read by CIFAR100ResNet34 itself — presumably kept
    # for callers/documentation; confirm before removing.
    input_size: Tuple[int, int] = (32, 32)
    # Number of output classes (CIFAR-100 has 100).
    num_classes: int = 100
    # Dropout probability; passed both into the residual blocks and to the
    # nn.Dropout applied before the final fully-connected layer.
    dropout_rate: float = 0.05
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BottleneckBlock(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand.

    The 1x1 convolutions shrink the channel count to ``out_channels // 4``
    before the 3x3 convolution and expand it back afterwards, which keeps
    the expensive spatial convolution cheap.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the 3x3 convolution (downsamples when > 1).
        downsample: Optional module that projects the identity path so its
            shape matches the main path (required whenever stride != 1 or
            the channel count changes).
        dropout_rate: Probability for the optional ``Dropout2d`` applied
            after the residual addition; disabled when 0.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None, dropout_rate=0.0):
        super().__init__()

        mid_channels = out_channels // 4

        # 1x1 reduction into the bottleneck.
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)

        # 3x3 spatial convolution; this is the layer that carries the stride.
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)

        # 1x1 expansion back to the block's output width.
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dropout = nn.Dropout2d(dropout_rate) if dropout_rate > 0 else None

    def forward(self, x):
        """Run the bottleneck path, add the (possibly projected) identity."""
        identity = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += identity
        y = self.relu(y)

        return y if self.dropout is None else self.dropout(y)
|
|
|
|
|
|
|
|
class BasicBlock(nn.Module):
    """Basic two-convolution (3x3 -> 3x3) residual block for ResNet.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (downsamples when > 1).
        downsample: Optional module that projects the identity path so its
            shape matches the main path.
        dropout_rate: Probability for the optional ``Dropout2d`` applied
            after the residual addition; disabled when 0.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None, dropout_rate=0.0):
        super().__init__()

        # First 3x3 convolution carries the stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        # Second 3x3 convolution always keeps resolution.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dropout = nn.Dropout2d(dropout_rate) if dropout_rate > 0 else None

    def forward(self, x):
        """Run both convolutions, add the (possibly projected) identity."""
        identity = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        y += identity
        y = self.relu(y)

        return y if self.dropout is None else self.dropout(y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CIFAR100ResNet34(nn.Module):
    """ResNet-34 for CIFAR-100.

    Uses the 3-4-6-3 BasicBlock stage layout of ResNet-34 with a CIFAR-style
    stem: a single 3x3/stride-1 convolution (no max-pool), so the 32x32 input
    resolution is preserved entering the residual stages.

    The forward pass returns log-probabilities (log_softmax), suitable for
    training with ``nn.NLLLoss``.
    """

    def __init__(self, config: ModelConfig):
        """Build the network described by ``config``.

        Args:
            config: Architecture hyperparameters (input channels, number of
                classes, dropout rate).
        """
        super().__init__()
        self.config = config

        # Stem: 3x3 conv that keeps spatial resolution for small images.
        self.conv1 = nn.Conv2d(config.input_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Four residual stages with ResNet-34 block counts (3, 4, 6, 3).
        # Stages 2-4 halve the resolution while doubling the channel count.
        self.layer1 = self._make_layer(BasicBlock, 64, 64, 3, stride=1, dropout_rate=config.dropout_rate)
        self.layer2 = self._make_layer(BasicBlock, 64, 128, 4, stride=2, dropout_rate=config.dropout_rate)
        self.layer3 = self._make_layer(BasicBlock, 128, 256, 6, stride=2, dropout_rate=config.dropout_rate)
        self.layer4 = self._make_layer(BasicBlock, 256, 512, 3, stride=2, dropout_rate=config.dropout_rate)

        # Classifier head: global average pool -> dropout -> linear.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(config.dropout_rate)
        self.fc = nn.Linear(512, config.num_classes)

        self._initialize_weights()

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1, dropout_rate=0.0):
        """Stack ``blocks`` residual blocks; only the first may change shape.

        Returns:
            nn.Sequential containing the requested blocks.
        """
        downsample = None
        if stride != 1 or in_channels != out_channels:
            # Project the identity path so it matches the main path's shape.
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = [block(in_channels, out_channels, stride, downsample, dropout_rate)]
        # Remaining blocks keep shape: stride 1 and no projection needed.
        layers.extend(
            block(out_channels, out_channels, dropout_rate=dropout_rate)
            for _ in range(blocks - 1)
        )

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """He-initialize convolutions; BatchNorm starts as the identity map."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input batch of shape (N, input_channels, H, W).

        Returns:
            Tensor of shape (N, num_classes) with log-probabilities.
        """
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        # log_softmax pairs with nn.NLLLoss during training.
        return F.log_softmax(x, dim=1)
|
|
|
|
|
|
|
|
|
|
|
# Module-level aliases — presumably kept so existing imports keep working;
# confirm against callers before removing.
CIFAR100Model = CIFAR100ResNet34
# NOTE(review): despite its name this alias points at the ResNet-34 class;
# no separate ResNet-18 is defined in this module. Misleading to importers —
# confirm whether a real ResNet-18 (2-2-2-2 blocks) was intended.
CIFAR100ResNet18 = CIFAR100ResNet34