Spaces:
Sleeping
Sleeping
Fix model architectures to match trained checkpoints
Browse files- models_attack.py +108 -111
- models_shifted.py +98 -81
models_attack.py
CHANGED
|
@@ -8,153 +8,150 @@ import torch.nn.functional as F
|
|
| 8 |
|
| 9 |
|
| 10 |
class StandardCNN(nn.Module):
|
| 11 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def __init__(self, num_classes=10, dropout_rate=0.5):
|
| 14 |
super(StandardCNN, self).__init__()
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
|
| 18 |
self.bn1 = nn.BatchNorm2d(32)
|
| 19 |
-
self.
|
|
|
|
|
|
|
|
|
|
| 20 |
self.bn2 = nn.BatchNorm2d(64)
|
| 21 |
-
self.
|
|
|
|
|
|
|
|
|
|
| 22 |
self.bn3 = nn.BatchNorm2d(128)
|
|
|
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
self.
|
| 26 |
|
| 27 |
# Fully connected layers
|
| 28 |
-
self.fc1 = nn.Linear(
|
| 29 |
-
self.
|
| 30 |
-
self.
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
self.dropout = nn.Dropout(dropout_rate)
|
| 34 |
|
| 35 |
def forward(self, x, return_logits=False):
|
| 36 |
-
#
|
| 37 |
-
x = self.
|
| 38 |
-
x = self.
|
| 39 |
-
x =
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Flatten
|
| 42 |
x = x.view(x.size(0), -1)
|
| 43 |
-
|
| 44 |
-
#
|
| 45 |
x = F.relu(self.fc1(x))
|
| 46 |
-
x = self.
|
| 47 |
x = F.relu(self.fc2(x))
|
| 48 |
-
x = self.
|
|
|
|
| 49 |
logits = self.fc3(x)
|
| 50 |
-
|
| 51 |
if return_logits:
|
| 52 |
return logits
|
| 53 |
-
|
| 54 |
-
# Apply softmax for probability distribution
|
| 55 |
return F.softmax(logits, dim=1)
|
| 56 |
|
| 57 |
|
| 58 |
class LighterCNN(nn.Module):
|
| 59 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def __init__(self, num_classes=10, dropout_rate=0.5):
|
| 62 |
super(LighterCNN, self).__init__()
|
| 63 |
|
| 64 |
-
|
| 65 |
-
self.
|
| 66 |
-
self.
|
| 67 |
-
|
| 68 |
-
self.
|
| 69 |
-
self.
|
| 70 |
-
self.
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
self.
|
| 74 |
-
self.
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
self.fc
|
| 78 |
-
|
| 79 |
-
# Dropout
|
| 80 |
-
self.dropout = nn.Dropout(dropout_rate)
|
| 81 |
-
|
| 82 |
def forward(self, x, return_logits=False):
|
| 83 |
-
|
| 84 |
-
x = self.
|
| 85 |
-
x = self.
|
| 86 |
-
x = self.
|
| 87 |
-
|
| 88 |
-
# Global average pooling
|
| 89 |
-
x = self.gap(x)
|
| 90 |
-
|
| 91 |
-
# Flatten
|
| 92 |
-
x = x.view(x.size(0), -1)
|
| 93 |
-
|
| 94 |
-
# Apply dropout
|
| 95 |
-
x = self.dropout(x)
|
| 96 |
-
|
| 97 |
-
# Final classification layer
|
| 98 |
logits = self.fc(x)
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
|
| 107 |
class DepthwiseCNN(nn.Module):
|
| 108 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
def __init__(self, num_classes=10, dropout_rate=0.5):
|
| 111 |
super(DepthwiseCNN, self).__init__()
|
| 112 |
|
| 113 |
-
#
|
| 114 |
-
self.
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
self.depthwise2 = nn.Conv2d(8, 8, kernel_size=3, padding=1, groups=8)
|
| 120 |
-
self.pointwise2 = nn.Conv2d(8, 16, kernel_size=1)
|
| 121 |
-
self.bn2 = nn.BatchNorm2d(16)
|
| 122 |
|
| 123 |
-
#
|
| 124 |
-
self.
|
|
|
|
|
|
|
| 125 |
self.gap = nn.AdaptiveAvgPool2d(1)
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
self.fc = nn.Linear(16, num_classes)
|
| 129 |
-
|
| 130 |
-
# Dropout
|
| 131 |
-
self.dropout = nn.Dropout(dropout_rate)
|
| 132 |
-
|
| 133 |
def forward(self, x, return_logits=False):
|
| 134 |
-
#
|
| 135 |
-
x = self.
|
| 136 |
-
x = self.
|
| 137 |
-
x = self.
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
x = self.depthwise2(x)
|
| 141 |
-
x = self.pointwise2(x)
|
| 142 |
-
x = self.pool(F.relu(self.bn2(x))) # 14x14 -> 7x7
|
| 143 |
-
|
| 144 |
-
# Global average pooling
|
| 145 |
-
x = self.gap(x)
|
| 146 |
-
|
| 147 |
-
# Flatten
|
| 148 |
-
x = x.view(x.size(0), -1)
|
| 149 |
-
|
| 150 |
-
# Apply dropout
|
| 151 |
-
x = self.dropout(x)
|
| 152 |
-
|
| 153 |
-
# Final classification layer
|
| 154 |
-
logits = self.fc(x)
|
| 155 |
-
|
| 156 |
-
if return_logits:
|
| 157 |
-
return logits
|
| 158 |
-
|
| 159 |
-
# Apply softmax for probability distribution
|
| 160 |
-
return F.softmax(logits, dim=1)
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class StandardCNN(nn.Module):
    """
    Standard CNN Model (Original).

    Architecture: 3 Conv blocks (Conv-BN-ReLU-MaxPool) + 3 FC layers.
    Trainable parameters: 817,354 (~817K).

    Args:
        num_classes: number of output classes (default 10, MNIST digits).
        dropout_rate: dropout probability applied after fc1 and fc2.
    """

    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(StandardCNN, self).__init__()

        # First convolutional block: (1, 28, 28) -> (32, 14, 14)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolutional block: (32, 14, 14) -> (64, 7, 7)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Third convolutional block: (64, 7, 7) -> (128, 3, 3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Flattened size after the three conv/pool stages: 28 -> 14 -> 7 -> 3
        self.flattened_size = 128 * 3 * 3

        # Fully connected head
        self.fc1 = nn.Linear(self.flattened_size, 512)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x, return_logits=False):
        """Forward pass.

        Args:
            x: input batch, expected shape (B, 1, 28, 28).
            return_logits: if True, return raw logits; otherwise softmax
                probabilities over classes.
        """
        # Conv block 1
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        # Conv block 2
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        # Conv block 3
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))

        # Flatten for the FC head
        x = x.view(x.size(0), -1)

        # FC layers with dropout
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        logits = self.fc3(x)

        if return_logits:
            return logits
        # Apply softmax for a probability distribution
        return F.softmax(logits, dim=1)
|
| 78 |
|
| 79 |
|
| 80 |
class LighterCNN(nn.Module):
    """
    Lighter CNN Model.

    Architecture: 3 Conv blocks (Conv-BN-ReLU-MaxPool) + Global Average
    Pooling + a single FC layer. Trainable parameters: ~94K (94,410).

    Args:
        num_classes: number of output classes (default 10).
        dropout_rate: accepted for signature compatibility with the other
            models; this architecture registers no dropout layer so the
            value is unused (keeps state_dict compatible with the trained
            checkpoint).
    """

    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(LighterCNN, self).__init__()

        # Block 1: (1, 28, 28) -> (32, 14, 14)
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Block 2: (32, 14, 14) -> (64, 7, 7)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Block 3: (64, 7, 7) -> (128, 3, 3); spatial path is 28->14->7->3
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Global average pooling collapses spatial dims: (B, 128, 1, 1)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x, return_logits=False):
        """Forward pass.

        Args:
            x: input batch, expected shape (B, 1, 28, 28).
            return_logits: if True, return raw logits; otherwise softmax
                probabilities.
        """
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.gap(x).view(x.size(0), -1)  # (B, 128)
        logits = self.fc(x)
        return logits if return_logits else F.softmax(logits, dim=1)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class DepthwiseSeparableConv(nn.Module):
    """
    Depthwise separable convolution: per-channel 3x3 depthwise conv
    followed by a 1x1 pointwise conv, then BatchNorm + ReLU.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
        stride: stride of the depthwise conv (spatial downsampling).
    """

    def __init__(self, in_ch, out_ch, stride=1):
        super(DepthwiseSeparableConv, self).__init__()
        # Depthwise: groups=in_ch gives one 3x3 filter per input channel.
        self.dw = nn.Conv2d(in_ch, in_ch, 3, stride=stride, padding=1,
                            groups=in_ch, bias=False)
        # Pointwise: 1x1 conv mixes channels. Biases omitted since BN follows.
        self.pw = nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)

    def forward(self, x):
        """Apply depthwise then pointwise conv, then BN + ReLU."""
        x = self.dw(x)
        x = self.pw(x)
        return F.relu(self.bn(x), inplace=True)
|
| 125 |
|
| 126 |
|
| 127 |
class DepthwiseCNN(nn.Module):
    """
    Depthwise Separable CNN.

    Ultra-efficient model built from depthwise separable convolutions
    (stem conv + two DepthwiseSeparableConv blocks + GAP + FC).
    Parameters: ~1.4K.

    Args:
        num_classes: number of output classes (default 10).
        dropout_rate: accepted for signature compatibility; this
            architecture registers no dropout layer so the value is unused
            (keeps state_dict compatible with the trained checkpoint).
    """

    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(DepthwiseCNN, self).__init__()

        # Stem: 1 -> 8 channels, stride=2 halves the spatial size (28 -> 14)
        self.stem = nn.Sequential(
            nn.Conv2d(1, 8, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
        )

        # Depthwise-separable blocks
        self.ds1 = DepthwiseSeparableConv(8, 16, stride=1)   # 14 -> 14
        self.ds2 = DepthwiseSeparableConv(16, 32, stride=2)  # 14 -> 7

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(32, num_classes)

    def forward(self, x, return_logits=False):
        """Forward pass.

        Args:
            x: input batch, expected shape (B, 1, 28, 28).
            return_logits: if True, return raw logits; otherwise softmax
                probabilities.
        """
        x = self.stem(x)             # (B, 8, 14, 14)
        x = self.ds1(x)              # (B, 16, 14, 14)
        x = self.ds2(x)              # (B, 32, 7, 7)
        x = self.gap(x).flatten(1)   # (B, 32)
        logits = self.fc(x)          # (B, num_classes)
        return logits if return_logits else F.softmax(logits, dim=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models_shifted.py
CHANGED
|
@@ -8,118 +8,135 @@ import torch.nn.functional as F
|
|
| 8 |
|
| 9 |
|
| 10 |
class CNNModel(nn.Module):
|
| 11 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def __init__(self, num_classes=10, dropout_rate=0.5):
|
| 14 |
super(CNNModel, self).__init__()
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
|
| 18 |
-
self.
|
| 19 |
-
self.
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
self.
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
self.
|
| 26 |
-
self.
|
| 27 |
-
self.
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def forward(self, x):
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
x =
|
| 36 |
-
x = self.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
x = x.view(x.size(0), -1)
|
| 40 |
|
| 41 |
# Fully connected layers with dropout
|
| 42 |
x = F.relu(self.fc1(x))
|
| 43 |
-
x = self.
|
| 44 |
x = F.relu(self.fc2(x))
|
| 45 |
-
x = self.
|
| 46 |
x = self.fc3(x)
|
| 47 |
|
| 48 |
-
|
| 49 |
-
return F.softmax(x, dim=1)
|
| 50 |
|
| 51 |
|
| 52 |
class TinyCNN(nn.Module):
|
| 53 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def __init__(self, num_classes=10):
|
| 56 |
super(TinyCNN, self).__init__()
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
self.conv1 = nn.Conv2d(1,
|
| 60 |
-
self.
|
| 61 |
-
self.
|
| 62 |
-
|
| 63 |
-
# Pooling layer
|
| 64 |
-
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
|
| 65 |
-
|
| 66 |
-
# Global average pooling instead of large FC layer
|
| 67 |
-
self.gap = nn.AdaptiveAvgPool2d(1)
|
| 68 |
-
|
| 69 |
-
# Single fully connected layer
|
| 70 |
-
self.fc = nn.Linear(64, num_classes)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
# Global average pooling
|
| 79 |
-
|
| 80 |
|
| 81 |
-
#
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
x = self.
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
class MiniCNN(nn.Module):
|
| 92 |
-
"""
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
def __init__(self, num_classes=10):
|
| 95 |
super(MiniCNN, self).__init__()
|
| 96 |
-
|
| 97 |
-
#
|
| 98 |
-
self.conv1 = nn.Conv2d(1,
|
| 99 |
-
self.
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
self.
|
| 106 |
-
|
| 107 |
-
#
|
| 108 |
-
self.
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
def forward(self, x):
|
| 111 |
-
|
| 112 |
-
x = self.
|
| 113 |
-
x = self.
|
| 114 |
-
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
# Flatten
|
| 119 |
-
x = x.view(x.size(0), -1)
|
| 120 |
-
|
| 121 |
-
# Final classification layer
|
| 122 |
-
x = self.fc(x)
|
| 123 |
-
|
| 124 |
-
# Apply softmax for probability distribution
|
| 125 |
-
return F.softmax(x, dim=1)
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class CNNModel(nn.Module):
    """
    CNN Model for MNIST digit classification with shifted labels.

    Architecture: Conv-BN-ReLU-Pool x3 + FC-Dropout x2 + FC.
    Trainable parameters: 817,354.

    Args:
        num_classes: number of output classes (default 10).
        dropout_rate: dropout probability applied after fc1 and fc2.
    """

    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(CNNModel, self).__init__()

        # First convolutional block
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolutional block
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Third convolutional block
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Spatial path 28 -> 14 -> 7 -> 3 leaves a (128, 3, 3) feature map
        self.flattened_size = 128 * 3 * 3

        # Fully connected layers with dropout
        self.fc1 = nn.Linear(self.flattened_size, 512)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Forward pass through the network; returns raw logits (B, num_classes)."""
        # First conv block: (1, 28, 28) -> (32, 14, 14)
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)

        # Second conv block: (32, 14, 14) -> (64, 7, 7)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)

        # Third conv block: (64, 7, 7) -> (128, 3, 3)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)

        # Flatten for FC layers
        x = x.view(x.size(0), -1)

        # Fully connected layers with dropout
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)

        return x
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
class TinyCNN(nn.Module):
    """
    Tiny CNN for MNIST using Global Average Pooling.

    Three Conv-BN-ReLU-Pool blocks, GAP, then one FC layer.
    Trainable parameters: 94,410.

    Args:
        num_classes: number of output classes (default 10).
    """

    def __init__(self, num_classes=10):
        super(TinyCNN, self).__init__()

        # First conv block
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Second conv block
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Third conv block
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Final FC (input = 128 channels after GAP)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Forward pass; returns raw logits (batch, num_classes)."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.avgpool(x)         # (batch, 128, 1, 1)
        x = x.view(x.size(0), -1)   # (batch, 128)
        x = self.fc(x)              # (batch, num_classes)
        return x
|
| 109 |
|
| 110 |
|
| 111 |
class MiniCNN(nn.Module):
    """
    Mini CNN for MNIST using only 2 convolution layers + Global Avg Pooling.

    Trainable parameters: ~19K (19,658).

    Args:
        num_classes: number of output classes (default 10).
    """

    def __init__(self, num_classes=10):
        super(MiniCNN, self).__init__()

        # First conv block
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Second conv block
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected classifier
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        """Forward pass; returns raw logits (batch, num_classes)."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))  # (batch, 32, 14, 14)
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))  # (batch, 64, 7, 7)
        x = self.avgpool(x)          # (batch, 64, 1, 1)
        x = x.view(x.size(0), -1)    # (batch, 64)
        x = self.fc(x)               # (batch, num_classes)
        return x
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|