import torch
import torch.nn as nn
import torch.nn.functional as F


class BetterCNN(nn.Module):
    """Four-stage CNN classifier: (Conv3x3 -> BatchNorm -> ReLU -> MaxPool) x 4,
    an adaptive average pool to a fixed 4x4 grid, then a 3-layer fully
    connected head. Emits raw logits (pair with ``nn.CrossEntropyLoss``).

    Args:
        noOfClasses: number of output classes (default 39).
    """

    def __init__(self, noOfClasses=39):
        super().__init__()
        # Channel progression 3 -> 32 -> 64 -> 128 -> 256; padding=1 keeps
        # spatial size unchanged through each 3x3 conv, so only the pools
        # shrink the feature map.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32,
                               kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)

        # One shared 2x2 max pool, reused after every conv block.
        self.pool = nn.MaxPool2d(2, 2)
        # Adaptive pooling fixes the head's input size regardless of the
        # (sufficiently large) input resolution.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        # Classification head: 256*4*4 = 4096 features in.
        self.fc1 = nn.Linear(256 * 4 * 4, 1024)
        self.dropout = nn.Dropout(0.5)  # regularization after fc1
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, noOfClasses)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: image batch of shape (N, 3, H, W).

        Returns:
            Logits of shape (N, noOfClasses); no softmax applied.
        """
        # Feature extractor: identical Conv -> BN -> ReLU -> Pool pattern
        # for all four stages, expressed once.
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2),
                  (self.conv3, self.bn3), (self.conv4, self.bn4))
        for conv, bn in stages:
            x = self.pool(F.relu(bn(conv(x))))

        # Normalize spatial size to 4x4, then flatten to (N, 4096).
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)

        # Dense head; dropout only between fc1 and fc2, logits from fc3.
        x = self.dropout(F.relu(self.fc1(x)))
        x = F.relu(self.fc2(x))
        return self.fc3(x)