ash12321 committed on
Commit
2bf4ec2
·
verified ·
1 Parent(s): f652fef

Upload model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. model.py +182 -0
model.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Residual Convolutional Autoencoder for Image Reconstruction
3
+ Architecture: 6-layer encoder/decoder with residual blocks
4
+ """
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+
11
+ class AEResidualBlock(nn.Module):
12
+ """Residual block with batch normalization and dropout"""
13
+ def __init__(self, channels, dropout=0.1):
14
+ super().__init__()
15
+ self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
16
+ self.bn1 = nn.BatchNorm2d(channels)
17
+ self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
18
+ self.bn2 = nn.BatchNorm2d(channels)
19
+ self.relu = nn.ReLU(inplace=True)
20
+ self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity()
21
+
22
+ def forward(self, x):
23
+ residual = x
24
+ out = self.relu(self.bn1(self.conv1(x)))
25
+ out = self.dropout(out)
26
+ out = self.bn2(self.conv2(out))
27
+ out += residual
28
+ return self.relu(out)
29
+
30
+
31
+ class ResidualConvAutoencoder(nn.Module):
32
+ """
33
+ Deep Convolutional Autoencoder with Residual Connections
34
+
35
+ Args:
36
+ latent_dim (int): Dimension of latent space (512 or 768)
37
+ dropout (float): Dropout rate for regularization (0.15 or 0.20)
38
+
39
+ Input: (B, 3, 256, 256) RGB images
40
+ Output: (B, 3, 256, 256) Reconstructed images + (B, latent_dim) latent codes
41
+ """
42
+ def __init__(self, latent_dim=512, dropout=0.15):
43
+ super().__init__()
44
+
45
+ self.latent_dim = latent_dim
46
+ self.dropout = dropout
47
+
48
+ # Encoder: 256x256 -> 4x4
49
+ self.encoder = nn.Sequential(
50
+ # 256 -> 128
51
+ nn.Conv2d(3, 64, 4, stride=2, padding=1),
52
+ nn.BatchNorm2d(64),
53
+ nn.ReLU(inplace=True),
54
+ AEResidualBlock(64, dropout),
55
+
56
+ # 128 -> 64
57
+ nn.Conv2d(64, 128, 4, stride=2, padding=1),
58
+ nn.BatchNorm2d(128),
59
+ nn.ReLU(inplace=True),
60
+ AEResidualBlock(128, dropout),
61
+
62
+ # 64 -> 32
63
+ nn.Conv2d(128, 256, 4, stride=2, padding=1),
64
+ nn.BatchNorm2d(256),
65
+ nn.ReLU(inplace=True),
66
+ AEResidualBlock(256, dropout),
67
+
68
+ # 32 -> 16
69
+ nn.Conv2d(256, 512, 4, stride=2, padding=1),
70
+ nn.BatchNorm2d(512),
71
+ nn.ReLU(inplace=True),
72
+ AEResidualBlock(512, dropout),
73
+
74
+ # 16 -> 8
75
+ nn.Conv2d(512, 512, 4, stride=2, padding=1),
76
+ nn.BatchNorm2d(512),
77
+ nn.ReLU(inplace=True),
78
+ AEResidualBlock(512, dropout),
79
+
80
+ # 8 -> 4
81
+ nn.Conv2d(512, 512, 4, stride=2, padding=1),
82
+ nn.BatchNorm2d(512),
83
+ nn.ReLU(inplace=True),
84
+ )
85
+
86
+ # Latent space projection
87
+ self.fc_encoder = nn.Linear(512 * 4 * 4, latent_dim)
88
+ self.fc_decoder = nn.Linear(latent_dim, 512 * 4 * 4)
89
+
90
+ # Decoder: 4x4 -> 256x256
91
+ self.decoder = nn.Sequential(
92
+ # 4 -> 8
93
+ nn.ConvTranspose2d(512, 512, 4, stride=2, padding=1),
94
+ nn.BatchNorm2d(512),
95
+ nn.ReLU(inplace=True),
96
+ AEResidualBlock(512, dropout),
97
+
98
+ # 8 -> 16
99
+ nn.ConvTranspose2d(512, 512, 4, stride=2, padding=1),
100
+ nn.BatchNorm2d(512),
101
+ nn.ReLU(inplace=True),
102
+ AEResidualBlock(512, dropout),
103
+
104
+ # 16 -> 32
105
+ nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
106
+ nn.BatchNorm2d(256),
107
+ nn.ReLU(inplace=True),
108
+ AEResidualBlock(256, dropout),
109
+
110
+ # 32 -> 64
111
+ nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
112
+ nn.BatchNorm2d(128),
113
+ nn.ReLU(inplace=True),
114
+ AEResidualBlock(128, dropout),
115
+
116
+ # 64 -> 128
117
+ nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
118
+ nn.BatchNorm2d(64),
119
+ nn.ReLU(inplace=True),
120
+ AEResidualBlock(64, dropout),
121
+
122
+ # 128 -> 256
123
+ nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1),
124
+ nn.Tanh() # Output in [-1, 1]
125
+ )
126
+
127
+ def forward(self, x):
128
+ """
129
+ Forward pass
130
+
131
+ Args:
132
+ x: Input tensor (B, 3, 256, 256) in range [-1, 1]
133
+
134
+ Returns:
135
+ reconstructed: Reconstructed tensor (B, 3, 256, 256)
136
+ latent: Latent representation (B, latent_dim)
137
+ """
138
+ # Encode
139
+ x = self.encoder(x)
140
+ x = x.view(x.size(0), -1)
141
+ latent = self.fc_encoder(x)
142
+
143
+ # Decode
144
+ x = self.fc_decoder(latent)
145
+ x = x.view(x.size(0), 512, 4, 4)
146
+ reconstructed = self.decoder(x)
147
+
148
+ return reconstructed, latent
149
+
150
+ def encode(self, x):
151
+ """Get latent representation only"""
152
+ x = self.encoder(x)
153
+ x = x.view(x.size(0), -1)
154
+ return self.fc_encoder(x)
155
+
156
+ def decode(self, latent):
157
+ """Reconstruct from latent code"""
158
+ x = self.fc_decoder(latent)
159
+ x = x.view(x.size(0), 512, 4, 4)
160
+ return self.decoder(x)
161
+
162
+
163
+ def load_model(checkpoint_path, latent_dim=512, dropout=0.15, device='cuda'):
164
+ """
165
+ Load a trained model from checkpoint
166
+
167
+ Args:
168
+ checkpoint_path: Path to .pth checkpoint file
169
+ latent_dim: Latent dimension (512 for Model A, 768 for Model B)
170
+ dropout: Dropout rate (0.15 for Model A, 0.20 for Model B)
171
+ device: Device to load model on
172
+
173
+ Returns:
174
+ model: Loaded model in eval mode
175
+ checkpoint: Full checkpoint dict with metadata
176
+ """
177
+ model = ResidualConvAutoencoder(latent_dim=latent_dim, dropout=dropout)
178
+ checkpoint = torch.load(checkpoint_path, map_location=device)
179
+ model.load_state_dict(checkpoint['model_state_dict'])
180
+ model.eval()
181
+ model.to(device)
182
+ return model, checkpoint