# -*- coding: utf-8 -*-
"""Autoencoder on Image Compression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/13X8ZS11V0GCWpowuJZ3igEYJa6kRx9DL

The script downloads the CelebA dataset from Kaggle, trains a convolutional
autoencoder on the first 60000 images (resized to 128x128), reports PSNR and
SSIM on the validation split, reconstructs a user-uploaded image, and finally
packages the weights together with a small inference project into a zip file.
"""

import os
import shutil
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from google.colab import files
from PIL import Image
from torch.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms

# ---------------------------------------------------------------------------
# Kaggle credentials and dataset download
# ---------------------------------------------------------------------------

# The uploaded file must be named kaggle.json; it is copied into place below.
files.upload()

os.makedirs('/root/.kaggle', exist_ok=True)
shutil.copy("kaggle.json", "/root/.kaggle/")
os.chmod("/root/.kaggle/kaggle.json", 0o600)

# Plain subprocess calls replace the notebook-only "!" shell escapes so the
# file is valid Python; check=True stops the script if a step fails.
subprocess.run(
    ["kaggle", "datasets", "download", "-d", "jessicali9530/celeba-dataset"],
    check=True,
)
subprocess.run(["unzip", "-q", "celeba-dataset.zip"], check=True)

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------


class CelebADataset(Dataset):
    """Unlabelled image dataset backed by a flat directory of image files.

    Args:
        img_dir: Directory whose entries are all treated as image files.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        # Sorted so that index -> file mapping is stable across runs.
        self.image_names = sorted(os.listdir(img_dir))
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.image_names[idx])
        # The context manager closes the file handle once the pixels have
        # been converted, instead of leaving it to the garbage collector.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image


# Shared preprocessing for training, evaluation and single-image inference.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

dataset = CelebADataset("img_align_celeba/img_align_celeba", transform=transform)
# Only the first 60000 files (in sorted order) are used.
dataset.image_names = dataset.image_names[:60000]

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(
    train_dataset,
    batch_size=512,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    persistent_workers=False,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=512,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    persistent_workers=False,
)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------


class Autoencoder(nn.Module):
    """Convolutional autoencoder with four stride-2 stages in each direction.

    The encoder maps 3 input channels to 512 channels through four
    ``Conv2d(kernel=4, stride=2, padding=1)`` layers; the decoder mirrors it
    with ``ConvTranspose2d`` layers and ends in a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        return self.decoder(self.encoder(x))


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision is only enabled on CUDA; on the CPU fallback both autocast
# and the gradient scaler are switched off explicitly.
use_amp = device.type == "cuda"

model = Autoencoder().to(device).to(memory_format=torch.channels_last)
model = torch.compile(model)

criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scaler = GradScaler(enabled=use_amp)

torch.backends.cudnn.benchmark = True

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------

epochs = 50

for epoch in range(epochs):
    model.train()
    train_loss = 0
    for images in train_loader:
        images = images.to(device, non_blocking=True).to(
            memory_format=torch.channels_last
        )
        optimizer.zero_grad()

        with autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images)
            # The input itself is the reconstruction target.
            loss = criterion(outputs, images)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images in val_loader:
            images = images.to(device, non_blocking=True).to(
                memory_format=torch.channels_last
            )
            with autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, images)
            val_loss += loss.item()

    val_loss /= len(val_loader)

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

# ---------------------------------------------------------------------------
# Qualitative check: originals (top row) vs. reconstructions (bottom row)
# ---------------------------------------------------------------------------

dataiter = iter(val_loader)
images = next(dataiter).to(device)

with torch.no_grad():
    outputs = model(images)

images = images.cpu().numpy()
outputs = outputs.cpu().numpy()

fig, axes = plt.subplots(2, 6, figsize=(12, 4))
for i in range(6):
    # Arrays are channel-first; imshow expects channel-last.
    axes[0, i].imshow(np.transpose(images[i], (1, 2, 0)))
    axes[0, i].axis('off')
    axes[1, i].imshow(np.transpose(outputs[i], (1, 2, 0)))
    axes[1, i].axis('off')
plt.show()

# ---------------------------------------------------------------------------
# Quantitative evaluation (PSNR / SSIM)
# ---------------------------------------------------------------------------

# pytorch-msssim is installed at run time, so its import has to follow the
# install step rather than sit with the other imports at the top.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "pytorch-msssim"],
    check=True,
)
from pytorch_msssim import ssim  # noqa: E402


def calculate_psnr(original, reconstructed):
    """Return the PSNR in dB between two tensors with values in [0, 1].

    The mean squared error is taken over all elements of the inputs. The
    result is always a 0-dim tensor; identical inputs yield 100.0 instead of
    infinity.
    """
    mse = torch.mean((original - reconstructed) ** 2)
    if mse == 0:
        # Return a tensor (not a bare int) so callers can always use .item().
        return torch.full_like(mse, 100.0)
    return 20 * torch.log10(1.0 / torch.sqrt(mse))


model.eval()

total_psnr = 0
total_ssim = 0
count = 0

with torch.no_grad():
    for images in val_loader:
        images = images.to(device)
        outputs = model(images)

        total_psnr += calculate_psnr(images, outputs).item()
        total_ssim += ssim(images, outputs, data_range=1.0, size_average=True).item()
        count += 1

# Both metrics are averaged over batches, not over individual images.
print("Average PSNR:", total_psnr / count)
print("Average SSIM:", total_ssim / count)

# ---------------------------------------------------------------------------
# Reconstruct a user-uploaded image
# ---------------------------------------------------------------------------

uploaded = files.upload()

# Only the first uploaded file is used.
img_path = next(iter(uploaded))
with Image.open(img_path) as raw:
    image = raw.convert('RGB')

input_image = transform(image).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output_image = model(input_image)

input_np = input_image.squeeze().cpu().numpy()
output_np = output_image.squeeze().cpu().numpy()

fig, axes = plt.subplots(1, 2, figsize=(8, 4))

axes[0].imshow(input_np.transpose(1, 2, 0))
axes[0].set_title("Original")
axes[0].axis('off')

axes[1].imshow(output_np.transpose(1, 2, 0))
axes[1].set_title("Reconstructed")
axes[1].axis('off')

plt.show()

# ---------------------------------------------------------------------------
# Save and reload the weights
# ---------------------------------------------------------------------------

torch.save(model.state_dict(), "autoencoder_celeba.pth")
torch.save(model, "autoencoder_full.pth")

files.download("autoencoder_celeba.pth")

state_dict = torch.load("autoencoder_celeba.pth", map_location=device)

# The state dict was taken from the torch.compile wrapper, so every key
# carries an "_orig_mod." prefix that a plain Autoencoder does not expect.
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}

model = Autoencoder().to(device)
model.load_state_dict(new_state_dict)
model.eval()

# ---------------------------------------------------------------------------
# Export a standalone inference project
# ---------------------------------------------------------------------------

project_dir = "celeba-autoencoder"
os.makedirs(project_dir, exist_ok=True)

model_code = """
import torch
import torch.nn as nn

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))
"""

with open(f"{project_dir}/model.py", "w", encoding="utf-8") as f:
    f.write(model_code)

inference_code = """
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import sys

from model import Autoencoder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Autoencoder().to(device)

state_dict = torch.load("autoencoder_celeba.pth", map_location=device)

# Fix for torch.compile prefix
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}

model.load_state_dict(new_state_dict)
model.eval()

image_path = sys.argv[1]

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

image = Image.open(image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(input_tensor)

input_np = input_tensor.squeeze().cpu().numpy()
output_np = output.squeeze().cpu().numpy()

fig, axes = plt.subplots(1, 2, figsize=(8,4))

axes[0].imshow(input_np.transpose(1,2,0))
axes[0].set_title("Original")
axes[0].axis("off")

axes[1].imshow(output_np.transpose(1,2,0))
axes[1].set_title("Reconstructed")
axes[1].axis("off")

plt.show()
"""

with open(f"{project_dir}/inference.py", "w", encoding="utf-8") as f:
    f.write(inference_code)

requirements = """torch
torchvision
pillow
matplotlib
pytorch-msssim
"""

with open(f"{project_dir}/requirements.txt", "w", encoding="utf-8") as f:
    f.write(requirements)

# The usage code fence is closed and the trailing sections carry headings so
# the generated README renders as valid Markdown.
readme = """
# CelebA Autoencoder

## Overview
This project implements a Convolutional Autoencoder trained on the CelebA dataset for image compression and reconstruction.

## Features
- Learns compressed latent representation of face images
- Reconstructs images from compressed representation
- Evaluated using PSNR and SSIM metrics

## Dataset
- CelebA Dataset (Kaggle)

## Model
- Encoder: Convolutional layers with downsampling
- Decoder: Transposed convolution layers for reconstruction

## Results
- PSNR: ~31 dB
- SSIM: ~0.93

## Usage

### Run Inference
```bash
python inference.py path_to_image.jpg
```

## Notes
- Model performs lossy compression
- Some blurring is expected due to reconstruction loss

## Author
Autoencoder project for Deep Learning experiment
"""

with open(f"{project_dir}/README.md", "w", encoding="utf-8") as f:
    f.write(readme)

# Ship the weights next to the code, then hand the archive to the browser.
shutil.copy("autoencoder_celeba.pth", f"{project_dir}/autoencoder_celeba.pth")

shutil.make_archive("celeba-autoencoder", 'zip', project_dir)

files.download("celeba-autoencoder.zip")