"""Autoencoder on Image Compression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/13X8ZS11V0GCWpowuJZ3igEYJa6kRx9DL
"""
|
|
| from google.colab import files |
| files.upload() |
|
|
| import os |
|
|
| os.makedirs('/root/.kaggle', exist_ok=True) |
| !cp kaggle.json /root/.kaggle/ |
| !chmod 600 /root/.kaggle/kaggle.json |
|
|
| !kaggle datasets download -d jessicali9530/celeba-dataset |
| !unzip -q celeba-dataset.zip |
|
|
| import os |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from PIL import Image |
|
|
| import torch |
| import torch.nn as nn |
| import torch.optim as optim |
| from torchvision import transforms |
| from torch.utils.data import Dataset, DataLoader, random_split |
|
|
| from torch.amp import autocast, GradScaler |
|
|
class CelebADataset(Dataset):
    """Unlabelled CelebA image dataset.

    Yields one RGB image per index (no target), optionally passed
    through ``transform``.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        # Sorted so the index -> filename mapping is deterministic.
        self.image_names = sorted(os.listdir(img_dir))
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        path = os.path.join(self.img_dir, self.image_names[idx])
        img = Image.open(path).convert('RGB')
        return self.transform(img) if self.transform else img
|
|
# Preprocessing: 128x128 tensors in [0, 1]; no augmentation, since the
# target of the autoencoder is the input itself.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

dataset = CelebADataset("img_align_celeba/img_align_celeba", transform=transform)

# Cap at 60k images to keep epoch time manageable on Colab.
dataset.image_names = dataset.image_names[:60000]

# 80/20 train/validation split.
n_train = int(0.8 * len(dataset))
train_dataset, val_dataset = random_split(dataset, [n_train, len(dataset) - n_train])

# Shared loader settings; only shuffling differs between the splits.
_loader_kwargs = dict(
    batch_size=512,
    num_workers=2,
    pin_memory=True,
    persistent_workers=False,
)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
|
|
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 128x128 RGB images.

    The encoder halves the spatial resolution four times
    (128 -> 64 -> 32 -> 16 -> 8) while widening channels 3 -> 512; the
    decoder mirrors it with transposed convolutions and ends in a
    Sigmoid so outputs lie in [0, 1], matching ``ToTensor`` inputs.
    """

    def __init__(self):
        super(Autoencoder, self).__init__()

        encoder_layers = [
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        latent = self.encoder(x)
        return self.decoder(latent)
|
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# channels_last memory format speeds up convolutions on recent GPUs.
model = Autoencoder().to(device).to(memory_format=torch.channels_last)

# JIT-compile the forward graph (PyTorch 2.x) for faster steps.
model = torch.compile(model)

# L1 reconstruction loss tends to produce sharper images than MSE.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Fix: GradScaler() with no arguments is deprecated and assumes CUDA;
# pass the actual device type and disable scaling on CPU, where mixed
# precision training is unavailable.
scaler = GradScaler(device.type, enabled=(device.type == "cuda"))

# Let cuDNN benchmark conv algorithms — input shapes are fixed here.
torch.backends.cudnn.benchmark = True

epochs = 50
|
|
# Mixed-precision training loop with per-epoch validation.
for epoch in range(epochs):
    # ---- training ----
    model.train()
    train_loss = 0

    for images in train_loader:
        images = images.to(device, non_blocking=True).to(memory_format=torch.channels_last)

        optimizer.zero_grad()

        # Fix: use the actual device type (was hard-coded 'cuda') so the
        # script also runs on CPU-only machines; autocast is disabled there.
        with autocast(device_type=device.type, enabled=(device.type == "cuda")):
            outputs = model(images)
            loss = criterion(outputs, images)

        # Scaled backward + step keeps fp16 gradients from underflowing.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # ---- validation ----
    model.eval()
    val_loss = 0

    with torch.no_grad():
        for images in val_loader:
            images = images.to(device, non_blocking=True).to(memory_format=torch.channels_last)

            with autocast(device_type=device.type, enabled=(device.type == "cuda")):
                outputs = model(images)
                loss = criterion(outputs, images)

            val_loss += loss.item()

    val_loss /= len(val_loader)

    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
|
|
# Visual sanity check: originals (top row) vs reconstructions (bottom).
dataiter = iter(val_loader)
images = next(dataiter).to(device)

with torch.no_grad():
    outputs = model(images)

images = images.cpu().numpy()
outputs = outputs.cpu().numpy()

fig, axes = plt.subplots(2, 6, figsize=(12, 4))

for col in range(6):
    # imshow expects HWC, tensors are CHW — transpose before plotting.
    for row, batch in ((0, images), (1, outputs)):
        axes[row, col].imshow(np.transpose(batch[col], (1, 2, 0)))
        axes[row, col].axis('off')

plt.show()
|
|
| !pip install pytorch-msssim |
|
|
| loss = criterion(outputs, images) |
|
|
| import torch |
|
|
def calculate_psnr(original, reconstructed):
    """Return the PSNR (in dB) between two tensors scaled to [0, 1].

    Always returns a 0-dim tensor so callers can uniformly call
    ``.item()``; a perfect reconstruction is capped at 100 dB rather
    than returning infinity.
    """
    mse = torch.mean((original - reconstructed) ** 2)
    if mse == 0:
        # Fix: previously returned the plain int 100, which broke the
        # caller's `.item()` with an AttributeError.
        return torch.tensor(100.0)
    return 20 * torch.log10(1.0 / torch.sqrt(mse))
|
|
from pytorch_msssim import ssim

model.eval()

total_psnr = 0.0
total_ssim = 0.0
count = 0

# Average PSNR/SSIM across validation batches (mean of batch means).
with torch.no_grad():
    for batch in val_loader:
        batch = batch.to(device)
        recon = model(batch)

        total_psnr += calculate_psnr(batch, recon).item()
        total_ssim += ssim(batch, recon, data_range=1.0, size_average=True).item()
        count += 1

print("Average PSNR:", total_psnr / count)
print("Average SSIM:", total_ssim / count)
|
|
# Run a user-supplied image through the trained autoencoder.
from google.colab import files
uploaded = files.upload()

from PIL import Image

img_path = list(uploaded.keys())[0]
image = Image.open(img_path).convert('RGB')

# Same preprocessing as training: resize to 128x128, scale to [0, 1].
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])
input_image = transform(image).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output_image = model(input_image)

input_np = input_image.squeeze().cpu().numpy()
output_np = output_image.squeeze().cpu().numpy()

import matplotlib.pyplot as plt

# Side-by-side comparison of input and reconstruction.
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

for ax, arr, title in ((axes[0], input_np, "Original"),
                       (axes[1], output_np, "Reconstructed")):
    ax.imshow(arr.transpose(1, 2, 0))
    ax.set_title(title)
    ax.axis('off')

plt.show()
|
|
# Persist the trained weights; the state_dict is the portable format.
torch.save(model.state_dict(), "autoencoder_celeba.pth")

# Also pickle the full module. NOTE: this ties the checkpoint to the
# exact class/module layout, so the state_dict above is preferred.
torch.save(model, "autoencoder_full.pth")

from google.colab import files
files.download("autoencoder_celeba.pth")
|
|
# Reload the checkpoint into a plain (uncompiled) Autoencoder.
# torch.compile wraps the network, so the saved keys carry an
# `_orig_mod.` prefix that must be stripped first.
# map_location lets the load succeed on CPU-only machines too.
state_dict = torch.load("autoencoder_celeba.pth", map_location=device)

new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}

model = Autoencoder().to(device)
model.load_state_dict(new_state_dict)
model.eval()

# NOTE(review): removed a trailing
# `model.load_state_dict(torch.load(...), strict=False)` — with the
# `_orig_mod.` prefix still in place, strict=False matched zero keys
# and silently loaded nothing.
|
|
import os

# Assemble a standalone project folder so the checkpoint can be used
# outside this notebook.
project_dir = "celeba-autoencoder"
os.makedirs(project_dir, exist_ok=True)

# Fix: the embedded source must be properly indented, otherwise the
# written model.py is not valid Python.
model_code = """\
import torch
import torch.nn as nn


class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))
"""

with open(f"{project_dir}/model.py", "w") as f:
    f.write(model_code)
|
|
# Fix: the embedded script must be properly indented, otherwise the
# written inference.py is not valid Python.
inference_code = """\
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import sys

from model import Autoencoder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Autoencoder().to(device)

state_dict = torch.load("autoencoder_celeba.pth", map_location=device)

# Fix for torch.compile prefix
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}

model.load_state_dict(new_state_dict)
model.eval()

image_path = sys.argv[1]

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

image = Image.open(image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(input_tensor)

input_np = input_tensor.squeeze().cpu().numpy()
output_np = output.squeeze().cpu().numpy()

fig, axes = plt.subplots(1, 2, figsize=(8, 4))

axes[0].imshow(input_np.transpose(1, 2, 0))
axes[0].set_title("Original")
axes[0].axis("off")

axes[1].imshow(output_np.transpose(1, 2, 0))
axes[1].set_title("Reconstructed")
axes[1].axis("off")

plt.show()
"""

with open(f"{project_dir}/inference.py", "w") as f:
    f.write(inference_code)
|
|
# Minimal dependency list for the standalone project.
requirements = "\n".join([
    "torch",
    "torchvision",
    "pillow",
    "matplotlib",
    "pytorch-msssim",
]) + "\n"

with open(f"{project_dir}/requirements.txt", "w") as f:
    f.write(requirements)
|
|
# Fix: the original README left the ```bash fence unclosed, so the
# "Notes"/"Author" sections rendered inside the code block; the fence is
# now closed and those sections are proper headings.
readme = """\
# CelebA Autoencoder

## Overview
This project implements a Convolutional Autoencoder trained on the CelebA dataset for image compression and reconstruction.

## Features
- Learns compressed latent representation of face images
- Reconstructs images from compressed representation
- Evaluated using PSNR and SSIM metrics

## Dataset
- CelebA Dataset (Kaggle)

## Model
- Encoder: Convolutional layers with downsampling
- Decoder: Transposed convolution layers for reconstruction

## Results
- PSNR: ~31 dB
- SSIM: ~0.93

## Usage

### Run Inference
```bash
python inference.py path_to_image.jpg
```

## Notes
- Model performs lossy compression
- Some blurring is expected due to reconstruction loss

## Author
Autoencoder project for Deep Learning experiment
"""

with open(f"{project_dir}/README.md", "w") as f:
    f.write(readme)
|
|
import shutil

# Bundle weights alongside the generated code and zip the folder.
shutil.copy("autoencoder_celeba.pth", f"{project_dir}/autoencoder_celeba.pth")
shutil.make_archive("celeba-autoencoder", 'zip', project_dir)

from google.colab import files
files.download("celeba-autoencoder.zip")