# deepface-autoencoder / autoencoder_on_image_compression.py
# Uploaded by BJyotibrat using huggingface_hub (commit 4484f96, verified)
# -*- coding: utf-8 -*-
"""Autoencoder on Image Compression.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/13X8ZS11V0GCWpowuJZ3igEYJa6kRx9DL
"""
# --- Colab-only setup: install Kaggle credentials and fetch CelebA ---
# Prompts the user to upload kaggle.json, then downloads and unpacks the
# dataset. Lines starting with '!' are IPython shell magics and only run
# inside a notebook kernel.
from google.colab import files
files.upload()
import os
os.makedirs('/root/.kaggle', exist_ok=True)
!cp kaggle.json /root/.kaggle/
# The Kaggle CLI refuses credentials readable by other users, hence chmod 600.
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d jessicali9530/celeba-dataset
!unzip -q celeba-dataset.zip
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torch.amp import autocast, GradScaler
class CelebADataset(Dataset):
    """Dataset of aligned CelebA face crops read from a single flat directory.

    Each item is one RGB PIL image (or its transformed tensor when a
    `transform` callable is supplied). There are no labels.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        # Sort so index -> filename is deterministic across runs/machines.
        self.image_names = sorted(os.listdir(img_dir))
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        path = os.path.join(self.img_dir, self.image_names[idx])
        img = Image.open(path).convert('RGB')
        return self.transform(img) if self.transform else img
# Preprocessing: resize every crop to 128x128 and scale pixels into [0, 1].
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Cap the dataset at the first 60k images, then hold out 20% for validation.
dataset = CelebADataset("img_align_celeba/img_align_celeba", transform=transform)
del dataset.image_names[60000:]
n_train = int(0.8 * len(dataset))
train_dataset, val_dataset = random_split(dataset, [n_train, len(dataset) - n_train])

# Shared loader settings; only shuffling differs between train and val.
loader_opts = dict(
    batch_size=512,
    num_workers=2,
    pin_memory=True,
    persistent_workers=False,
)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_opts)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_opts)
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 128x128 RGB images.

    Encoder: four stride-2 convolutions (3 -> 64 -> 128 -> 256 -> 512
    channels), shrinking 128x128 spatial resolution down to 8x8.
    Decoder: mirror-image transposed convolutions back up to 3x128x128,
    ending in Sigmoid so outputs lie in [0, 1] like the inputs.
    """

    def __init__(self):
        super().__init__()
        # Each Conv2d(k=4, s=2, p=1) halves height and width.
        down = []
        down += [nn.Conv2d(3, 64, 4, 2, 1), nn.ReLU()]
        down += [nn.Conv2d(64, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.ReLU()]
        down += [nn.Conv2d(128, 256, 4, 2, 1), nn.BatchNorm2d(256), nn.ReLU()]
        down += [nn.Conv2d(256, 512, 4, 2, 1), nn.ReLU()]
        self.encoder = nn.Sequential(*down)

        # Each ConvTranspose2d(k=4, s=2, p=1) doubles height and width.
        up = []
        up += [nn.ConvTranspose2d(512, 256, 4, 2, 1), nn.BatchNorm2d(256), nn.ReLU()]
        up += [nn.ConvTranspose2d(256, 128, 4, 2, 1), nn.BatchNorm2d(128), nn.ReLU()]
        up += [nn.ConvTranspose2d(128, 64, 4, 2, 1), nn.ReLU()]
        up += [nn.ConvTranspose2d(64, 3, 4, 2, 1), nn.Sigmoid()]
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode then decode, returning a reconstruction the same shape as x."""
        latent = self.encoder(x)
        return self.decoder(latent)
# Pick hardware, build the model in channels_last layout, and compile it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = Autoencoder().to(device).to(memory_format=torch.channels_last)
model = torch.compile(model)  # PyTorch 2.x graph compilation for faster steps

criterion = nn.L1Loss()  # pixel-wise mean absolute error
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scaler = GradScaler()  # loss scaling for mixed-precision training
torch.backends.cudnn.benchmark = True  # autotune conv kernels for the fixed input size
# Mixed-precision training loop with per-epoch validation.
epochs = 50
# FIX: the original hard-coded device_type='cuda', which breaks on CPU-only
# runtimes; derive it from the device actually selected above.
amp_device = device.type
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for images in train_loader:
        images = images.to(device, non_blocking=True).to(memory_format=torch.channels_last)
        optimizer.zero_grad()
        with autocast(device_type=amp_device):
            outputs = model(images)
            loss = criterion(outputs, images)
        # GradScaler degrades to a no-op when CUDA AMP is unavailable.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images in val_loader:
            images = images.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            with autocast(device_type=amp_device):
                outputs = model(images)
                loss = criterion(outputs, images)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
# Show six validation images (top row) next to their reconstructions (bottom row).
images = next(iter(val_loader)).to(device)
with torch.no_grad():
    outputs = model(images)
images = images.cpu().numpy()
outputs = outputs.cpu().numpy()

fig, axes = plt.subplots(2, 6, figsize=(12, 4))
for col, (orig_img, recon_img) in enumerate(zip(images[:6], outputs[:6])):
    # CHW -> HWC for matplotlib.
    axes[0, col].imshow(orig_img.transpose(1, 2, 0))
    axes[0, col].axis('off')
    axes[1, col].imshow(recon_img.transpose(1, 2, 0))
    axes[1, col].axis('off')
plt.show()
!pip install pytorch-msssim
loss = criterion(outputs, images)
import torch

def calculate_psnr(original, reconstructed):
    """Peak signal-to-noise ratio (dB) between two tensors scaled to [0, 1].

    Args:
        original: reference tensor.
        reconstructed: tensor to compare, same shape as `original`.

    Returns:
        0-dim torch.Tensor holding the PSNR in dB. Identical inputs are
        capped at 100 dB. (The original returned a plain int 100 in that
        branch, which crashed the `.item()` call sites.)
    """
    mse = torch.mean((original - reconstructed) ** 2)
    if mse == 0:
        # Perfect reconstruction: keep the 100 dB cap, but as a tensor so
        # callers can uniformly call .item().
        return torch.tensor(100.0)
    return 20 * torch.log10(1.0 / torch.sqrt(mse))
# Report mean PSNR and SSIM over the validation loader (batch-averaged).
from pytorch_msssim import ssim

model.eval()
psnr_sum = 0.0
ssim_sum = 0.0
batches = 0
with torch.no_grad():
    for images in val_loader:
        images = images.to(device)
        outputs = model(images)
        psnr_sum += calculate_psnr(images, outputs).item()
        ssim_sum += ssim(images, outputs, data_range=1.0, size_average=True).item()
        batches += 1
print("Average PSNR:", psnr_sum / batches)
print("Average SSIM:", ssim_sum / batches)
# Upload a single image, push it through the autoencoder, and show both sides.
from google.colab import files
uploaded = files.upload()

from PIL import Image
img_path = list(uploaded.keys())[0]
image = Image.open(img_path).convert('RGB')

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
input_image = transform(image).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output_image = model(input_image)

input_np = input_image.squeeze().cpu().numpy()
output_np = output_image.squeeze().cpu().numpy()

import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for ax, arr, title in zip(axes, (input_np, output_np), ("Original", "Reconstructed")):
    ax.imshow(arr.transpose(1, 2, 0))  # CHW -> HWC for display
    ax.set_title(title)
    ax.axis('off')
plt.show()
# Persist weights, offer them for download, and reload into an uncompiled model.
torch.save(model.state_dict(), "autoencoder_celeba.pth")
torch.save(model, "autoencoder_full.pth")

from google.colab import files
files.download("autoencoder_celeba.pth")

# torch.compile wraps the module and prefixes every parameter key with
# "_orig_mod."; strip that prefix so a plain Autoencoder accepts the weights.
state_dict = torch.load("autoencoder_celeba.pth")
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}

model = Autoencoder().to(device)
model.load_state_dict(new_state_dict)
model.eval()
# (Removed the original's extra load_state_dict(torch.load(...), strict=False)
# call: the un-stripped "_orig_mod." keys match nothing, so it silently
# loaded zero tensors while looking like a real load.)
import os

# Assemble a standalone project folder: model code, inference script, deps, docs.
project_dir = "celeba-autoencoder"
os.makedirs(project_dir, exist_ok=True)

# Standalone copy of the architecture. FIX: the original string literal had
# lost all indentation, so the written model.py was not valid Python.
model_code = """
import torch
import torch.nn as nn


class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 4, 2, 1),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))
"""

with open(f"{project_dir}/model.py", "w") as f:
    f.write(model_code)
# Standalone CLI inference script for the exported project. FIX: the original
# string literal had lost all indentation, so the written inference.py was
# not valid Python.
inference_code = """
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import sys

from model import Autoencoder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
state_dict = torch.load("autoencoder_celeba.pth", map_location=device)
# Fix for torch.compile prefix
new_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
model.load_state_dict(new_state_dict)
model.eval()

image_path = sys.argv[1]
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])
image = Image.open(image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(input_tensor)

input_np = input_tensor.squeeze().cpu().numpy()
output_np = output.squeeze().cpu().numpy()

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].imshow(input_np.transpose(1, 2, 0))
axes[0].set_title("Original")
axes[0].axis("off")
axes[1].imshow(output_np.transpose(1, 2, 0))
axes[1].set_title("Reconstructed")
axes[1].axis("off")
plt.show()
"""

with open(f"{project_dir}/inference.py", "w") as f:
    f.write(inference_code)
# Pin the runtime dependencies for the exported project.
packages = ["torch", "torchvision", "pillow", "matplotlib", "pytorch-msssim"]
requirements = "\n".join(packages) + "\n"
with open(f"{project_dir}/requirements.txt", "w") as f:
    f.write(requirements)
# Project README. FIX: the original never closed the ```bash code fence, so
# the Notes and Author sections rendered inside the code block; the fence is
# closed and those sections restored as proper headings.
readme = """
# CelebA Autoencoder

## Overview
This project implements a Convolutional Autoencoder trained on the CelebA dataset for image compression and reconstruction.

## Features
- Learns compressed latent representation of face images
- Reconstructs images from compressed representation
- Evaluated using PSNR and SSIM metrics

## Dataset
- CelebA Dataset (Kaggle)

## Model
- Encoder: Convolutional layers with downsampling
- Decoder: Transposed convolution layers for reconstruction

## Results
- PSNR: ~31 dB
- SSIM: ~0.93

## Usage
### Run Inference
```bash
python inference.py path_to_image.jpg
```

## Notes
- Model performs lossy compression
- Some blurring is expected due to reconstruction loss

## Author
Autoencoder project for Deep Learning experiment
"""

with open(f"{project_dir}/README.md", "w") as f:
    f.write(readme)
# Bundle the project folder (including the trained weights) and download it.
import shutil
from google.colab import files

archive_base = "celeba-autoencoder"
shutil.copy("autoencoder_celeba.pth", f"{project_dir}/autoencoder_celeba.pth")
shutil.make_archive(archive_base, 'zip', project_dir)
files.download(archive_base + ".zip")