Spaces:

waleed-12
/

Generative-Vision-GAN-Comparison

Sleeping

App Files Files Community

Generative-Vision-GAN-Comparison / app.py

waleed-12

Upload 2 files

a015496 verified about 1 month ago

raw

history blame contribute delete

11.7 kB

	# ============================================================
	# app.py — HuggingFace Spaces Gradio App
	# DCGAN vs WGAN-GP: Anime Face Generation
	# ============================================================
	# Deploy instructions:
	# 1. Create a new Space on HuggingFace (SDK: Gradio)
	# 2. Upload this app.py and requirements.txt
	# 3. Upload dcgan_G_final.pt and wgan_G_final.pt to the Space files
	# (or host them on HF Hub and pull with hf_hub_download)
	# ============================================================

	import os
	import gc
	import numpy as np
	import torch
	import torch.nn as nn
	import torchvision.utils as vutils
	from PIL import Image
	import gradio as gr

	# ── Re-define architectures (must match training code exactly) ───────────────

	class DCGANGenerator(nn.Module):
	    """Generator network for the DCGAN model.

	    Four ``ConvTranspose2d -> BatchNorm2d -> ReLU`` stages followed by a final
	    ``ConvTranspose2d -> Tanh`` layer. Fed a ``(B, latent_dim, 1, 1)`` tensor,
	    the first stage produces a 4x4 map and each later transposed convolution
	    doubles the spatial size, giving ``(B, num_channels, 64, 64)`` in [-1, 1].

	    The layer order fixes the ``net.<index>`` state_dict keys, so it has to
	    stay in sync with the training code that produced the checkpoint.

	    Args:
	        latent_dim: Number of channels of the latent input.
	        features_g: Base channel width; stages use 8x, 4x, 2x and 1x of it.
	        num_channels: Number of channels of the generated image.
	    """

	    def __init__(self, latent_dim=100, features_g=64, num_channels=3):
	        super().__init__()

	        stage_widths = [features_g * 8, features_g * 4, features_g * 2, features_g]

	        layers = []
	        # The first stage projects the 1x1 latent (stride 1, no padding);
	        # every following stage upsamples by 2 (stride 2, padding 1).
	        in_ch, stride, padding = latent_dim, 1, 0
	        for out_ch in stage_widths:
	            layers += [
	                nn.ConvTranspose2d(in_ch, out_ch, 4, stride, padding, bias=False),
	                nn.BatchNorm2d(out_ch),
	                nn.ReLU(True),
	            ]
	            in_ch, stride, padding = out_ch, 2, 1

	        # Output layer: last upsampling step, squashed to [-1, 1] by Tanh.
	        layers += [
	            nn.ConvTranspose2d(in_ch, num_channels, 4, 2, 1, bias=False),
	            nn.Tanh(),
	        ]
	        self.net = nn.Sequential(*layers)

	    def forward(self, z):
	        """Map latent tensor ``z`` to a batch of images."""
	        return self.net(z)


	class WGANGenerator(nn.Module):
	    """Generator network for the WGAN-GP model.

	    Layer-for-layer the same stack as the DCGAN generator in this file: four
	    ``ConvTranspose2d -> BatchNorm2d -> ReLU`` stages and a closing
	    ``ConvTranspose2d -> Tanh``. A ``(B, latent_dim, 1, 1)`` input yields a
	    ``(B, num_channels, 64, 64)`` output in [-1, 1].

	    Module order determines the ``net.<index>`` checkpoint keys and must not
	    drift from the training code.

	    Args:
	        latent_dim: Number of channels of the latent input.
	        features_g: Base channel width of the generator.
	        num_channels: Number of channels of the generated image.
	    """

	    def __init__(self, latent_dim=100, features_g=64, num_channels=3):
	        super().__init__()

	        # (in_channels, out_channels, stride, padding) for each hidden stage.
	        hidden_stages = (
	            (latent_dim, features_g * 8, 1, 0),
	            (features_g * 8, features_g * 4, 2, 1),
	            (features_g * 4, features_g * 2, 2, 1),
	            (features_g * 2, features_g, 2, 1),
	        )

	        modules = []
	        for c_in, c_out, stride, padding in hidden_stages:
	            modules.append(
	                nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False)
	            )
	            modules.append(nn.BatchNorm2d(c_out))
	            modules.append(nn.ReLU(True))

	        # Final upsampling to image space, bounded to [-1, 1].
	        modules.append(nn.ConvTranspose2d(features_g, num_channels, 4, 2, 1, bias=False))
	        modules.append(nn.Tanh())

	        self.net = nn.Sequential(*modules)

	    def forward(self, z):
	        """Run the latent tensor ``z`` through the generator stack."""
	        return self.net(z)


	# ── Load models ──────────────────────────────────────────────────────────────

	# Size of the latent vector fed to both generators.
	LATENT_DIM = 100

	# Checkpoint files, looked up relative to the working directory.
	DCGAN_WEIGHTS = "dcgan_G_final.pt"
	WGAN_WEIGHTS = "wgan_G_final.pt"

	# Use the GPU when one is visible, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")
	print(f"Running on: {device}")

	# Instantiate both generators (DCGAN first, then WGAN-GP) on that device.
	dcgan_gen = DCGANGenerator(latent_dim=LATENT_DIM).to(device)
	wgan_gen = WGANGenerator(latent_dim=LATENT_DIM).to(device)

	def _strip_data_parallel_prefix(state):
	    """Return a copy of ``state`` with a leading "module." removed from each key.

	    Only the prefix is stripped, so a key that merely contains "module."
	    somewhere in the middle is left untouched.
	    """
	    prefix = "module."
	    return {
	        (key[len(prefix):] if key.startswith(prefix) else key): value
	        for key, value in state.items()
	    }


	def _load_generator_weights(generator, path, label):
	    """Load the checkpoint at ``path`` into ``generator`` if the file exists.

	    Prints whether the weights were loaded or the random init was kept.
	    ``label`` is the model name used in those messages.
	    """
	    if not os.path.exists(path):
	        print(f"⚠ {label} weights not found — using random init.")
	        return

	    # NOTE(review): torch.load unpickles the file, so only trusted checkpoints
	    # should be shipped next to the app.
	    state = torch.load(path, map_location=device)
	    # Checkpoints saved from a multi-GPU (DataParallel) run prefix every key.
	    generator.load_state_dict(_strip_data_parallel_prefix(state))
	    print(f"✔ {label} weights loaded.")


	def load_weights():
	    """Load weights if available; otherwise use random init (demo fallback).

	    Fills the module-level ``dcgan_gen`` and ``wgan_gen`` from ``DCGAN_WEIGHTS``
	    and ``WGAN_WEIGHTS`` respectively, then switches both generators to eval
	    mode. A missing file is reported and skipped; a checkpoint that does not
	    fit the architecture still raises from ``load_state_dict``.
	    """
	    _load_generator_weights(dcgan_gen, DCGAN_WEIGHTS, "DCGAN")
	    _load_generator_weights(wgan_gen, WGAN_WEIGHTS, "WGAN-GP")

	    # Inference only: BatchNorm must use its running statistics.
	    dcgan_gen.eval()
	    wgan_gen.eval()


	# Load once at import time so the UI callbacks can use the models directly.
	load_weights()


	# ── Inference helpers ─────────────────────────────────────────────────────────

	def tensor_to_pil_grid(tensor_batch, nrow=4):
	    """Convert a (B,3,H,W) tensor in [-1,1] to a PIL image grid.

	    Args:
	        tensor_batch: Batch of images; values are rescaled from [-1, 1] to
	            [0, 1] by ``make_grid`` before conversion.
	        nrow: Number of images per grid row (passed to ``make_grid``).

	    Returns:
	        A ``PIL.Image`` holding the tiled batch as 8-bit pixels.
	    """
	    # detach + cpu so the helper also works for tensors that still live on
	    # the GPU or are attached to an autograd graph (.numpy() rejects both).
	    batch = tensor_batch.detach().cpu()
	    grid = vutils.make_grid(batch, nrow=nrow, normalize=True, value_range=(-1, 1))
	    np_img = grid.permute(1, 2, 0).numpy()  # (H, W, 3)
	    # Round to nearest instead of truncating so values are not biased
	    # downward by the uint8 cast.
	    np_img = (np_img * 255).round().clip(0, 255).astype(np.uint8)
	    return Image.fromarray(np_img)


	@torch.no_grad()
	def generate_comparison(n_images: int, seed: int):
	    """
	    Core generation function.
	    Returns two PIL images: DCGAN grid and WGAN-GP grid.

	    Both generators receive the same latent batch, drawn after seeding the
	    global torch RNG with ``seed``, so the two grids are directly comparable.

	    Args:
	        n_images: Requested number of images; clamped to [1, 16].
	        seed: Seed for the latent noise.

	    Returns:
	        ``(pil_dcgan, pil_wgan)`` image grids.
	    """
	    # UI sliders may hand over floats (e.g. 8.0); torch.randn needs int sizes.
	    n_images = max(1, min(int(n_images), 16))  # clamp to [1, 16]
	    torch.manual_seed(int(seed))
	    z = torch.randn(n_images, LATENT_DIM, 1, 1, device=device)

	    dcgan_imgs = dcgan_gen(z).cpu()
	    wgan_imgs = wgan_gen(z).cpu()

	    # At most four images per row; fewer images form a single row.
	    nrow = min(4, n_images)
	    pil_dcgan = tensor_to_pil_grid(dcgan_imgs, nrow=nrow)
	    pil_wgan = tensor_to_pil_grid(wgan_imgs, nrow=nrow)

	    # Release memory between requests.
	    gc.collect()
	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()

	    return pil_dcgan, pil_wgan


	@torch.no_grad()
	def generate_single(model_choice: str, n_images: int, seed: int):
	    """
	    Returns a single model's output as a PIL grid + a short description.

	    Args:
	        model_choice: Either "DCGAN" or "WGAN-GP".
	        n_images: Requested number of images; clamped to [1, 16].
	        seed: Seed for the latent noise (seeds the global torch RNG).

	    Returns:
	        ``(pil_out, desc)``: the image grid and the model's description text.

	    Raises:
	        gr.Error: If ``model_choice`` is not one of the known model names.
	    """
	    # One table for both the generator and its description, so the two can
	    # never disagree about which models exist.
	    catalog = {
	        "DCGAN": (
	            dcgan_gen,
	            ("Binary Cross Entropy loss. Faster to train but prone to mode collapse "
	             "— may generate repetitive or blurry samples."),
	        ),
	        "WGAN-GP": (
	            wgan_gen,
	            ("Wasserstein loss + Gradient Penalty. More stable training, "
	             "better sample diversity, and less mode collapse."),
	        ),
	    }
	    # Fail before doing any work; previously an unknown choice ran the
	    # WGAN-GP generator and then died with a bare KeyError.
	    if model_choice not in catalog:
	        raise gr.Error(
	            f"Unknown model {model_choice!r}; choose one of: {', '.join(catalog)}."
	        )
	    gen, desc = catalog[model_choice]

	    # UI sliders may hand over floats (e.g. 8.0); torch.randn needs int sizes.
	    n_images = max(1, min(int(n_images), 16))
	    torch.manual_seed(int(seed))
	    z = torch.randn(n_images, LATENT_DIM, 1, 1, device=device)

	    imgs = gen(z).cpu()
	    # At most four images per row; fewer images form a single row.
	    nrow = min(4, n_images)
	    pil_out = tensor_to_pil_grid(imgs, nrow=nrow)

	    # Release memory between requests.
	    gc.collect()
	    if torch.cuda.is_available():
	        torch.cuda.empty_cache()

	    return pil_out, desc


	# ── Gradio UI ─────────────────────────────────────────────────────────────────

	# NOTE(review): the nesting of the layout blocks below was reconstructed from
	# a copy of this file whose indentation had been stripped — confirm that the
	# Row/Column arrangement matches the intended layout.
	with gr.Blocks(
	    title="DCGAN vs WGAN-GP | Anime Face Generator",
	    theme=gr.themes.Soft(),
	) as demo:

	    # Page header with a short summary table of the two models.
	    gr.Markdown(
	        """
	        # 🎨 DCGAN vs WGAN-GP — Anime Face Generator
	        AI4009 Generative AI | Assignment 3 — Question 1

	        Generate anime faces using two GAN variants and compare output diversity.
	        Both models were trained on the [Anime Faces](https://www.kaggle.com/datasets/soumikrakshit/anime-faces)
	        dataset (64×64, normalised to [-1, 1]).

	        | Model | Loss | Key Property |
	        |-------|------|--------------|
	        | DCGAN | Binary Cross-Entropy | Baseline — fast but unstable |
	        | WGAN-GP | Wasserstein + Gradient Penalty | Stable, diverse, mode-collapse-resistant |
	        """
	    )

	    with gr.Tabs():

	        # ── Tab 1: Side-by-side comparison ──────────────────────────────────
	        with gr.TabItem("🔄 Compare Both Models"):
	            gr.Markdown("### Generate the same latent noise through both models")

	            with gr.Row():
	                with gr.Column(scale=1):
	                    n_img_compare = gr.Slider(1, 16, value=8, step=1,
	                                              label="Number of Images")
	                    seed_compare = gr.Slider(0, 9999, value=42, step=1,
	                                             label="Random Seed")
	                    btn_compare = gr.Button("🚀 Generate & Compare", variant="primary")

	            with gr.Row():
	                out_dcgan = gr.Image(label="DCGAN Output", type="pil")
	                out_wgan = gr.Image(label="WGAN-GP Output", type="pil")

	            btn_compare.click(
	                fn=generate_comparison,
	                inputs=[n_img_compare, seed_compare],
	                outputs=[out_dcgan, out_wgan],
	            )

	            # Preset (count, seed) pairs; computed on click, not cached.
	            gr.Examples(
	                examples=[[8, 42], [16, 123], [4, 777], [16, 2024]],
	                inputs=[n_img_compare, seed_compare],
	                outputs=[out_dcgan, out_wgan],
	                fn=generate_comparison,
	                cache_examples=False,
	            )

	        # ── Tab 2: Single model explorer ────────────────────────────────────
	        with gr.TabItem("🔍 Explore Single Model"):
	            gr.Markdown("### Explore a specific model in detail")

	            with gr.Row():
	                with gr.Column(scale=1):
	                    model_choice = gr.Radio(["DCGAN", "WGAN-GP"], value="WGAN-GP",
	                                            label="Select Model")
	                    n_img_single = gr.Slider(1, 16, value=8, step=1,
	                                             label="Number of Images")
	                    seed_single = gr.Slider(0, 9999, value=0, step=1,
	                                            label="Random Seed")
	                    btn_single = gr.Button("Generate", variant="primary")

	            with gr.Row():
	                single_out = gr.Image(label="Generated Images", type="pil", scale=2)
	                single_desc = gr.Textbox(label="Model Description", lines=4, scale=1)

	            btn_single.click(
	                fn=generate_single,
	                inputs=[model_choice, n_img_single, seed_single],
	                outputs=[single_out, single_desc],
	            )

	        # ── Tab 3: About ─────────────────────────────────────────────────────
	        with gr.TabItem("ℹ️ About"):
	            gr.Markdown(
	                """
	                ## Model Details

	                ### DCGAN (Deep Convolutional GAN)
	                - Generator: 5 ConvTranspose2d layers, BatchNorm, ReLU, Tanh output
	                - Discriminator: 5 Conv2d layers, LeakyReLU, Sigmoid output
	                - Loss: Binary Cross-Entropy
	                - Known weakness: Mode collapse — the generator may learn to produce
	                only a few "safe" outputs that fool the discriminator.

	                ### WGAN-GP (Wasserstein GAN with Gradient Penalty)
	                - Generator: Same architecture as DCGAN
	                - Critic: Same structure but uses InstanceNorm and no Sigmoid —
	                outputs raw Wasserstein scores instead of probabilities
	                - Loss: Wasserstein distance + Gradient Penalty (λ=10)
	                - Training: 5 critic updates per generator step
	                - Advantage: The Wasserstein distance provides meaningful gradients even
	                when distributions don't overlap — eliminates mode collapse.

	                ### Training Setup
	                - Dataset: Anime Faces 64×64
	                - Optimizer: Adam (lr=0.0002, β=(0.5, 0.999))
	                - Mixed precision (torch.cuda.amp)
	                - Platform: Kaggle T4 x2 Dual GPU
	                """
	            )

	    # Footer shown below the tabs.
	    gr.Markdown(
	        "<center>Built for AI4009 GenAI Assignment 3 · "
	        "Model trained on Kaggle · Deployed on HuggingFace Spaces</center>"
	    )


	# Start the Gradio server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()