#!/usr/bin/env python3 """Generate DCGAN notebook.""" import nbformat as nbf nb = nbf.v4.new_notebook() nb.metadata = { "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"name": "python", "version": "3.12.0"}, } cells = [] def md(s): cells.append(nbf.v4.new_markdown_cell(s)) def code(s): cells.append(nbf.v4.new_code_cell(s)) md("""\ # DCGAN: Deep Convolutional GAN Image generation with adversarial training — Generator vs Discriminator. """) md("""\ ## 背景 DCGAN 将 CNN 与 GAN 结合,用转置卷积(transposed convolution)作为生成器。 核心思想:两个网络对抗训练—— - **Generator (G)**:从随机噪声生成逼真图像,目标是骗过 D - **Discriminator (D)**:区分真实图像和生成的假图像,目标是识破 G 训练收敛时,G 能生成以假乱真的图像,D 无法区分真假(输出 ≈ 0.5)。 """) md("""\ ## 数学原理 ### 对抗训练 $$\\min_G \\max_D V(D, G) = \\mathbb{E}_{x \\sim p_{\\text{data}}} [\\log D(x)] + \\mathbb{E}_{z \\sim p_z} [\\log(1 - D(G(z)))]$$ - D 的目标:最大化 $\\log D(x) + \\log(1 - D(G(z)))$ - G 的目标:最小化 $\\log(1 - D(G(z)))$,等价于最大化 $\\log D(G(z))$ ### 架构 ``` Generator: z(100) → ConvTranspose × 4 → image(64×64×3), Tanh Discriminator: image(64×64×3) → Conv × 4 → 1, Sigmoid ``` 关键技巧:除第一层外使用 BatchNorm,G 用 ReLU,D 用 LeakyReLU(0.2)。 """) code("""\ import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader from torchvision import transforms from datasets import load_dataset from gen.dcgan.model import Generator, Discriminator from utils.device import get_device device = get_device() print(f"Device: {device}") """) code("""\ # 加载 CelebA 子集 transform = transforms.Compose([ transforms.Resize(72), transforms.RandomCrop(64), transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ]) ds = load_dataset("eurecom-ds/celeba", split="train[:10000]") images = [transform(item["image"]).unsqueeze(0) for item in ds] dataset = torch.cat(images) loader = DataLoader(dataset, batch_size=128, shuffle=True, drop_last=True) print(f"Dataset: {len(dataset):,} images, {len(loader)} batches") """) code("""\ latent_dim = 100 netG = Generator(latent_dim=latent_dim).to(device) netD = Discriminator().to(device) print(f"G params: {sum(p.numel() for p in netG.parameters()):,}") print(f"D params: {sum(p.numel() for p in netD.parameters()):,}") """) md("""\ ## 训练 > ⏱ 预估耗时:**30 epoch × ~30s/epoch ≈ 15 分钟**(M4 Max, batch_size=128) > GAN 训练较慢,如果想快速看效果可以把 `NUM_EPOCHS` 改到 10。 """) code("""\ NUM_EPOCHS = 30 LR = 0.0002 BETA1 = 0.5 LABEL_SMOOTHING = True criterion = nn.BCELoss() optimizerG = optim.Adam(netG.parameters(), lr=LR, betas=(BETA1, 0.999)) optimizerD = optim.Adam(netD.parameters(), lr=LR, betas=(BETA1, 0.999)) real_label = 0.9 if LABEL_SMOOTHING else 1.0 fake_label = 0.0 fixed_noise = torch.randn(64, latent_dim, 1, 1, device=device) g_losses, d_losses, d_x_vals = [], [], [] for epoch in range(1, NUM_EPOCHS + 1): for i, real_images in enumerate(loader): batch_size = real_images.size(0) real_images = real_images.to(device) # Train D netD.zero_grad() output = netD(real_images) label = torch.full((batch_size,), real_label, device=device) lossD_real = criterion(output, label) lossD_real.backward() D_x = output.mean().item() noise = torch.randn(batch_size, latent_dim, 1, 1, device=device) fake = netG(noise) output = netD(fake.detach()) label.fill_(fake_label) lossD_fake = criterion(output, label) lossD_fake.backward() optimizerD.step() # Train G netG.zero_grad() output = netD(fake) label.fill_(real_label) lossG = criterion(output, label) lossG.backward() optimizerG.step() d_loss = lossD_real.item() + lossD_fake.item() g_losses.append(lossG.item()) d_losses.append(d_loss) d_x_vals.append(D_x) print(f"Epoch [{epoch:2d}/{NUM_EPOCHS}] D: {d_loss:.3f} G: {lossG.item():.3f} D(x): {D_x:.3f}") """) md("""## Loss 曲线""") code("""\ import matplotlib.pyplot as plt plt.figure(figsize=(8, 4)) plt.plot(g_losses, label='G loss', alpha=0.8) plt.plot(d_losses, label='D loss', alpha=0.8) plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.legend(); plt.grid(True) plt.title("GAN Training Loss"); plt.show() """) md("""## 生成样本 训练完成后,用固定噪声生成一批图像看看效果。""") code("""\ import matplotlib.pyplot as plt import torchvision.utils as vutils from utils.device import get_device with torch.no_grad(): fake = netG(fixed_noise).cpu() fake = (fake + 1) / 2 # [-1,1] → [0,1] grid = vutils.make_grid(fake, nrow=8, padding=2, normalize=False) plt.figure(figsize=(10, 10)) plt.axis("off") plt.imshow(grid.permute(1, 2, 0).clamp(0, 1)) plt.title("Generated CelebA Samples"); plt.show() """) md("""\ ## 思考题 1. D(x) 接近 0 或 1 分别意味着什么?理想值应该是多少? 2. 为什么 G 的第一层不用 BatchNorm,D 的第一层也不用? 3. 标签平滑(label smoothing)为什么能帮助 GAN 训练? 4. 如果 G 和 D 的 Loss 都趋于稳定但生成效果很差,可能是什么问题? 5. 试试把 `NUM_EPOCHS` 加到 100,观察生成质量的变化。 """) nb.cells = cells out = "gen/dcgan/dcgan.ipynb" with open(out, "w") as f: nbf.write(nb, f) print(f"Generated {out}")