#!/usr/bin/env python3
"""Generate the CNN (CIFAR-10) tutorial notebook.

Running this script builds a list of markdown and code cells with
``nbformat`` and writes them to ``cv/simplecnn/cnn.ipynb``. The path is
relative, so it is resolved against the current working directory.
"""

import nbformat as nbf

nb = nbf.v4.new_notebook()
nb.metadata = {
    "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
    "language_info": {"name": "python", "version": "3.12.0"},
}

# Cells are collected in order of appearance and attached to ``nb`` at the end.
cells = []


def md(s):
    """Append a markdown cell whose source is ``s`` to ``cells``."""
    cells.append(nbf.v4.new_markdown_cell(s))


def code(s):
    """Append a code cell whose source is ``s`` to ``cells``."""
    cells.append(nbf.v4.new_code_cell(s))


# NOTE: the cell bodies below are ordinary (non-raw) strings, so every LaTeX
# backslash is doubled. The trailing backslash after the opening quotes
# suppresses the leading newline of each cell.

md("""\
# CNN: Convolutional Neural Network

Simple CNN for CIFAR-10 image classification — Conv×3 + Pool×3 + FC×2.
""")

md("""\
## 背景

卷积神经网络(CNN)是计算机视觉的基础模型。核心操作是**卷积**:
一个可学习的滤波器(kernel)在输入上滑动,提取局部特征。

相比全连接网络,CNN 有三个关键特性:
- **局部连接**:每个神经元只关注局部区域
- **权重共享**:同一个滤波器在整个输入上复用
- **平移不变性**:特征检测器对位置不敏感

这些归纳偏置让 CNN 在处理图像时远优于 MLP。
""")

# The max-pool formula indexes the input at (k*i + p, k*j + q): the window
# advances by k per output position, which is what halves the resolution for
# the MaxPool(2) layers listed in the architecture.
md("""\
## 数学原理

### 卷积

$$(I * K)_{i,j} = \\sum_{m} \\sum_{n} I_{i+m, j+n} \\cdot K_{m,n}$$

其中 $I$ 是输入图像,$K$ 是卷积核。每个通道独立做二维卷积,再跨通道求和。

### 最大池化

取 $k \\times k$ 区域内的最大值,降低空间分辨率:

$$\\text{MaxPool}(I)_{i,j} = \\max_{p,q \\in [0,k)} I_{ki+p, kj+q}$$

### 架构

```
Input(3×32×32) → Conv(3→64) → ReLU → MaxPool(2) → (16×16)
               → Conv(64→128) → ReLU → MaxPool(2) → (8×8)
               → Conv(128→256) → ReLU → MaxPool(2) → (4×4)
               → Flatten → FC(256×4×4 → 256) → ReLU → FC(256 → 10)
```
""")

code("""\
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
from utils.device import get_device

device = get_device()
print(f"Device: {device}")
""")

code("""\
# CIFAR-10 data loading
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

def build_transform(augment=False):
    ops = [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] if augment else []
    ops += [transforms.ToTensor(), transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)]
    return transforms.Compose(ops)

def transform_batch(batch, fn):
    batch["img"] = [fn(img.convert("RGB")) for img in batch["img"]]
    return batch

ds_train = load_dataset("uoft-cs/cifar10", split="train")
ds_test = load_dataset("uoft-cs/cifar10", split="test")
ds_train.set_transform(lambda b: transform_batch(b, build_transform(augment=True)))
ds_test.set_transform(lambda b: transform_batch(b, build_transform(augment=False)))

train_loader = DataLoader(ds_train, batch_size=128, shuffle=True, num_workers=4)
test_loader = DataLoader(ds_test, batch_size=128, shuffle=False, num_workers=4)

CIFAR10_CLASSES = ["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]
print(f"Train: {len(ds_train):,} Test: {len(ds_test):,}")
""")

code("""\
from cv.simplecnn.model import SimpleCNN

model = SimpleCNN(num_classes=10).to(device)
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")
""")

md("""\
## 训练

> ⏱ 预估耗时:**30 epoch × ~30s/epoch ≈ 15 分钟**(M4 Max, batch_size=128)
""")

code("""\
NUM_EPOCHS = 30
LR = 1e-3

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

train_loss_hist, test_acc_hist = [], []

for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        images, labels = batch["img"].to(device), batch["label"].to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    scheduler.step()

    model.eval()
    correct = total = 0
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch["img"].to(device), batch["label"].to(device)
            outputs = model(images)
            _, pred = torch.max(outputs, 1)
            correct += (pred == labels).sum().item()
            total += labels.size(0)

    avg_loss = train_loss / len(train_loader)
    test_acc = correct / total * 100
    train_loss_hist.append(avg_loss)
    test_acc_hist.append(test_acc)
    print(f"Epoch [{epoch:2d}/{NUM_EPOCHS}] Loss: {avg_loss:.4f} Test Acc: {test_acc:.2f}%")
""")

md("""## Loss 曲线 & 测试准确率""")

code("""\
import matplotlib.pyplot as plt
from utils.device import get_device

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(train_loss_hist, marker='o')
ax1.set_xlabel("Epoch"); ax1.set_ylabel("Loss"); ax1.set_title("Training Loss"); ax1.grid(True)
ax2.plot(test_acc_hist, marker='o', color='green')
ax2.set_xlabel("Epoch"); ax2.set_ylabel("Test Acc (%)"); ax2.set_title("Test Accuracy"); ax2.grid(True)
plt.tight_layout(); plt.show()
""")

md("""## 类别准确率分析""")

code("""\
model.eval()
class_correct = [0] * 10
class_total = [0] * 10

with torch.no_grad():
    for batch in test_loader:
        images, labels = batch["img"].to(device), batch["label"].to(device)
        outputs = model(images)
        _, pred = torch.max(outputs, 1)
        for i in range(labels.size(0)):
            label = labels[i].item()
            class_total[label] += 1
            if pred[i].item() == label:
                class_correct[label] += 1

print(f"{'Class':<12} {'Accuracy':>8}")
print("-" * 22)
for i, name in enumerate(CIFAR10_CLASSES):
    acc = class_correct[i] / class_total[i] * 100
    print(f"{name:<12} {acc:>7.2f}%")
""")

md("""\
## 思考题

1. 为什么 CNN 比全连接网络更适合图像分类?(提示:三个归纳偏置)
2. 池化层的作用是什么?去掉池化层会怎样?
3. 卷积核大小(3×3 vs 5×5)对感受野和参数量有什么影响?
4. 试着把 Conv 层数从 3 加到 4,参数量和准确率会怎么变?
""")

nb.cells = cells

out = "cv/simplecnn/cnn.ipynb"
# The cells contain non-ASCII text (Chinese prose, ×, →, ⏱). Pin UTF-8 rather
# than relying on the platform's locale encoding, which would otherwise fail
# or produce an invalid notebook file on non-UTF-8 systems.
with open(out, "w", encoding="utf-8") as f:
    nbf.write(nb, f)
print(f"Generated {out}")