diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..3d0909d31bc0ad12083fbc45e32c3ed698a93560 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text +data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text +fig/feature_maps_conv1.png filter=lfs diff=lfs merge=lfs -text diff --git a/VGG_BatchNorm/.DS_Store b/VGG_BatchNorm/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..55689b508e6fff2278aba9438833270a93765430 Binary files /dev/null and b/VGG_BatchNorm/.DS_Store differ diff --git a/VGG_BatchNorm/VGG_Loss_Landscape.py b/VGG_BatchNorm/VGG_Loss_Landscape.py new file mode 100644 index 0000000000000000000000000000000000000000..fed9e8bfeaac62dafc5c1971297a699e5f06623c --- /dev/null +++ b/VGG_BatchNorm/VGG_Loss_Landscape.py @@ -0,0 +1,256 @@ +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt +from torch import nn +import numpy as np +import torch +import os +import random +from tqdm import tqdm as tqdm +from IPython import display + +from models.vgg import VGG_A +from models.vgg import VGG_A_BatchNorm # you need to implement this network +from data.loaders import get_cifar_loader + +# ## Constants (parameters) initialization +device_id = [0,1,2,3] +num_workers = 4 +batch_size = 128 + +# add our package dir to path +module_path = os.path.dirname(os.getcwd()) +home_path = module_path +figures_path = 
os.path.join(home_path, 'reports', 'figures')  # right-hand side of the `figures_path = ` assignment begun on the preceding source line
models_path = os.path.join(home_path, 'reports', 'models')

# Make sure you are using the right device.
# GPU 3 is still preferred, but the index is clamped to the GPUs that are
# actually visible and the device name is only queried when CUDA is in use;
# the unconditional `torch.cuda.get_device_name(3)` raised on CPU-only hosts
# and on hosts with fewer than four GPUs.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
if torch.cuda.is_available():
    gpu_index = min(3, torch.cuda.device_count() - 1)
    device = torch.device("cuda:{}".format(gpu_index))
else:
    device = torch.device("cpu")
print(device)
if device.type == "cuda":
    print(torch.cuda.get_device_name(device))


# Initialize your data loader and
# make sure that dataloader works
# as expected by observing one
# sample from it.
train_loader = get_cifar_loader(train=True)
val_loader = get_cifar_loader(train=False)
for X, y in train_loader:
    ## --------------------
    # Add code as needed
    ## --------------------
    break


# This function is used to calculate the accuracy of model classification
def get_accuracy(model, data_loader):
    """
    Return the fraction of samples in ``data_loader`` that ``model`` classifies correctly.

    The model is switched to eval mode and is left in eval mode on return.
    Batches are moved to the module-level ``device`` before the forward pass.

    :param model: network returning one score per class for each sample
    :param data_loader: iterable yielding ``(inputs, labels)`` batches
    :return: accuracy as a float in [0, 1]; 0.0 when the loader yields no samples
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            # index of the highest score along the class dimension
            predicted = outputs.argmax(dim=1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    if total == 0:
        # an empty loader used to end in ZeroDivisionError
        return 0.0
    return correct / total


# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    """
    Seed NumPy, PyTorch and the stdlib ``random`` module.

    :param seed_value: seed shared by all generators
    :param device: device string or ``torch.device``; for anything other than
        a CPU device the CUDA generators are seeded too and cuDNN is put in
        deterministic, non-benchmark mode
    :return: None
    """
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    random.seed(seed_value)
    # Normalise first: the script passes a torch.device, and comparing that
    # object to the string 'cpu' does not reliably detect a CPU device.
    if torch.device(device).type != 'cpu':
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


# We use this function to complete the entire
# training process. In order to plot the loss landscape,
# you need to record the loss value of each step.
# Of course, as before, you can test your model
# after drawing a training round and save the curve
# to observe the training
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    """
    Train ``model`` for ``epochs_n`` epochs and record per-epoch curves.

    Uses the module-level ``device`` and ``get_accuracy``.

    :param model: network to train; moved to ``device``
    :param optimizer: optimizer built over ``model.parameters()``
    :param criterion: loss callable taking ``(prediction, target)``
    :param train_loader: iterable of ``(inputs, labels)`` training batches
    :param val_loader: iterable of ``(inputs, labels)`` validation batches
    :param scheduler: optional LR scheduler; ``scheduler.step()`` is called
        with no arguments once per epoch (previously accepted but ignored)
    :param epochs_n: number of epochs
    :param best_model_path: optional path; the ``state_dict`` is saved there
        whenever validation accuracy improves (previously accepted but ignored)
    :return: ``(learning_curve, train_accuracy_curve, val_accuracy_curve)``,
        one entry per epoch each
    """
    model.to(device)
    learning_curve = []
    train_accuracy_curve = []
    val_accuracy_curve = []
    best_val_acc = float('-inf')
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        # get_accuracy() leaves the model in eval mode, so re-enter train mode
        model.train()
        running_loss = 0.0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        learning_curve.append(avg_loss)
        train_acc = get_accuracy(model, train_loader)
        val_acc = get_accuracy(model, val_loader)
        train_accuracy_curve.append(train_acc)
        val_accuracy_curve.append(val_acc)
        if scheduler is not None:
            scheduler.step()
        if best_model_path is not None and val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)
        print(f'Epoch {epoch+1}: loss={avg_loss:.4f}, train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')
    return learning_curve, train_accuracy_curve, val_accuracy_curve


def train_stepwise(model, optimizer, criterion, train_loader, val_loader, epochs_n=10, save_prefix=''):
    """
    Train ``model`` while recording the loss and gradient norm of every step.

    After each epoch the values collected so far are written to
    ``{save_prefix}_step_losses.npy`` and ``{save_prefix}_step_grads.npy``.

    :param model: network to train; moved to the module-level ``device``
    :param optimizer: optimizer built over ``model.parameters()``
    :param criterion: loss callable taking ``(prediction, target)``
    :param train_loader: iterable of ``(inputs, labels)`` training batches
    :param val_loader: unused; kept so the signature mirrors ``train``
    :param epochs_n: number of epochs
    :param save_prefix: path prefix of the two ``.npy`` files
    :return: ``(step_losses, step_grads)``, two lists with one float per step
    """
    # np.save does not create missing directories; make sure the target exists.
    save_dir = os.path.dirname(save_prefix)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    model.to(device)
    model.train()
    step_losses = []
    step_grads = []
    for epoch in range(epochs_n):
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            loss.backward()
            # record the loss
            step_losses.append(loss.item())
            # record the global L2 norm of the gradient (taken before the update)
            squared_norm = sum(
                p.grad.detach().norm(2).item() ** 2
                for p in model.parameters()
                if p.grad is not None
            )
            step_grads.append(squared_norm ** 0.5)
            optimizer.step()
        # save loss and grad after every epoch, so an interrupted run keeps its data
        np.save(f'{save_prefix}_step_losses.npy', np.array(step_losses))
        np.save(f'{save_prefix}_step_grads.npy', np.array(step_grads))
    return step_losses, step_grads


# Train your model
# feel free to modify
epo = 5
loss_save_path = ''
# grad_save_path = ''

set_random_seeds(seed_value=2020, device=device)
model = VGG_A()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
loss, train_acc, val_acc = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
np.savetxt(os.path.join(loss_save_path, 'loss.txt'), loss, fmt='%s', delimiter=' ')
# np.savetxt(os.path.join(grad_save_path, 'grads.txt'), grads, fmt='%s', delimiter=' ')

# Maintain two lists: max_curve and min_curve,
# select the maximum value of loss in all models
# on the same step, add it to max_curve, and
# the minimum value to min_curve
min_curve = []
max_curve = []


def _loss_envelope(runs):
    """Return ``(min_curve, max_curve)`` taken step by step across several per-step loss runs."""
    if not runs:
        return [], []
    # runs may differ in length; only steps present in every run are compared
    n_steps = min(len(run) for run in runs)
    stacked = np.array([run[:n_steps] for run in runs])
    return stacked.min(axis=0).tolist(), stacked.max(axis=0).tolist()


def _plot_comparison(steps, series_a, series_bn, ylabel, title, save_path):
    """Draw the plain and BatchNorm VGG-A series on one figure and save it to ``save_path``."""
    plt.figure(figsize=(8, 5))
    plt.plot(steps, series_a, 'r-', label='VGG-A (no BN)')
    plt.plot(steps, series_bn, 'b-', label='VGG-A (with BN)')
    plt.xlabel('Step')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()


# Use this function to plot the final loss landscape,
# fill the area between the two curves can use plt.fill_between()
def plot_loss_landscape(envelopes=None, save_path='vgg_loss_landscape.png'):
    """
    Plot one shaded band per model between its min and max loss curves.

    :param envelopes: mapping ``label -> (min_curve, max_curve)``; when
        omitted, the module-level ``min_curve`` / ``max_curve`` are drawn
    :param save_path: output image path
    :return: None; nothing is drawn or written when every curve is empty,
        which is what a bare call did while this function was still a stub
    """
    if envelopes is None:
        envelopes = {'loss': (min_curve, max_curve)}
    bands = {
        label: (low, high)
        for label, (low, high) in envelopes.items()
        if len(low) > 0 and len(high) > 0
    }
    if not bands:
        return
    plt.figure(figsize=(8, 5))
    for label, (low, high) in bands.items():
        n_steps = min(len(low), len(high))
        steps = np.arange(1, n_steps + 1)
        plt.fill_between(steps, low[:n_steps], high[:n_steps], alpha=0.4, label=label)
    plt.xlabel('Step')
    plt.ylabel('Training Loss')
    plt.title('Loss Landscape')
    plt.legend()
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()


if __name__ == "__main__":
    epo = 10
    set_random_seeds(seed_value=2020, device=device)
    # Every artefact below goes to models/ relative to the working directory;
    # create it up front instead of failing after the first training epoch.
    os.makedirs('models', exist_ok=True)

    learning_rates = [1e-3, 2e-3, 1e-4, 5e-4]
    criterion = nn.CrossEntropyLoss()
    all_loss_a = []
    all_loss_bn = []
    all_grad_a = []
    all_grad_bn = []
    for lr in learning_rates:
        # VGG-A
        model_a = VGG_A()
        optimizer_a = torch.optim.Adam(model_a.parameters(), lr=lr)
        prefix_a = f'models/vgg_a_lr{lr}'
        step_losses_a, step_grads_a = train_stepwise(model_a, optimizer_a, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_a)
        torch.save(model_a.state_dict(), f'{prefix_a}.pth')
        all_loss_a.append(step_losses_a)
        all_grad_a.append(step_grads_a)

        # VGG-A-BN
        model_bn = VGG_A_BatchNorm()
        optimizer_bn = torch.optim.Adam(model_bn.parameters(), lr=lr)
        prefix_bn = f'models/vgg_bn_lr{lr}'
        step_losses_bn, step_grads_bn = train_stepwise(model_bn, optimizer_bn, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_bn)
        torch.save(model_bn.state_dict(), f'{prefix_bn}.pth')
        all_loss_bn.append(step_losses_bn)
        all_grad_bn.append(step_grads_bn)

    # One set of figures per learning rate
    for lr in learning_rates:
        # load the recorded loss and gradient-norm traces
        loss_a = np.load(f'models/vgg_a_lr{lr}_step_losses.npy')
        loss_bn = np.load(f'models/vgg_bn_lr{lr}_step_losses.npy')
        grad_a = np.load(f'models/vgg_a_lr{lr}_step_grads.npy')
        grad_bn = np.load(f'models/vgg_bn_lr{lr}_step_grads.npy')
        steps = np.arange(1, min(len(loss_a), len(loss_bn)) + 1)
        n_steps = len(steps)

        # loss comparison
        _plot_comparison(steps, loss_a[:n_steps], loss_bn[:n_steps],
                         'Training Loss', f'Loss Curve (lr={lr})',
                         f'vgg_loss_curve_lr{lr}.png')

        # gradient-norm comparison
        _plot_comparison(steps, grad_a[:n_steps], grad_bn[:n_steps],
                         'Gradient Norm', f'Gradient Norm Curve (lr={lr})',
                         f'vgg_grad_norm_curve_lr{lr}.png')

        # absolute change of the gradient norm between consecutive steps
        grad_diff_a = np.abs(np.diff(grad_a[:n_steps]))
        grad_diff_bn = np.abs(np.diff(grad_bn[:n_steps]))
        _plot_comparison(steps[1:], grad_diff_a, grad_diff_bn,
                         'Gradient Difference', f'Max Gradient Difference (lr={lr})',
                         f'vgg_max_grad_diff_lr{lr}.png')

    # The per-learning-rate loss traces were collected above but never used:
    # turn them into one min/max band per model.
    plot_loss_landscape({
        'VGG-A (no BN)': _loss_envelope(all_loss_a),
        'VGG-A (with BN)': _loss_envelope(all_loss_bn),
    })
+''' +Scripts to download and generate data +''' + +from . import loaders \ No newline at end of file diff --git a/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc b/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf9b823eae03fe5d59f89f4651f4e9b92a75266d Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc b/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0be9bd46131bf60e847c77fbbb41a8c5e95d9742 Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc differ diff --git a/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc b/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb82538671c00bd6785c82cb73b8d2e73fcecf73 Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc differ diff --git a/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc b/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0fce06f4e3f9d3e1bf2e9a72719308ba906ff71 Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc differ diff --git a/VGG_BatchNorm/data/cifar-10-python.tar.gz b/VGG_BatchNorm/data/cifar-10-python.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc9f0378736b12bdd4f605fb86f7f94defee5962 --- /dev/null +++ b/VGG_BatchNorm/data/cifar-10-python.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45163964244cea80d8b2367396f1a64e529767c1a4e2c0c91c67b8ac2f691e1 +size 5226496 diff --git a/VGG_BatchNorm/data/loaders.py b/VGG_BatchNorm/data/loaders.py new file mode 100644 index 
"""
Data loaders
"""
import numpy as np
from torch.utils.data import DataLoader, Dataset


class PartialDataset(Dataset):
    """
    View over the first ``n_items`` samples of another map-style dataset.

    :param dataset: wrapped dataset; must support ``len()`` and integer indexing
    :param n_items: upper bound on the number of samples exposed
    """

    def __init__(self, dataset, n_items=10):
        self.dataset = dataset
        self.n_items = n_items

    def __getitem__(self, index):
        """
        Return sample ``index`` of the wrapped dataset.

        The previous version took no index at all, so every lookup failed
        with ``TypeError``.

        :param index: integer position; negative values count from the end of the view
        :raises IndexError: if ``index`` lies outside the truncated view
        """
        length = len(self)
        if index < 0:
            index += length
        if not 0 <= index < length:
            # Required so plain iteration over the dataset stops at n_items.
            raise IndexError('PartialDataset index out of range')
        return self.dataset[index]

    def __len__(self):
        return min(self.n_items, len(self.dataset))


def get_cifar_loader(root='../data/', batch_size=128, train=True, shuffle=True, num_workers=4, n_items=-1):
    """
    Build a ``DataLoader`` over CIFAR-10 (downloaded to ``root`` if missing).

    Images are converted to tensors and normalised with mean 0.5 and std 0.5
    per channel.

    :param root: directory holding (or receiving) the dataset
    :param batch_size: samples per batch
    :param train: select the training split when True, the test split otherwise
    :param shuffle: reshuffle the data every epoch
    :param num_workers: number of loader worker processes
    :param n_items: when positive, restrict the dataset to its first ``n_items`` samples
    :return: the configured ``DataLoader``
    """
    # Imported here so that this module (and PartialDataset) can be imported
    # in environments where torchvision is not installed.
    from torchvision import datasets, transforms

    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])

    data_transforms = transforms.Compose(
        [transforms.ToTensor(),
         normalize])

    dataset = datasets.CIFAR10(root=root, train=train, download=True, transform=data_transforms)
    if n_items > 0:
        dataset = PartialDataset(dataset, n_items)

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    return loader


if __name__ == '__main__':
    # Plotting is only needed for this smoke test, so the backend is selected
    # here rather than as a side effect of importing the module.
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    train_loader = get_cifar_loader()
    for X, y in train_loader:
        print(X[0])
        print(y[0])
        print(X[0].shape)
        # channels-first tensor -> channels-last array, as imshow expects
        img = np.transpose(X[0].numpy(), [1, 2, 0])
        # undo the (x - 0.5) / 0.5 normalisation before display
        plt.imshow(img * 0.5 + 0.5)
        plt.savefig('sample.png')
        print(X[0].max())
        print(X[0].min())
        break
"""
VGG
"""
import numpy as np
from torch import nn

from utils.nn import init_weights_


# ## Models implementation
def get_number_of_parameters(model):
    """
    Return the total number of scalar parameters of ``model`` as an int.

    ``numel()`` is used instead of ``np.prod(shape)``, which yields the
    float 1.0 for 0-dim parameters and so turned the total into a float.
    """
    return sum(parameter.numel() for parameter in model.parameters())


class VGG_A(nn.Module):
    """VGG_A model

    size of Linear layers is smaller since input assumed to be 32x32x3, instead of
    224x224x3

    :param inp_ch: number of input channels
    :param num_classes: size of the output layer
    :param init_weights: apply ``init_weights_`` to every sub-module when True
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        super().__init__()

        # Layer order and attribute names define the state_dict keys of saved
        # checkpoints; do not reorder.
        self.features = nn.Sequential(
            # stage 1
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage5
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes))

        if init_weights:
            self._init_weights()

    def forward(self, x):
        x = self.features(x)
        # Flatten per sample. `view(-1, 512)` silently folded extra spatial
        # positions into the batch dimension for inputs larger than 32x32;
        # now such inputs fail loudly in the first Linear layer instead.
        x = self.classifier(x.flatten(1))
        return x

    def _init_weights(self):
        for m in self.modules():
            init_weights_(m)


class VGG_A_Light(nn.Module):
    """
    Two-stage, narrow variant of VGG_A (16 and 32 channels).

    The classifier expects ``32 * 8 * 8`` features per sample, i.e. a 32x32
    input after the two 2x2 poolings. No explicit weight initialisation is
    applied.
    """

    def __init__(self, inp_ch=3, num_classes=10):
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.classifier = nn.Sequential(
            nn.Linear(32 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes))

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        # per-sample flatten; see VGG_A.forward for why `view(-1, ...)` was replaced
        x = self.classifier(x.flatten(1))
        return x


class VGG_A_Dropout(nn.Module):
    """
    VGG_A with ``nn.Dropout`` in front of the first two classifier layers.

    The convolutional part is split into ``stage1`` .. ``stage5`` attributes.
    No explicit weight initialisation is applied.
    """

    def __init__(self, inp_ch=3, num_classes=10):
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes))

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        # per-sample flatten; see VGG_A.forward for why `view(-1, ...)` was replaced
        x = self.classifier(x.flatten(1))
        return x


class VGG_A_BatchNorm(nn.Module):
    """
    VGG_A with a ``BatchNorm2d`` layer after every convolution.

    :param inp_ch: number of input channels
    :param num_classes: size of the output layer
    :param init_weights: apply ``init_weights_`` to every sub-module when True
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        super().__init__()
        # Layer order defines the state_dict keys of saved checkpoints; do not reorder.
        self.features = nn.Sequential(
            nn.Conv2d(inp_ch, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )
        if init_weights:
            self._init_weights()

    def forward(self, x):
        x = self.features(x)
        # per-sample flatten; see VGG_A.forward for why `view(-1, ...)` was replaced
        x = self.classifier(x.flatten(1))
        return x

    def _init_weights(self):
        for m in self.modules():
            init_weights_(m)


if __name__ == '__main__':
    print(get_number_of_parameters(VGG_A()))
    print(get_number_of_parameters(VGG_A_Light()))
    print(get_number_of_parameters(VGG_A_Dropout()))
    print(get_number_of_parameters(VGG_A_BatchNorm()))
https://git-lfs.github.com/spec/v1 +oid sha256:b3db4e136d92dc94a1f8601d7700e922e466a049e91891840f0374d6d27c98db +size 39011786 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..d5d0486a7ef302a6a3220e986215af84b62beda2 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5703f3db77174dc12448202f46dac32866fa7ce47383c961b0c0d85bfc4c89b7 +size 31408 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy new file mode 100644 index 0000000000000000000000000000000000000000..19ea4b1c264b737ac18d49fb4880a0f7acc42f5b --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3838ee654dddc55efa6b63f65594901803a410f17e90e2d2402ba51127c94db +size 31408 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001.pth b/VGG_BatchNorm/models/vgg_a_lr0.001.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b628e9684a031e2570839ad834317b228c0b2af --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.001.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bab30b087815d4470cb665e2aeadcb2c2c786f5f21746bcf07e93cef1e3669 +size 39011760 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..f6d8ecf087353ffc97ab87af0e34f9a077494079 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a558422f2f2eb6233e061ed0e24fea94a7525074255f8f91dd5d6ed6b2dd1c +size 31408 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy 
new file mode 100644 index 0000000000000000000000000000000000000000..f2c45d9c98f5434404d2447d422f407a64cd2fdd --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f74eebcff55116c216fbaca90a56280090d6a01ab7223cad91d8cb42c97747 +size 31408 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002.pth b/VGG_BatchNorm/models/vgg_a_lr0.002.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fb0a3752bc1907b70bcd2be7b83c35f949d5697 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.002.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e8453a024e60b3bb8e680b0aeed1d17bcf1545b2de4ca030f57513e0c5f7be +size 39011760 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..d2295f0d5bed212c83cadd726bd67d8745283181 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22fc25871c1a180f423cc498b60f1083d07c2e45d605939889b5c90a35afcfa +size 31408 diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy new file mode 100644 index 0000000000000000000000000000000000000000..8635ed4da9ee3b74dad76513bfe9be638e0e199e --- /dev/null +++ b/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7868e3df3d4ae5b25889df52f98ef5e4bb98dcd741f62667b4671b3e16ae981 +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth b/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ed387e41b6a2805868919b6f4e4f6a8f86a1cb8 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9451b3372e6897dd96be45a1902a9629e8f313e9019162d8f9ce5251ffc542a3 +size 39068716 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..487e006e5e8f8fdf80b8c85200e74222604e201b --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8880316d60e582db12979390d79b6f5df7a1b0c3b31e90ff164de5f0930d0aef +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy new file mode 100644 index 0000000000000000000000000000000000000000..39125903e15767e1db180d7a258a6641dfae4234 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a2ee1fe92633edf653d27d5139c882ffdd331319832aedf391a9d8f9a2c42e8 +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth b/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c42ba42723734274400869801f86ce4546a1144 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b108a6b387ba7f8dbaf5f8192cebd96e753d3b2eec071fdb9047ec18ac56af +size 39068716 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..022806d24048f2e3a75eb1e3ca2f5a52b97ec888 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75477b2424055e4773105ed1c3e30b3f865263d93620cdb0ab8ea8cc7ab90e5b +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy new file mode 
100644 index 0000000000000000000000000000000000000000..11b3269759d851bec462dc0a94c22597de6b583a --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3261b9564be6ae816ec048ac8189aec7e16eb0b9ed6a31a4851356a19f09ce04 +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001.pth b/VGG_BatchNorm/models/vgg_bn_lr0.001.pth new file mode 100644 index 0000000000000000000000000000000000000000..51bad31a372d317f8a344a46c116ca5aed094c31 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.001.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9765324e222a9d6ff7b51ae6b5f3f97a1e1cc99197654f906f8e7611b9b8d193 +size 39068650 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..396d62a8670f1c611884aa321c3514bc906e2197 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a2224404b4ba0e30fb143ddbaee1976528671afe3b2f25a14499dc7dd63a87d +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy new file mode 100644 index 0000000000000000000000000000000000000000..05c5d8746c0d03d83998dfae8ccce157e62aee15 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50030e0c9e31a85a981d7b60d0094b758fa8efb37f89ca309ec7025cfa71787e +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002.pth b/VGG_BatchNorm/models/vgg_bn_lr0.002.pth new file mode 100644 index 0000000000000000000000000000000000000000..6db683741e511477d11759e9bc03b71a66b242f0 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.002.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1488c03104e9de23b07a9deb22ef6afb1ba3b7f6aaa5dad9ba5a46a4c51082c4 +size 39068650 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy new file mode 100644 index 0000000000000000000000000000000000000000..57092ccccbeac1b9164e5dd1f36ce2e393333524 --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf1d2594a3cd9783d29c71a69d124d00928c24ba50d143dfb58c72cd06f601b +size 31408 diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy new file mode 100644 index 0000000000000000000000000000000000000000..11527236b3579f6d93ce0840ef0cb1fbac35e1be --- /dev/null +++ b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215478b68ca6ea0590bc76f7353771a923a1f10422c5b9dfa022526d1ad0d41b +size 31408 diff --git a/VGG_BatchNorm/utils/__init__.py b/VGG_BatchNorm/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d9731c28b513a4297ce41cea337733a6258eb14 --- /dev/null +++ b/VGG_BatchNorm/utils/__init__.py @@ -0,0 +1,4 @@ +''' +Several utils, in particular for experiments +''' +from . 
import nn \ No newline at end of file diff --git a/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea57a0f5f86460d314f7e8be193acf9788313c9f Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e25fdad670f50921969fc674905e3687f4b60bf6 Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc differ diff --git a/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc b/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..574e81e80af31a672a0437311a3902fc1a0d9664 Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc differ diff --git a/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc b/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05b0e61e89f08d724f5cee36e43d6482a7bb30f3 Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc differ diff --git a/VGG_BatchNorm/utils/nn.py b/VGG_BatchNorm/utils/nn.py new file mode 100644 index 0000000000000000000000000000000000000000..3945cb8e0b96a005aa593a0ba8652a8e1b81ec58 --- /dev/null +++ b/VGG_BatchNorm/utils/nn.py @@ -0,0 +1,31 @@ +""" +Utils for neural networks +""" + +from torch import nn + + +def init_weights_(m): + """ + Initializes weights of m according to Xavier normal method. 
+ + :param m: module + :return: + """ + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + elif isinstance(m, nn.BatchNorm1d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + elif isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) diff --git a/VGG_BatchNorm/vgg_bn_compare.png b/VGG_BatchNorm/vgg_bn_compare.png new file mode 100644 index 0000000000000000000000000000000000000000..b1fb21faf3a58aa4cabc0b0e929535e3f416048f Binary files /dev/null and b/VGG_BatchNorm/vgg_bn_compare.png differ diff --git a/VGG_BatchNorm/vgg_bn_loss_landscape.png b/VGG_BatchNorm/vgg_bn_loss_landscape.png new file mode 100644 index 0000000000000000000000000000000000000000..064534761285b05f22bfb9432f870cb408bad697 Binary files /dev/null and b/VGG_BatchNorm/vgg_bn_loss_landscape.png differ diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b36ecd26a3105ed63f46fa91b0b3a1e5ab95cd Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png differ diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png new file mode 100644 index 0000000000000000000000000000000000000000..71032927e92f065d98399e65a8af937a89ff58fd Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png differ diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png new file mode 100644 index 0000000000000000000000000000000000000000..4dc03fb9186699e3e50ed0bfff758000bcd7368c Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png differ diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png 
b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png new file mode 100644 index 0000000000000000000000000000000000000000..84ab1e2fed855b42dbebafff3bb2d67f05f10b69 Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png differ diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png b/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png new file mode 100644 index 0000000000000000000000000000000000000000..b75aead56d2ecfd80e1290d7d64e3da5bacbeafb Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png differ diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png b/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png new file mode 100644 index 0000000000000000000000000000000000000000..47c45e3ec8cd63ca056184917ae24fa752238178 Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png differ diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.001.png b/VGG_BatchNorm/vgg_loss_curve_lr0.001.png new file mode 100644 index 0000000000000000000000000000000000000000..645a2a2825b50fef42dcaafabae83bbedeaddee7 Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.001.png differ diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.002.png b/VGG_BatchNorm/vgg_loss_curve_lr0.002.png new file mode 100644 index 0000000000000000000000000000000000000000..e26101d1c6ce67b66f93745c5b29451859e7507a Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.002.png differ diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png new file mode 100644 index 0000000000000000000000000000000000000000..31ae8e2b3efc95342e9646fff42740159dc3d7e1 Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png differ diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png new file mode 100644 index 0000000000000000000000000000000000000000..88ee29de2cd349fe9d777ad303e6e29c8f92e2ce Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png differ diff --git 
a/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png new file mode 100644 index 0000000000000000000000000000000000000000..08805778a252ce3ba08b63cdcef3443d58954df0 Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png differ diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png new file mode 100644 index 0000000000000000000000000000000000000000..e3b597355852798857601f976a7f04296c0ee8ba Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png differ diff --git a/__pycache__/mymodel.cpython-310.pyc b/__pycache__/mymodel.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..615db8a57ddbbb3ba4653b06ecb14bbecd34bd01 Binary files /dev/null and b/__pycache__/mymodel.cpython-310.pyc differ diff --git a/best_model.pth b/best_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..79358c4a96a2fa41e4233e95f5795ac5ea90f3ab --- /dev/null +++ b/best_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6382bea60097cd0a85ec970df6cea916256ac88a59298b9442b0132955f2ae4f +size 2187810 diff --git a/data/cifar-10-batches-py/batches.meta b/data/cifar-10-batches-py/batches.meta new file mode 100644 index 0000000000000000000000000000000000000000..4467a6ec2e886a9f14f25e31776fb0152d8ac64a Binary files /dev/null and b/data/cifar-10-batches-py/batches.meta differ diff --git a/data/cifar-10-batches-py/data_batch_1 b/data/cifar-10-batches-py/data_batch_1 new file mode 100644 index 0000000000000000000000000000000000000000..1b9ff789bbf08b02df98fea255e1343119eaa8d6 --- /dev/null +++ b/data/cifar-10-batches-py/data_batch_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54636561a3ce25bd3e19253c6b0d8538147b0ae398331ac4a2d86c6d987368cd +size 31035704 diff --git a/data/cifar-10-batches-py/data_batch_2 b/data/cifar-10-batches-py/data_batch_2 new file mode 100644 index 
0000000000000000000000000000000000000000..da8acc0d33edbd9889f8a11226e2be1f53bdf1f5 --- /dev/null +++ b/data/cifar-10-batches-py/data_batch_2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766b2cef9fbc745cf056b3152224f7cf77163b330ea9a15f9392beb8b89bc5a8 +size 31035320 diff --git a/data/cifar-10-batches-py/data_batch_3 b/data/cifar-10-batches-py/data_batch_3 new file mode 100644 index 0000000000000000000000000000000000000000..e98eb3e45d5a9778ad227d2703c7d4b1290a5d64 --- /dev/null +++ b/data/cifar-10-batches-py/data_batch_3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f00d98ebfb30b3ec0ad19f9756dc2630b89003e10525f5e148445e82aa6a1f9 +size 31035999 diff --git a/data/cifar-10-batches-py/data_batch_4 b/data/cifar-10-batches-py/data_batch_4 new file mode 100644 index 0000000000000000000000000000000000000000..9b81f87873afbf46bdda4fa1ee82434857a58ecc --- /dev/null +++ b/data/cifar-10-batches-py/data_batch_4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7bb240661948b8f4d53e36ec720d8306f5668bd0071dcb4e6c947f78e9682b +size 31035696 diff --git a/data/cifar-10-batches-py/data_batch_5 b/data/cifar-10-batches-py/data_batch_5 new file mode 100644 index 0000000000000000000000000000000000000000..0428cfda4f34db9278991559bcbc322d4f79e6ac --- /dev/null +++ b/data/cifar-10-batches-py/data_batch_5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91802434d8376bbaeeadf58a737e3a1b12ac839077e931237e0dcd43adcb154 +size 31035623 diff --git a/data/cifar-10-batches-py/readme.html b/data/cifar-10-batches-py/readme.html new file mode 100644 index 0000000000000000000000000000000000000000..e377adef45c85dc91051edf2dee72c1d4d57732c --- /dev/null +++ b/data/cifar-10-batches-py/readme.html @@ -0,0 +1 @@ + diff --git a/data/cifar-10-batches-py/test_batch b/data/cifar-10-batches-py/test_batch new file mode 100644 index 0000000000000000000000000000000000000000..7cb1691b21c2eaf98ca33dc302ab6df2c2984121 --- 
/dev/null +++ b/data/cifar-10-batches-py/test_batch @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53d8d457504f7cff4ea9e021afcf0e0ad8e24a91f3fc42091b8adef61157831 +size 31035526 diff --git a/data/cifar-10-python.tar.gz b/data/cifar-10-python.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3026cc501ad5b158f4de147d06c95ebbc112ea48 --- /dev/null +++ b/data/cifar-10-python.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce +size 170498071 diff --git a/fig/all_filters_conv1.png b/fig/all_filters_conv1.png new file mode 100644 index 0000000000000000000000000000000000000000..5b20480bdd8338ea69b84a49113d47ce468961b9 Binary files /dev/null and b/fig/all_filters_conv1.png differ diff --git a/fig/all_filters_conv2.png b/fig/all_filters_conv2.png new file mode 100644 index 0000000000000000000000000000000000000000..fc066550dff84eb811ad13afa9890fe439afef16 Binary files /dev/null and b/fig/all_filters_conv2.png differ diff --git a/fig/feature_maps.png b/fig/feature_maps.png new file mode 100644 index 0000000000000000000000000000000000000000..e7653e7093bde956985c378b0719baa652b3f9d1 Binary files /dev/null and b/fig/feature_maps.png differ diff --git a/fig/feature_maps_conv1.png b/fig/feature_maps_conv1.png new file mode 100644 index 0000000000000000000000000000000000000000..e9bacab659d428479f885969bbe447bbc802c6b4 --- /dev/null +++ b/fig/feature_maps_conv1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa306749b1897efefd38d982852c9f562e84fe6d1da7db764e7869679cb2f333 +size 101683 diff --git a/fig/feature_maps_conv2.png b/fig/feature_maps_conv2.png new file mode 100644 index 0000000000000000000000000000000000000000..4ab54a6209d889a1368a34aa4b745ecf2fa00ba6 Binary files /dev/null and b/fig/feature_maps_conv2.png differ diff --git a/fig/loss_curve.png b/fig/loss_curve.png new file mode 100644 index 
0000000000000000000000000000000000000000..bb3778e02353d1892b27ebceb7cd91b1bf1c59c6 Binary files /dev/null and b/fig/loss_curve.png differ diff --git a/fig/loss_landscape.png b/fig/loss_landscape.png new file mode 100644 index 0000000000000000000000000000000000000000..b6e495727a28245152617a99868e4aa85d05c042 Binary files /dev/null and b/fig/loss_landscape.png differ diff --git a/fig/my_filters.png b/fig/my_filters.png new file mode 100644 index 0000000000000000000000000000000000000000..83392e1e9847f35c5e67640066b94e17555371fa Binary files /dev/null and b/fig/my_filters.png differ diff --git a/init.py b/init.py new file mode 100644 index 0000000000000000000000000000000000000000..7ae957a7f11b9391d87a2e7d7ddbf311d2f66b3e --- /dev/null +++ b/init.py @@ -0,0 +1 @@ +# This file makes the directory a Python package. diff --git a/model/best_model_1.pth b/model/best_model_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..993d41e0af5a602c2e216caca8cf9b1f752ca8d6 --- /dev/null +++ b/model/best_model_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab2eecf0175932fa74fd2806269b07f355bd39b9b5b30248701220840a20fb2 +size 2188002 diff --git a/model/best_model_20250523-152515.pth b/model/best_model_20250523-152515.pth new file mode 100644 index 0000000000000000000000000000000000000000..13b37a1a8d026a9f08d0666fa7d12d593c15afaa --- /dev/null +++ b/model/best_model_20250523-152515.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff90946d79f3a15174eb9c0d54a709dab5ee8a9fa36c83c15c1deeb96963ea21 +size 2183972 diff --git a/model/best_model_20250523-153037.pth b/model/best_model_20250523-153037.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ca266031d111389f409c41802b0e5becaea761b --- /dev/null +++ b/model/best_model_20250523-153037.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1bdfe0cc12793c7f534e58982a100367015620d7a01342450dfe8171797787f +size 2188418 
diff --git a/model/best_model_20250523-153308.pth b/model/best_model_20250523-153308.pth new file mode 100644 index 0000000000000000000000000000000000000000..05aeb2b32e70aa638f4e436a34b0eadb1bca2bf7 --- /dev/null +++ b/model/best_model_20250523-153308.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd89c66ec30ec81a73328f5145baa8698678faafc8f69cf784981e185f830d9 +size 2183972 diff --git a/model/best_model_20250523-153543.pth b/model/best_model_20250523-153543.pth new file mode 100644 index 0000000000000000000000000000000000000000..29065fdbe80b7cd07d8cc8a0814140a8e4d19f29 --- /dev/null +++ b/model/best_model_20250523-153543.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd33c68761affe3d97e99ac8ce63a65a5a8808767bef7f6e431f3518d47c1e1 +size 2188418 diff --git a/model/best_model_20250523-153830.pth b/model/best_model_20250523-153830.pth new file mode 100644 index 0000000000000000000000000000000000000000..0dfdde827e0b30154feabef03fcb743df81e5767 --- /dev/null +++ b/model/best_model_20250523-153830.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f8bccbb1f3dd266740be283c53b88fef4f376f93fa273c5f68965095302026 +size 2188418 diff --git a/model/best_model_20250523-154144.pth b/model/best_model_20250523-154144.pth new file mode 100644 index 0000000000000000000000000000000000000000..a231977646d83b989c67ac4c6d022afa5d47f1c3 --- /dev/null +++ b/model/best_model_20250523-154144.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2327ea8eeff1a51e1171ac893dceeef751c2144e59389cd344e6cddc4e1097f5 +size 2188418 diff --git a/model/best_model_20250524-055438.pth b/model/best_model_20250524-055438.pth new file mode 100644 index 0000000000000000000000000000000000000000..337bd95abd57ab684758dbee65019ce3ad679ed6 --- /dev/null +++ b/model/best_model_20250524-055438.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2f854a5352e4466a4821627e8e36e01d71f199e2519add0b4d27ed133c8bd6c0 +size 2188418 diff --git a/model/best_model_20250524-060548.pth b/model/best_model_20250524-060548.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9815d964cad16dc38b330c51d4fc54f8ca00824 --- /dev/null +++ b/model/best_model_20250524-060548.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36dad5eeb4b8040386f5d11db8751a4213b3d00744f8624fa9b4ab8fa4a8ce19 +size 4907522 diff --git a/model/best_model_20250524-061343.pth b/model/best_model_20250524-061343.pth new file mode 100644 index 0000000000000000000000000000000000000000..cce4f154a3deaea546fbdef58d2d32d7887000e6 --- /dev/null +++ b/model/best_model_20250524-061343.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ef7e771b0909b839561713e64790ddde49c7c6559b4e97bca150368d59b7b3 +size 2325634 diff --git a/model/best_model_20250524-062209.pth b/model/best_model_20250524-062209.pth new file mode 100644 index 0000000000000000000000000000000000000000..1695c5b94604d3f6359dd7b6ccd4c7147e4e5351 --- /dev/null +++ b/model/best_model_20250524-062209.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96f5650e0c676b24336dd999c5afd47ad066d62849042358a9244757179a3b8 +size 2490274 diff --git a/model/best_model_20250524-062621.pth b/model/best_model_20250524-062621.pth new file mode 100644 index 0000000000000000000000000000000000000000..506c0162c48c9093d9cd97e73b470f5f85087873 --- /dev/null +++ b/model/best_model_20250524-062621.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bce6d604ecf674e466fea87aba7b3f5b9b20048b5793270bc0f428130ffb5c +size 12534946 diff --git a/model/best_model_20250524-065041.pth b/model/best_model_20250524-065041.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea406d8639fc804276255645ee5432186b1e7c28 --- /dev/null +++ b/model/best_model_20250524-065041.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1bed43b184195b492c5180047cf7fc88c5da3bbb6dcbff7b8dbfc767bc8a39ec +size 12534946 diff --git a/model/best_model_20250524-071202.pth b/model/best_model_20250524-071202.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b97f0bec0e49e0553cfed07968827ad9047f9a3 --- /dev/null +++ b/model/best_model_20250524-071202.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a33d28cd512efa69a841d209f178429cbf9856fd6dba29cb932fc25ed479ee +size 2188418 diff --git a/model/best_model_20250524-071415.pth b/model/best_model_20250524-071415.pth new file mode 100644 index 0000000000000000000000000000000000000000..39170b7092e13cd5d218e0a34d86428e906e0401 --- /dev/null +++ b/model/best_model_20250524-071415.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691524c379e256802b670a857e3a7ecb8d79d6d809b313c6817c05b9c3dd545b +size 2188418 diff --git a/model/best_model_20250524-073418.pth b/model/best_model_20250524-073418.pth new file mode 100644 index 0000000000000000000000000000000000000000..fdd6811b1c67a5e03a0d70da557761c629d95cf6 --- /dev/null +++ b/model/best_model_20250524-073418.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047a0db3710fb3c1d28e5ca6b76dbd116e9a337b1142c09cb72437c566853892 +size 2188418 diff --git a/model/best_model_20250524-073654.pth b/model/best_model_20250524-073654.pth new file mode 100644 index 0000000000000000000000000000000000000000..b1c2ac925cf047f0f4c1924ecfd95af6fb715088 --- /dev/null +++ b/model/best_model_20250524-073654.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6639d12aa73241e276e62d7c063a4067b90572bab124dcbce72e19dbf54e632f +size 2188418 diff --git a/model/best_model_20250524-073909.pth b/model/best_model_20250524-073909.pth new file mode 100644 index 0000000000000000000000000000000000000000..25a5d01c6c8cb83c4f8b1f4716601891b8161178 --- /dev/null +++ 
b/model/best_model_20250524-073909.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ed0b2801cec50c3856d5b43d5661cb25bc5fc4a3c9f49fa43e27a61b976dab +size 2188418 diff --git a/model/best_model_20250606-074637.pth b/model/best_model_20250606-074637.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c725758e994f6283d2d340586763849c6b3144f --- /dev/null +++ b/model/best_model_20250606-074637.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34eefc6f51475e11aa8201e5875501a6ebb645a44abfeb295811caee9af56b89 +size 2188418 diff --git a/mymodel.py b/mymodel.py new file mode 100644 index 0000000000000000000000000000000000000000..dde6bf784f6ba89f7e91c4487103bf5f372f9172 --- /dev/null +++ b/mymodel.py @@ -0,0 +1,48 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class MyCIFAR10Net(nn.Module): + def __init__(self, num_classes=10, use_batchnorm=True, use_dropout=False, activation='relu'): + super(MyCIFAR10Net, self).__init__() + # Example: 2 conv layers, pooling, batchnorm, dropout, fully connected + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1) + self.bn1 = nn.BatchNorm2d(32) if use_batchnorm else nn.Identity() + self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) + self.bn2 = nn.BatchNorm2d(64) if use_batchnorm else nn.Identity() + self.pool = nn.MaxPool2d(2, 2) + self.dropout = nn.Dropout(0.25) if use_dropout else nn.Identity() + self.fc1 = nn.Linear(64 * 8 * 8, 128) + self.fc2 = nn.Linear(128, num_classes) + self.activation = activation + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self._activate(x) + x = self.pool(x) + x = self.conv2(x) + x = self.bn2(x) + x = self._activate(x) + x = self.pool(x) + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.fc1(x) + x = self._activate(x) + x = self.dropout(x) + x = self.fc2(x) + return x + + def _activate(self, x): + if self.activation == 'relu': + return F.relu(x) + elif 
self.activation == 'leakyrelu': + return F.leaky_relu(x) + elif self.activation == 'tanh': + return torch.tanh(x) + elif self.activation == 'sigmoid': + return torch.sigmoid(x) + else: + raise ValueError(f"Unknown activation: {self.activation}") + +# You can add more model variants or residual blocks here as needed. diff --git a/test.py b/test.py new file mode 100644 index 0000000000000000000000000000000000000000..c82a1305634b32d5eda160f0c0758fd6453e6975 --- /dev/null +++ b/test.py @@ -0,0 +1,33 @@ +import torch +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from mymodel import MyCIFAR10Net + +# Data loading (test set) +transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) +]) +testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform) +testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2) + +# Load model +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +model = MyCIFAR10Net(num_classes=10, use_batchnorm=True, use_dropout=True, activation='relu').to(device) +model.load_state_dict(torch.load('model/best_model_1.pth', map_location=device)) +model.eval() + +# Evaluate +correct = 0 +total = 0 +with torch.no_grad(): + for inputs, labels in testloader: + inputs, labels = inputs.to(device), labels.to(device) + outputs = model(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + +print(f'Test Accuracy: {100 * correct / total:.2f}%') +print(f'Test Error: {100 - 100 * correct / total:.2f}%') diff --git a/train.py b/train.py new file mode 100644 index 0000000000000000000000000000000000000000..cd5d8d5a9c8eaa38eb77c7038ef1a9418a7c74b1 --- /dev/null +++ b/train.py @@ -0,0 +1,209 @@ +import torch +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import 
DataLoader, random_split +from mymodel import MyCIFAR10Net +import torch.optim as optim +import torch.nn as nn +import matplotlib.pyplot as plt +import numpy as np + +# 1. Data loading +transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) +]) +trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform) +train_size = int(0.8 * len(trainset)) +valid_size = len(trainset) - train_size +train_subset, valid_subset = random_split(trainset, [train_size, valid_size]) +trainloader = DataLoader(train_subset, batch_size=128, shuffle=True, num_workers=2) +valloader = DataLoader(valid_subset, batch_size=128, shuffle=False, num_workers=2) +testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform) +testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2) + +# 2. Model, loss, optimizer +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +model = MyCIFAR10Net(num_classes=10, use_batchnorm=True, use_dropout=True, activation='leakyrelu').to(device) +loss_fn = nn.CrossEntropyLoss() # Try different loss functions here +optimizer = optim.Adam(model.parameters(), lr=0.001) # Try different optimizers here +# 切换优化器 +# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4) +# optimizer = optim.RMSprop(model.parameters(), lr=0.001, weight_decay=1e-4) + +epoch_losses = [] + +def train(num_epochs=10): + for epoch in range(num_epochs): + model.train() + running_loss = 0.0 + for i, (inputs, labels) in enumerate(trainloader): + inputs, labels = inputs.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(inputs) + # loss = loss_fn(outputs, labels) + + l2_lambda = 1e-4 # L2正则化强度 + l2_reg = torch.tensor(0., device=device) + for param in model.parameters(): + l2_reg += torch.norm(param, 2) + loss = loss_fn(outputs, labels) + l2_lambda * l2_reg + loss.backward() + 
optimizer.step() + running_loss += loss.item() + avg_loss = running_loss/len(trainloader) + print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}") + epoch_losses.append(avg_loss) + validate() + # Save best model if needed + import time + time_stamp = time.strftime("%Y%m%d-%H%M%S") + path=f'model/best_model_{time_stamp}.pth' + torch.save(model.state_dict(), path) + # 保存loss曲线 + plt.figure() + plt.plot(range(1, len(epoch_losses)+1), epoch_losses, marker='o') + plt.xlabel('Epoch') + plt.ylabel('Loss') + plt.title('Training Loss per Epoch') + plt.savefig('loss_curve.png') + plt.close() + +def validate(): + model.eval() + correct = 0 + total = 0 + with torch.no_grad(): + for inputs, labels in valloader: + inputs, labels = inputs.to(device), labels.to(device) + outputs = model(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + print(f'Validation Accuracy: {100 * correct / total:.2f}%') + +def save_all_conv_filters(model, filename='all_filters.png', layer='conv1'): + if layer == 'conv1': + filters = model.conv1.weight.data.clone().cpu() + elif layer == 'conv2': + filters = model.conv2.weight.data.clone().cpu() + else: + raise ValueError("layer must be 'conv1' or 'conv2'") + num_filters = filters.shape[0] + ncols = 8 + nrows = (num_filters + ncols - 1) // ncols + fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2)) + for i in range(num_filters): + r, c = divmod(i, ncols) + f = filters[i] + f_min, f_max = f.min(), f.max() + f = (f - f_min) / (f_max - f_min) + if f.shape[0] == 3: # RGB + axes[r, c].imshow(f.permute(1, 2, 0)) + else: # 单通道 + axes[r, c].imshow(f[0], cmap='gray') + axes[r, c].axis('off') + for i in range(num_filters, nrows * ncols): + r, c = divmod(i, ncols) + axes[r, c].axis('off') + plt.tight_layout() + plt.savefig(filename) + plt.close() + +def visualize_all_feature_maps(model, image, filename='feature_maps.png', after='conv1'): + model.eval() + with torch.no_grad(): + x = 
image.unsqueeze(0).to(next(model.parameters()).device) + x = model.conv1(x) + x = model.bn1(x) + x = model._activate(x) + if after == 'conv2': + x = model.pool(x) + x = model.conv2(x) + x = model.bn2(x) + x = model._activate(x) + feature_maps = x.cpu().squeeze(0) + num_maps = feature_maps.shape[0] + ncols = 8 + nrows = (num_maps + ncols - 1) // ncols + fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2)) + for i in range(num_maps): + r, c = divmod(i, ncols) + fmap = feature_maps[i] + fmap_min, fmap_max = fmap.min(), fmap.max() + fmap = (fmap - fmap_min) / (fmap_max - fmap_min) + axes[r, c].imshow(fmap, cmap='viridis') + axes[r, c].axis('off') + for i in range(num_maps, nrows * ncols): + r, c = divmod(i, ncols) + axes[r, c].axis('off') + plt.tight_layout() + plt.savefig(filename) + plt.close() + +def plot_loss_landscape(model, dataloader, loss_fn, steps=20, alpha=0.5): + w = model.fc1.weight.data.clone() + direction1 = torch.randn_like(w) + direction2 = torch.randn_like(w) + losses = np.zeros((steps, steps)) + device = next(model.parameters()).device + for i, a in enumerate(np.linspace(-alpha, alpha, steps)): + for j, b in enumerate(np.linspace(-alpha, alpha, steps)): + model.fc1.weight.data = w + a * direction1 + b * direction2 + total_loss = 0 + count = 0 + for inputs, labels in dataloader: + inputs, labels = inputs.to(device), labels.to(device) + outputs = model(inputs) + loss = loss_fn(outputs, labels) + total_loss += loss.item() + count += 1 + if count > 2: # 只用少量batch加速 + break + losses[i, j] = total_loss / count + model.fc1.weight.data = w # 恢复原权重 + plt.figure(figsize=(6,5)) + plt.contourf(losses, levels=50, cmap='viridis') + plt.colorbar() + plt.title('Loss Landscape (fc1 weight)') + plt.xlabel('Direction 1') + plt.ylabel('Direction 2') + plt.savefig('loss_landscape.png') + plt.close() + +if __name__ == "__main__": + train(num_epochs=10) + # model.eval() + # save_conv1_filters(model) + + # 加载已有模型 + # 
model.load_state_dict(torch.load('best_model.pth', map_location=device)) + # model.eval() + + # # 保存所有卷积核 + # save_all_conv_filters(model, filename='all_filters_conv1.png', layer='conv1') + # save_all_conv_filters(model, filename='all_filters_conv2.png', layer='conv2') + + # # 取一张验证集图片 + # sample_img, _ = next(iter(valloader)) + + # # 可视化feature map + # visualize_all_feature_maps(model, sample_img[0], filename='feature_maps_conv1.png', after='conv1') + # visualize_all_feature_maps(model, sample_img[0], filename='feature_maps_conv2.png', after='conv2') + + # # Loss landscape visualization + # plot_loss_landscape(model, valloader, loss_fn) + + # Evaluate on test set after training + correct = 0 + total = 0 + with torch.no_grad(): + for inputs, labels in testloader: + inputs, labels = inputs.to(device), labels.to(device) + outputs = model(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print(f'Test Accuracy: {100 * correct / total:.2f}%') + print(f'Test Error: {100 - 100 * correct / total:.2f}%') \ No newline at end of file diff --git a/visualize.py b/visualize.py new file mode 100644 index 0000000000000000000000000000000000000000..734e3351761bd23f97d1259677b2b8b3a8a649ce --- /dev/null +++ b/visualize.py @@ -0,0 +1,27 @@ +import torch +import matplotlib.pyplot as plt +from mymodel import MyCIFAR10Net + +# Example: Visualize first conv layer filters + +def visualize_filters(model_path='best_model.pth', save_path='filters.png'): + model = MyCIFAR10Net(num_classes=10) + model.load_state_dict(torch.load(model_path)) + conv1_weights = model.conv1.weight.data.cpu() + num_filters = conv1_weights.shape[0] + fig, axes = plt.subplots(1, num_filters, figsize=(num_filters*2, 2)) + for i in range(num_filters): + ax = axes[i] + # Normalize to [0,1] for visualization + w = conv1_weights[i] + w = (w - w.min()) / (w.max() - w.min()) + ax.imshow(w.permute(1,2,0)) + ax.axis('off') + plt.tight_layout() + 
plt.savefig(save_path) + plt.show() + +# You can add more visualization functions (e.g., loss landscape, feature maps, etc.) + +if __name__ == "__main__": + visualize_filters(save_path='my_filters.png')