unfair221 committed on
Commit
0bae2fa
·
verified ·
1 Parent(s): bc57809

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. VGG_BatchNorm/.DS_Store +0 -0
  3. VGG_BatchNorm/VGG_Loss_Landscape.py +256 -0
  4. VGG_BatchNorm/data/__init__.py +5 -0
  5. VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc +0 -0
  6. VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc +0 -0
  7. VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc +0 -0
  8. VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc +0 -0
  9. VGG_BatchNorm/data/cifar-10-python.tar.gz +3 -0
  10. VGG_BatchNorm/data/loaders.py +53 -0
  11. VGG_BatchNorm/loss.txt +5 -0
  12. VGG_BatchNorm/models/__init__.py +5 -0
  13. VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc +0 -0
  14. VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc +0 -0
  15. VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc +0 -0
  16. VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc +0 -0
  17. VGG_BatchNorm/models/vgg.py +243 -0
  18. VGG_BatchNorm/models/vgg_a_lr0.0001.pth +3 -0
  19. VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy +3 -0
  20. VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy +3 -0
  21. VGG_BatchNorm/models/vgg_a_lr0.0005.pth +3 -0
  22. VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy +3 -0
  23. VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy +3 -0
  24. VGG_BatchNorm/models/vgg_a_lr0.001.pth +3 -0
  25. VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy +3 -0
  26. VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy +3 -0
  27. VGG_BatchNorm/models/vgg_a_lr0.002.pth +3 -0
  28. VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy +3 -0
  29. VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy +3 -0
  30. VGG_BatchNorm/models/vgg_bn_lr0.0001.pth +3 -0
  31. VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy +3 -0
  32. VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy +3 -0
  33. VGG_BatchNorm/models/vgg_bn_lr0.0005.pth +3 -0
  34. VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy +3 -0
  35. VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy +3 -0
  36. VGG_BatchNorm/models/vgg_bn_lr0.001.pth +3 -0
  37. VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy +3 -0
  38. VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy +3 -0
  39. VGG_BatchNorm/models/vgg_bn_lr0.002.pth +3 -0
  40. VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy +3 -0
  41. VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy +3 -0
  42. VGG_BatchNorm/utils/__init__.py +4 -0
  43. VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  44. VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc +0 -0
  45. VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc +0 -0
  46. VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc +0 -0
  47. VGG_BatchNorm/utils/nn.py +31 -0
  48. VGG_BatchNorm/vgg_bn_compare.png +0 -0
  49. VGG_BatchNorm/vgg_bn_loss_landscape.png +0 -0
  50. VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
37
+ data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
38
+ data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
39
+ data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
40
+ data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
41
+ data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
42
+ fig/feature_maps_conv1.png filter=lfs diff=lfs merge=lfs -text
VGG_BatchNorm/.DS_Store ADDED
Binary file (6.15 kB). View file
 
VGG_BatchNorm/VGG_Loss_Landscape.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from models.vgg import VGG_A
from models.vgg import VGG_A_BatchNorm  # you need to implement this network
from data.loaders import get_cifar_loader

# ## Constants (parameters) initialization
device_id = [0, 1, 2, 3]
num_workers = 4
batch_size = 128

# add our package dir to path
module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')

# Make sure you are using the right device.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
device = torch.device("cuda:{}".format(3) if torch.cuda.is_available() else "cpu")
print(device)
# FIX: the original unconditionally called torch.cuda.get_device_name(3),
# which raises on CPU-only machines even though `device` correctly fell
# back to "cpu" above. Only query the GPU name when CUDA is available.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(3))

# Initialize your data loader and make sure that the dataloader works
# as expected by observing one sample from it.
train_loader = get_cifar_loader(train=True)
val_loader = get_cifar_loader(train=False)
for X, y in train_loader:
    ## --------------------
    # Add code as needed to inspect one sample batch.
    ## --------------------
    break
51
+
52
+
53
+
54
# This function is used to calculate the accuracy of model classification
def get_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    Runs in eval mode with gradients disabled; batches are moved to the
    module-level ``device`` before the forward pass.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            preds = logits.argmax(dim=1)
            n_seen += targets.size(0)
            n_correct += (preds == targets).sum().item()
    return n_correct / n_seen
67
+
68
# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    """Seed numpy, torch and the stdlib RNGs for reproducibility.

    When ``device`` is not the CPU, additionally seeds all CUDA devices and
    forces cuDNN into deterministic mode (disabling benchmark autotuning).
    """
    for seeder in (np.random.seed, torch.manual_seed, random.seed):
        seeder(seed_value)
    if device != 'cpu':
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
78
+
79
+
80
# We use this function to complete the entire training process. In order
# to plot the loss landscape, you need to record the loss value of each
# step. As before, you can test your model after each training round and
# save the curves to observe the training.
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    """Train ``model`` for ``epochs_n`` epochs on ``train_loader``.

    Returns three per-epoch lists: mean training loss, training accuracy
    and validation accuracy. ``scheduler`` and ``best_model_path`` are
    accepted for API compatibility but not used here.
    """
    model.to(device)
    learning_curve = []
    train_accuracy_curve = []
    val_accuracy_curve = []
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        model.train()
        epoch_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()
            output = model(batch_x)
            batch_loss = criterion(output, batch_y)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += batch_loss.item()
        avg_loss = epoch_loss / len(train_loader)
        learning_curve.append(avg_loss)
        # Evaluate after every epoch so both accuracy curves stay aligned
        # with the loss curve.
        train_acc = get_accuracy(model, train_loader)
        val_acc = get_accuracy(model, val_loader)
        train_accuracy_curve.append(train_acc)
        val_accuracy_curve.append(val_acc)
        print(f'Epoch {epoch+1}: loss={avg_loss:.4f}, train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')
    return learning_curve, train_accuracy_curve, val_accuracy_curve
112
+
113
def train_stepwise(model, optimizer, criterion, train_loader, val_loader, epochs_n=10, save_prefix=''):
    """Train while recording the loss and gradient L2-norm of every step.

    The two series are saved to ``{save_prefix}_step_losses.npy`` and
    ``{save_prefix}_step_grads.npy`` and also returned. ``val_loader`` is
    accepted for API compatibility but not used here.
    """
    model.to(device)
    model.train()
    step_losses = []
    step_grads = []
    for _ in range(epochs_n):
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            # Record the per-step loss.
            step_losses.append(loss.item())
            # Record the global gradient L2 norm (taken before the update).
            sq_sum = sum(
                p.grad.data.norm(2).item() ** 2
                for p in model.parameters()
                if p.grad is not None
            )
            step_grads.append(sq_sum ** 0.5)
            optimizer.step()
    # Persist both series for later plotting.
    np.save(f'{save_prefix}_step_losses.npy', np.array(step_losses))
    np.save(f'{save_prefix}_step_grads.npy', np.array(step_grads))
    return step_losses, step_grads
140
+
141
# Train your model
# feel free to modify
epo = 5
loss_save_path = ''  # directory for loss.txt; '' means the current directory
# grad_save_path = ''

# Quick training run of plain VGG-A with Adam, then persist the loss curve.
set_random_seeds(seed_value=2020, device=device)
model = VGG_A()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()
loss, train_acc, val_acc = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
# One mean-training-loss value per line.
np.savetxt(os.path.join(loss_save_path, 'loss.txt'), loss, fmt='%s', delimiter=' ')
# np.savetxt(os.path.join(grad_save_path, 'grads.txt'), grads, fmt='%s', delimiter=' ')

# Maintain two lists: max_curve and min_curve,
# select the maximum value of loss in all models
# on the same step, add it to max_curve, and
# the minimum value to min_curve
min_curve = []
max_curve = []
## --------------------
# Add your code
#
#
#
#
## --------------------
169
+
170
# Use this function to plot the final loss landscape,
# fill the area between the two curves can use plt.fill_between()
def plot_loss_landscape(min_vals=None, max_vals=None, save_path='vgg_bn_loss_landscape.png'):
    """Plot the loss landscape as the band between two per-step curves.

    The original was an empty stub; this fills it in with the intended
    ``plt.fill_between`` visualisation.

    Args:
        min_vals: per-step minimum losses; defaults to the module-level
            ``min_curve`` (keeps the old zero-argument call working).
        max_vals: per-step maximum losses; defaults to ``max_curve``.
        save_path: path of the output image.
    """
    if min_vals is None:
        min_vals = min_curve
    if max_vals is None:
        max_vals = max_curve
    if not len(min_vals) or not len(max_vals):
        # Nothing recorded yet -- preserve the old no-op behaviour.
        return
    steps = np.arange(1, min(len(min_vals), len(max_vals)) + 1)
    plt.figure(figsize=(8, 5))
    plt.fill_between(steps,
                     np.asarray(min_vals)[:len(steps)],
                     np.asarray(max_vals)[:len(steps)],
                     color='steelblue', alpha=0.4, label='loss range')
    plt.xlabel('Step')
    plt.ylabel('Training Loss')
    plt.title('Loss Landscape')
    plt.legend()
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
181
+
182
if __name__ == "__main__":
    # Compare VGG-A with and without BatchNorm across several learning
    # rates, recording per-step losses and gradient norms for each run.
    epo = 10
    set_random_seeds(seed_value=2020, device=device)

    learning_rates = [1e-3, 2e-3, 1e-4, 5e-4]
    criterion = nn.CrossEntropyLoss()
    all_loss_a = []
    all_loss_bn = []
    all_grad_a = []
    all_grad_bn = []
    for lr in learning_rates:
        # VGG-A (no BatchNorm)
        model_a = VGG_A()
        optimizer_a = torch.optim.Adam(model_a.parameters(), lr=lr)
        prefix_a = f'models/vgg_a_lr{lr}'
        step_losses_a, step_grads_a = train_stepwise(model_a, optimizer_a, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_a)
        torch.save(model_a.state_dict(), f'{prefix_a}.pth')
        all_loss_a.append(step_losses_a)
        all_grad_a.append(step_grads_a)

        # VGG-A with BatchNorm
        model_bn = VGG_A_BatchNorm()
        optimizer_bn = torch.optim.Adam(model_bn.parameters(), lr=lr)
        prefix_bn = f'models/vgg_bn_lr{lr}'
        step_losses_bn, step_grads_bn = train_stepwise(model_bn, optimizer_bn, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_bn)
        torch.save(model_bn.state_dict(), f'{prefix_bn}.pth')
        all_loss_bn.append(step_losses_bn)
        all_grad_bn.append(step_grads_bn)

    # Draw a separate set of figures for every learning rate.
    for lr in learning_rates:
        # Load the recorded losses and gradient norms from disk.
        loss_a = np.load(f'models/vgg_a_lr{lr}_step_losses.npy')
        loss_bn = np.load(f'models/vgg_bn_lr{lr}_step_losses.npy')
        grad_a = np.load(f'models/vgg_a_lr{lr}_step_grads.npy')
        grad_bn = np.load(f'models/vgg_bn_lr{lr}_step_grads.npy')
        # Truncate both runs to the shorter one so the x-axes line up.
        steps = np.arange(1, min(len(loss_a), len(loss_bn)) + 1)

        # Loss comparison (with vs. without BatchNorm).
        plt.figure(figsize=(8,5))
        plt.plot(steps, loss_a[:len(steps)], 'r-', label='VGG-A (no BN)')
        plt.plot(steps, loss_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Training Loss')
        plt.title(f'Loss Curve (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_loss_curve_lr{lr}.png')
        plt.close()

        # Gradient-norm comparison.
        plt.figure(figsize=(8,5))
        plt.plot(steps, grad_a[:len(steps)], 'r-', label='VGG-A (no BN)')
        plt.plot(steps, grad_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Gradient Norm')
        plt.title(f'Gradient Norm Curve (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_grad_norm_curve_lr{lr}.png')
        plt.close()

        # Step-to-step gradient difference (smoothness proxy).
        grad_diff_a = np.abs(np.diff(grad_a[:len(steps)]))
        grad_diff_bn = np.abs(np.diff(grad_bn[:len(steps)]))
        plt.figure(figsize=(8,5))
        plt.plot(steps[1:], grad_diff_a, 'r-', label='VGG-A (no BN)')
        plt.plot(steps[1:], grad_diff_bn, 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Gradient Difference')
        plt.title(f'Max Gradient Difference (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_max_grad_diff_lr{lr}.png')
        plt.close()
VGG_BatchNorm/data/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ '''
2
+ Scripts to download and generate data
3
+ '''
4
+
5
+ from . import loaders
VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (248 Bytes). View file
 
VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (259 Bytes). View file
 
VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc ADDED
Binary file (1.93 kB). View file
 
VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc ADDED
Binary file (1.9 kB). View file
 
VGG_BatchNorm/data/cifar-10-python.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f45163964244cea80d8b2367396f1a64e529767c1a4e2c0c91c67b8ac2f691e1
3
+ size 5226496
VGG_BatchNorm/data/loaders.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data loaders
3
+ """
4
+ import matplotlib as mpl
5
+ mpl.use('Agg')
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ from torch.utils.data import DataLoader, Dataset
9
+ from torchvision import transforms
10
+ import torchvision.datasets as datasets
11
+
12
+
13
+
14
class PartialDataset(Dataset):
    """Wrap ``dataset`` and expose only its first ``n_items`` samples.

    Fixes the original ``__getitem__``, which accepted no index and called
    ``self.dataset.__getitem__()`` without one -- every access raised a
    TypeError, so the wrapper was unusable with a DataLoader.
    """

    def __init__(self, dataset, n_items=10):
        # dataset: any indexable dataset; n_items: cap on exposed length.
        self.dataset = dataset
        self.n_items = n_items

    def __getitem__(self, index):
        # Delegate to the wrapped dataset; indices are bounded by __len__.
        return self.dataset[index]

    def __len__(self):
        return min(self.n_items, len(self.dataset))
24
+
25
+
26
def get_cifar_loader(root='../data/', batch_size=128, train=True, shuffle=True, num_workers=4, n_items=-1):
    """Build a CIFAR-10 DataLoader, downloading the dataset if needed.

    Images are converted to tensors and normalised channel-wise from
    [0, 1] to [-1, 1]. When ``n_items`` is positive, only that many
    samples are exposed via ``PartialDataset``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    dataset = datasets.CIFAR10(root=root, train=train, download=True, transform=transform)
    if n_items > 0:
        dataset = PartialDataset(dataset, n_items)

    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
41
+
42
if __name__ == '__main__':
    # Smoke test: fetch one batch, print one sample and save it as an image.
    train_loader = get_cifar_loader()
    for X, y in train_loader:
        print(X[0])
        print(y[0])
        print(X[0].shape)
        # CHW -> HWC for matplotlib; *0.5 + 0.5 undoes the [-1, 1] normalisation.
        img = np.transpose(X[0], [1,2,0])
        plt.imshow(img*0.5 + 0.5)
        plt.savefig('sample.png')
        print(X[0].max())
        print(X[0].min())
        break
VGG_BatchNorm/loss.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 1.7658132662248733
2
+ 1.2146569554458189
3
+ 0.9086841719839579
4
+ 0.7367523306471002
5
+ 0.6139206301678172
VGG_BatchNorm/models/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ '''
2
+ Models implementation and training & evaluating functions
3
+ '''
4
+
5
+ from . import vgg
VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (266 Bytes). View file
 
VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (277 Bytes). View file
 
VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc ADDED
Binary file (4.93 kB). View file
 
VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc ADDED
Binary file (3.96 kB). View file
 
VGG_BatchNorm/models/vgg.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VGG
3
+ """
4
+ import numpy as np
5
+ from torch import nn
6
+
7
+ from utils.nn import init_weights_
8
+
9
+ # ## Models implementation
10
def get_number_of_parameters(model):
    """Return the total number of scalar parameters in ``model``."""
    return sum(np.prod(p.shape).item() for p in model.parameters())
16
+
17
+
18
class VGG_A(nn.Module):
    """VGG_A model

    size of Linear layers is smaller since input assumed to be 32x32x3, instead of
    224x224x3
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        # inp_ch: number of input channels (3 for RGB CIFAR-10 images).
        # num_classes: size of the final classification layer.
        # init_weights: apply the init_weights_ scheme when True.
        super().__init__()

        self.features = nn.Sequential(
            # stage 1
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage5
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        # Five 2x2 max-pools reduce 32x32 inputs to 1x1, hence 512*1*1 features.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes))

        if init_weights:
            self._init_weights()

    def forward(self, x):
        # Flatten the 512x1x1 feature map before the fully-connected head.
        x = self.features(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x

    def _init_weights(self):
        # Delegates to utils.nn.init_weights_ for every submodule.
        for m in self.modules():
            init_weights_(m)
78
+
79
+
80
class VGG_A_Light(nn.Module):
    """Lightweight two-stage variant of VGG_A (stages 3-5 are disabled below)."""

    def __init__(self, inp_ch=3, num_classes=10):
        # inp_ch: number of input channels; num_classes: output classes.
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        '''
        self.stage3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        '''
        # Two 2x2 max-pools reduce 32x32 inputs to 8x8, hence 32*8*8 features.
        self.classifier = nn.Sequential(
            nn.Linear(32 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes))

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        # x = self.stage3(x)
        # x = self.stage4(x)
        # x = self.stage5(x)
        x = self.classifier(x.view(-1, 32 * 8 * 8))
        return x
127
+
128
+
129
class VGG_A_Dropout(nn.Module):
    """VGG_A variant with Dropout layers in the fully-connected classifier."""

    def __init__(self, inp_ch=3, num_classes=10):
        # inp_ch: number of input channels; num_classes: output classes.
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        # Dropout before each hidden Linear regularises the dense head.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes))

    def forward(self, x):
        # Five pooled stages reduce 32x32 inputs to 1x1 before flattening.
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x
181
+
182
+
183
class VGG_A_BatchNorm(nn.Module):
    """VGG_A with a BatchNorm2d layer after every convolution.

    Same architecture as VGG_A otherwise; used to compare training
    dynamics with and without batch normalisation.
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        # inp_ch: number of input channels; num_classes: output classes;
        # init_weights: apply the init_weights_ scheme when True.
        super().__init__()
        self.features = nn.Sequential(
            # stage 1
            nn.Conv2d(inp_ch, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 2
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 3
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 4
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 5
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Five 2x2 max-pools reduce 32x32 inputs to 1x1, hence 512*1*1 features.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )
        if init_weights:
            self._init_weights()
    def forward(self, x):
        # Flatten the 512x1x1 feature map before the fully-connected head.
        x = self.features(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x
    def _init_weights(self):
        # Delegates to utils.nn.init_weights_ for every submodule.
        for m in self.modules():
            init_weights_(m)
237
+
238
+
239
if __name__ == '__main__':
    # Print the parameter count of each architecture variant, in the
    # same order as before.
    for arch in (VGG_A, VGG_A_Light, VGG_A_Dropout, VGG_A_BatchNorm):
        print(get_number_of_parameters(arch()))
VGG_BatchNorm/models/vgg_a_lr0.0001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105171da7fd7bf680dd5d4c6d9484019a469849021e775edade244a84b139bfb
3
+ size 39011786
VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40fd79ad6ea0064b44c91905f42549bac9f3e4439f2ac7b3e4f4b61681b7a81f
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc50830495cff83662ea2603a3b39b8f882f45341a6aa95ac8cc9a731c315196
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0005.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3db4e136d92dc94a1f8601d7700e922e466a049e91891840f0374d6d27c98db
3
+ size 39011786
VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5703f3db77174dc12448202f46dac32866fa7ce47383c961b0c0d85bfc4c89b7
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3838ee654dddc55efa6b63f65594901803a410f17e90e2d2402ba51127c94db
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bab30b087815d4470cb665e2aeadcb2c2c786f5f21746bcf07e93cef1e3669
3
+ size 39011760
VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a558422f2f2eb6233e061ed0e24fea94a7525074255f8f91dd5d6ed6b2dd1c
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f74eebcff55116c216fbaca90a56280090d6a01ab7223cad91d8cb42c97747
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e8453a024e60b3bb8e680b0aeed1d17bcf1545b2de4ca030f57513e0c5f7be
3
+ size 39011760
VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a22fc25871c1a180f423cc498b60f1083d07c2e45d605939889b5c90a35afcfa
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7868e3df3d4ae5b25889df52f98ef5e4bb98dcd741f62667b4671b3e16ae981
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9451b3372e6897dd96be45a1902a9629e8f313e9019162d8f9ce5251ffc542a3
3
+ size 39068716
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8880316d60e582db12979390d79b6f5df7a1b0c3b31e90ff164de5f0930d0aef
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a2ee1fe92633edf653d27d5139c882ffdd331319832aedf391a9d8f9a2c42e8
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0005.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b108a6b387ba7f8dbaf5f8192cebd96e753d3b2eec071fdb9047ec18ac56af
3
+ size 39068716
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75477b2424055e4773105ed1c3e30b3f865263d93620cdb0ab8ea8cc7ab90e5b
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3261b9564be6ae816ec048ac8189aec7e16eb0b9ed6a31a4851356a19f09ce04
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9765324e222a9d6ff7b51ae6b5f3f97a1e1cc99197654f906f8e7611b9b8d193
3
+ size 39068650
VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2224404b4ba0e30fb143ddbaee1976528671afe3b2f25a14499dc7dd63a87d
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50030e0c9e31a85a981d7b60d0094b758fa8efb37f89ca309ec7025cfa71787e
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1488c03104e9de23b07a9deb22ef6afb1ba3b7f6aaa5dad9ba5a46a4c51082c4
3
+ size 39068650
VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf1d2594a3cd9783d29c71a69d124d00928c24ba50d143dfb58c72cd06f601b
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215478b68ca6ea0590bc76f7353771a923a1f10422c5b9dfa022526d1ad0d41b
3
+ size 31408
VGG_BatchNorm/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ '''
2
+ Several utils, in particular for experiments
3
+ '''
4
+ from . import nn
VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (251 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (262 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc ADDED
Binary file (788 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc ADDED
Binary file (771 Bytes). View file
 
VGG_BatchNorm/utils/nn.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utils for neural networks
3
+ """
4
+
5
+ from torch import nn
6
+
7
+
8
def init_weights_(m):
    """
    Initializes weights of m according to Xavier normal method.

    :param m: module
    :return:
    """
    # Conv and Linear layers share the same scheme: Xavier-normal weight,
    # zero bias (when a bias exists).
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    # BatchNorm (1d and 2d) starts as the identity transform.
    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)
VGG_BatchNorm/vgg_bn_compare.png ADDED
VGG_BatchNorm/vgg_bn_loss_landscape.png ADDED
VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png ADDED