diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..3d0909d31bc0ad12083fbc45e32c3ed698a93560 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
+data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
+fig/feature_maps_conv1.png filter=lfs diff=lfs merge=lfs -text
diff --git a/VGG_BatchNorm/.DS_Store b/VGG_BatchNorm/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..55689b508e6fff2278aba9438833270a93765430
Binary files /dev/null and b/VGG_BatchNorm/.DS_Store differ
diff --git a/VGG_BatchNorm/VGG_Loss_Landscape.py b/VGG_BatchNorm/VGG_Loss_Landscape.py
new file mode 100644
index 0000000000000000000000000000000000000000..fed9e8bfeaac62dafc5c1971297a699e5f06623c
--- /dev/null
+++ b/VGG_BatchNorm/VGG_Loss_Landscape.py
@@ -0,0 +1,256 @@
+import matplotlib as mpl
+mpl.use('Agg')
+import matplotlib.pyplot as plt
+from torch import nn
+import numpy as np
+import torch
+import os
+import random
+from tqdm import tqdm as tqdm
+from IPython import display
+
+from models.vgg import VGG_A
+from models.vgg import VGG_A_BatchNorm # you need to implement this network
+from data.loaders import get_cifar_loader
+
+# ## Constants (parameters) initialization
+device_id = [0,1,2,3]  # presumably the GPU ids of the training machine; not read elsewhere in this script
+num_workers = 4
+batch_size = 128
+
+# Output locations, resolved relative to the parent of the current working directory.
+module_path = os.path.dirname(os.getcwd())
+home_path = module_path
+figures_path = os.path.join(home_path, 'reports', 'figures')
+models_path = os.path.join(home_path, 'reports', 'models')
+
+# Device selection: prefer GPU 3, fall back to the last visible GPU, then to the CPU.
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+gpu_index = min(3, torch.cuda.device_count() - 1)  # -1 (never used) when no GPU is visible
+device = torch.device("cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
+print(device)
+print(torch.cuda.get_device_name(device) if device.type == "cuda" else "CUDA not available, using CPU")
+
+
+
+# Build the CIFAR-10 training and validation loaders with get_cifar_loader's
+# default settings, then pull a single batch from the training loader as a
+# smoke test. X and y stay bound to that first batch at module level.
+# NOTE(review): nothing inspects the batch yet; the placeholder below is unfilled.
+train_loader = get_cifar_loader(train=True)
+val_loader = get_cifar_loader(train=False)
+for X,y in train_loader:
+    ## --------------------
+    # Unfilled placeholder: add checks on the first batch here
+    # (for example printing X.shape and y.shape).
+    #
+    #
+    #
+    ## --------------------
+    break
+
+
+
+# This function is used to calculate the accuracy of model classification
+def get_accuracy(model, data_loader):
+    """Return the fraction of correctly classified samples in ``data_loader`` (0.0 if it is empty).
+
+    Switches ``model`` to eval mode and leaves it there."""
+    model.eval()
+    correct, total = 0, 0
+    with torch.no_grad():  # evaluation only, so no autograd bookkeeping is needed
+        for x, y in data_loader:
+            x, y = x.to(device), y.to(device)
+            correct += (torch.max(model(x), 1)[1] == y).sum().item()  # [1] = arg-max indices
+            total += y.size(0)
+    return correct / total if total else 0.0  # avoid ZeroDivisionError on an empty loader
+
+# Set a random seed to ensure reproducible results
+def set_random_seeds(seed_value=0, device='cpu'):
+    """Seed Python, NumPy and PyTorch RNGs; for non-CPU devices also seed CUDA and make cuDNN deterministic."""
+    random.seed(seed_value)
+    np.random.seed(seed_value)
+    torch.manual_seed(seed_value)
+    if torch.device(device).type != 'cpu':  # works for 'cuda:0' strings and torch.device objects alike
+        torch.cuda.manual_seed_all(seed_value)  # covers every visible GPU, including the current one
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+
+# Full training loop: one optimisation pass over train_loader per epoch, then evaluation.
+def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
+    """Train ``model`` for ``epochs_n`` epochs; return per-epoch (mean loss, train acc, val acc) lists.
+
+    An optional ``scheduler`` is stepped (without arguments) once per epoch, and when ``best_model_path``
+    is set the state dict with the best validation accuracy so far is saved there.
+    """
+    model.to(device)
+    learning_curve, train_accuracy_curve, val_accuracy_curve = [], [], []
+    best_val_acc = float('-inf')
+    for epoch in tqdm(range(epochs_n), unit='epoch'):
+        model.train()  # get_accuracy() leaves the model in eval mode, so switch back every epoch
+        running_loss = 0.0
+        for x, y in train_loader:
+            x, y = x.to(device), y.to(device)
+            optimizer.zero_grad()
+            loss = criterion(model(x), y)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+        if scheduler is not None:
+            scheduler.step()
+        avg_loss = running_loss / len(train_loader)  # mean of the per-batch losses
+        train_acc, val_acc = get_accuracy(model, train_loader), get_accuracy(model, val_loader)
+        learning_curve.append(avg_loss)
+        train_accuracy_curve.append(train_acc)
+        val_accuracy_curve.append(val_acc)
+        if best_model_path is not None and val_acc > best_val_acc:
+            best_val_acc = val_acc
+            torch.save(model.state_dict(), best_model_path)
+        print(f'Epoch {epoch+1}: loss={avg_loss:.4f}, train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')
+    return learning_curve, train_accuracy_curve, val_accuracy_curve
+
+def train_stepwise(model, optimizer, criterion, train_loader, val_loader, epochs_n=10, save_prefix=''):
+    """Train ``model`` and record the loss and the global gradient L2 norm at every optimisation step.
+
+    Returns ``(step_losses, step_grads)`` and also saves them to ``{save_prefix}_step_losses.npy`` and
+    ``{save_prefix}_step_grads.npy``. ``val_loader`` is accepted but not used.
+    """
+    out_dir = os.path.dirname(save_prefix)
+    if out_dir:  # create the target directory up front so a long run cannot fail at save time
+        os.makedirs(out_dir, exist_ok=True)
+    model.to(device)
+    model.train()
+    step_losses, step_grads = [], []
+    for _ in range(epochs_n):
+        for x, y in train_loader:
+            x, y = x.to(device), y.to(device)
+            optimizer.zero_grad()
+            loss = criterion(model(x), y)
+            loss.backward()
+            step_losses.append(loss.item())
+            # Global L2 norm over all parameter gradients, measured before the weights are updated.
+            squared = [p.grad.detach().norm(2).item() ** 2 for p in model.parameters() if p.grad is not None]
+            step_grads.append(sum(squared) ** 0.5)
+            optimizer.step()
+    # Persist both series so they can be reloaded later without retraining.
+    np.save(f'{save_prefix}_step_losses.npy', np.array(step_losses))
+    np.save(f'{save_prefix}_step_grads.npy', np.array(step_grads))
+    return step_losses, step_grads
+
+# Baseline run: train a plain VGG_A with Adam and write the per-epoch mean loss to loss.txt.
+# NOTE(review): this executes at import time, outside the __main__ guard below.
+epo = 5
+loss_save_path = ''  # empty string: loss.txt is written to the current working directory
+# grad_save_path = ''
+
+set_random_seeds(seed_value=2020, device=device)  # seed before the model is built so its init is reproducible
+model = VGG_A()
+lr = 0.001
+optimizer = torch.optim.Adam(model.parameters(), lr = lr)
+criterion = nn.CrossEntropyLoss()
+loss, train_acc, val_acc = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
+np.savetxt(os.path.join(loss_save_path, 'loss.txt'), loss, fmt='%s', delimiter=' ')
+# Gradient export is disabled: train() returns no gradient series to save.
+
+# Envelope of the training loss across models: at every step, max_curve is meant
+# to hold the largest loss over all trained models and min_curve the smallest.
+# NOTE(review): both lists are still empty; the code that fills them is missing
+# (see the placeholder below).
+min_curve = []
+max_curve = []
+## --------------------
+# Unfilled placeholder: compute min_curve / max_curve from the recorded
+# per-step losses here.
+#
+#
+#
+## --------------------
+
+# Intended to plot the final loss landscape; the area between the min and max
+# curves can be filled with plt.fill_between().
+def plot_loss_landscape():
+    """Placeholder for the loss-landscape plot; currently does nothing."""
+    ## --------------------
+    # Unfilled placeholder: add the plotting code here.
+    #
+    #
+    #
+    ## --------------------
+    pass
+
+if __name__ == "__main__":
+    # Experiment driver: for every learning rate, train VGG-A without and with
+    # batch normalisation, record the per-step loss and gradient norm of each run,
+    # then plot the two variants against each other.
+    epo = 10
+    set_random_seeds(seed_value=2020, device=device)
+
+    learning_rates = [1e-3, 2e-3, 1e-4, 5e-4]
+    criterion = nn.CrossEntropyLoss()
+    # Per-step histories of every run, appended in learning_rates order.
+    all_loss_a = []
+    all_loss_bn = []
+    all_grad_a = []
+    all_grad_bn = []
+
+    def _fit_and_save(net, rate, prefix, loss_runs, grad_runs):
+        """Train ``net`` with Adam at learning rate ``rate`` and keep its results.
+
+        Saves the weights to ``{prefix}.pth`` and appends the step histories to the given lists.
+        """
+        adam = torch.optim.Adam(net.parameters(), lr=rate)
+        run_losses, run_grads = train_stepwise(
+            net, adam, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix)
+        torch.save(net.state_dict(), f'{prefix}.pth')
+        loss_runs.append(run_losses)
+        grad_runs.append(run_grads)
+
+    def _save_comparison(x_values, series_a, series_bn, y_label, title, file_name):
+        """Plot the no-BN series in red and the BN series in blue over ``x_values``.
+
+        The figure is written to ``file_name`` and closed afterwards.
+        """
+        plt.figure(figsize=(8,5))
+        plt.plot(x_values, series_a, 'r-', label='VGG-A (no BN)')
+        plt.plot(x_values, series_bn, 'b-', label='VGG-A (with BN)')
+        plt.xlabel('Step')
+        plt.ylabel(y_label)
+        plt.title(title)
+        plt.legend()
+        plt.tight_layout()
+        plt.savefig(file_name)
+        plt.close()
+
+    for lr in learning_rates:
+        # VGG-A
+        _fit_and_save(VGG_A(), lr, f'models/vgg_a_lr{lr}', all_loss_a, all_grad_a)
+        # VGG-A-BN
+        _fit_and_save(VGG_A_BatchNorm(), lr, f'models/vgg_bn_lr{lr}', all_loss_bn, all_grad_bn)
+
+    # Draw one set of figures per learning rate from the arrays train_stepwise saved.
+    for lr in learning_rates:
+        # Load the recorded losses and gradient norms.
+        loss_a = np.load(f'models/vgg_a_lr{lr}_step_losses.npy')
+        loss_bn = np.load(f'models/vgg_bn_lr{lr}_step_losses.npy')
+        grad_a = np.load(f'models/vgg_a_lr{lr}_step_grads.npy')
+        grad_bn = np.load(f'models/vgg_bn_lr{lr}_step_grads.npy')
+        n_steps = min(len(loss_a), len(loss_bn))  # plot only the range both loss series cover
+        steps = np.arange(1, n_steps + 1)
+
+        # Loss comparison
+        _save_comparison(
+            steps, loss_a[:n_steps], loss_bn[:n_steps],
+            'Training Loss', f'Loss Curve (lr={lr})', f'vgg_loss_curve_lr{lr}.png')
+
+        # Gradient norm comparison
+        _save_comparison(
+            steps, grad_a[:n_steps], grad_bn[:n_steps],
+            'Gradient Norm', f'Gradient Norm Curve (lr={lr})', f'vgg_grad_norm_curve_lr{lr}.png')
+
+        # Absolute change of the gradient norm between consecutive steps (starts at step 2)
+        grad_diff_a = np.abs(np.diff(grad_a[:n_steps]))
+        grad_diff_bn = np.abs(np.diff(grad_bn[:n_steps]))
+        _save_comparison(
+            steps[1:], grad_diff_a, grad_diff_bn,
+            'Gradient Difference', f'Max Gradient Difference (lr={lr})', f'vgg_max_grad_diff_lr{lr}.png')
\ No newline at end of file
diff --git a/VGG_BatchNorm/data/__init__.py b/VGG_BatchNorm/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6833bb646085bcde6c12336a09139941ce03123c
--- /dev/null
+++ b/VGG_BatchNorm/data/__init__.py
@@ -0,0 +1,5 @@
+'''
+Scripts to download and generate data
+'''
+
+from . import loaders
\ No newline at end of file
diff --git a/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc b/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf9b823eae03fe5d59f89f4651f4e9b92a75266d
Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc b/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0be9bd46131bf60e847c77fbbb41a8c5e95d9742
Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc b/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb82538671c00bd6785c82cb73b8d2e73fcecf73
Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc b/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e0fce06f4e3f9d3e1bf2e9a72719308ba906ff71
Binary files /dev/null and b/VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/data/cifar-10-python.tar.gz b/VGG_BatchNorm/data/cifar-10-python.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..cc9f0378736b12bdd4f605fb86f7f94defee5962
--- /dev/null
+++ b/VGG_BatchNorm/data/cifar-10-python.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f45163964244cea80d8b2367396f1a64e529767c1a4e2c0c91c67b8ac2f691e1
+size 5226496
diff --git a/VGG_BatchNorm/data/loaders.py b/VGG_BatchNorm/data/loaders.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ee7206d9dbc016022fe3ca0b1b3ad3247f996cf
--- /dev/null
+++ b/VGG_BatchNorm/data/loaders.py
@@ -0,0 +1,53 @@
+"""
+Data loaders
+"""
+import matplotlib as mpl
+mpl.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+from torch.utils.data import DataLoader, Dataset
+from torchvision import transforms
+import torchvision.datasets as datasets
+
+
+
+class PartialDataset(Dataset):
+    """Expose only the first ``n_items`` samples of ``dataset`` (fewer if the dataset is shorter)."""
+    def __init__(self, dataset, n_items=10):
+        self.dataset, self.n_items = dataset, n_items
+    def __getitem__(self, index):
+        if not 0 <= index < len(self):  # IndexError also ends plain iteration at the truncated length
+            raise IndexError(f'index {index} out of range for PartialDataset of length {len(self)}')
+        return self.dataset[index]
+    def __len__(self):
+        return min(self.n_items, len(self.dataset))
+
+
+def get_cifar_loader(root='../data/', batch_size=128, train=True, shuffle=True, num_workers=4, n_items=-1):
+    """Build a ``DataLoader`` over the CIFAR-10 dataset under ``root`` (requested with ``download=True``).
+
+    Every image is converted to a tensor and normalised per channel with mean 0.5 and std 0.5.
+    A positive ``n_items`` wraps the dataset in ``PartialDataset`` to cap its length; the remaining
+    arguments are forwarded to ``DataLoader`` unchanged.
+    """
+    pipeline = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+    ])
+    cifar = datasets.CIFAR10(root=root, train=train, download=True, transform=pipeline)
+    source = PartialDataset(cifar, n_items) if n_items > 0 else cifar
+    loader = DataLoader(source, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
+    return loader
+
+if __name__ == '__main__':  # smoke test: save the first training image and print its statistics
+    for images, labels in get_cifar_loader():
+        sample, label = images[0], labels[0]
+        print(sample)
+        print(label)
+        print(sample.shape)
+        # Channels-first -> channels-last for imshow, then undo the mean/std 0.5 normalisation.
+        plt.imshow(np.transpose(sample, [1,2,0])*0.5 + 0.5)
+        plt.savefig('sample.png')
+        print(sample.max())
+        print(sample.min())
+        break
\ No newline at end of file
diff --git a/VGG_BatchNorm/loss.txt b/VGG_BatchNorm/loss.txt
new file mode 100644
index 0000000000000000000000000000000000000000..477086e5390e314290795c4834abe23d89ab7f50
--- /dev/null
+++ b/VGG_BatchNorm/loss.txt
@@ -0,0 +1,5 @@
+1.7658132662248733
+1.2146569554458189
+0.9086841719839579
+0.7367523306471002
+0.6139206301678172
diff --git a/VGG_BatchNorm/models/__init__.py b/VGG_BatchNorm/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7ec16d2b5f4915b462ffc99d45a2e68a2c53121
--- /dev/null
+++ b/VGG_BatchNorm/models/__init__.py
@@ -0,0 +1,5 @@
+'''
+Models implementation and training & evaluating functions
+'''
+
+from . import vgg
\ No newline at end of file
diff --git a/VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc b/VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3e6e90bbc1dc6664098cf517265b27d3be64ea2b
Binary files /dev/null and b/VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc b/VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b8e4e614ddfe0d470b0a4c3f1886d18afa8d938
Binary files /dev/null and b/VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc b/VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..931eba538ebfa704967a42af0b3067e9fabee883
Binary files /dev/null and b/VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc b/VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eb29fc856f25c524c1d6e80fc1246341f0f88b86
Binary files /dev/null and b/VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/models/vgg.py b/VGG_BatchNorm/models/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc3fd85d95d4832b78e8449c3b684f6f10c729ee
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg.py
@@ -0,0 +1,243 @@
+"""
+VGG
+"""
+import numpy as np
+from torch import nn
+
+from utils.nn import init_weights_
+
+# ## Models implementation
+def get_number_of_parameters(model):
+    """Return the total number of scalar entries across ``model.parameters()``.
+
+    Uses ``numel()`` so the result is always an ``int``; ``np.prod`` of an empty shape is a float.
+    """
+    return sum(parameter.numel() for parameter in model.parameters())
+
+
+class VGG_A(nn.Module):
+    """VGG-A (8 convolutional + 3 linear layers) sized for 32x32x3 inputs.
+
+    Five convolutional stages, each closed by a 2x2 max-pool, shrink a 32x32
+    image to a 512x1x1 feature map, which is why the linear layers are only
+    512 wide instead of the sizes needed for 224x224x3 inputs.
+    ``init_weights_`` is applied to every module unless ``init_weights`` is false.
+    """
+
+    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
+        """Build the layers; run ``_init_weights`` when ``init_weights`` is true."""
+        super().__init__()
+
+        def conv_relu(c_in, c_out):
+            # size-preserving 3x3 convolution followed by an in-place ReLU
+            return [nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1), nn.ReLU(True)]
+
+        def pool():
+            # 2x2 max-pool with stride 2: halves height and width
+            return [nn.MaxPool2d(kernel_size=2, stride=2)]
+
+        # The position of each layer fixes its ``features.<index>`` key in the
+        # state dict, so the order below must stay stable for saved weights to load.
+        self.features = nn.Sequential(
+            # stage 1
+            *conv_relu(inp_ch, 64), *pool(),
+            # stage 2
+            *conv_relu(64, 128), *pool(),
+            # stage 3
+            *conv_relu(128, 256), *conv_relu(256, 256), *pool(),
+            # stage 4
+            *conv_relu(256, 512), *conv_relu(512, 512), *pool(),
+            # stage 5
+            *conv_relu(512, 512), *conv_relu(512, 512), *pool(),
+        )
+
+        flat_dim = 512 * 1 * 1  # channels * height * width after stage 5 for 32x32 inputs
+        hidden = 512
+        self.classifier = nn.Sequential(
+            nn.Linear(flat_dim, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, num_classes),
+        )
+
+        if init_weights:
+            self._init_weights()
+
+    def forward(self, x):
+        """Run the feature extractor and the classifier; return one score per class."""
+        feature_maps = self.features(x)
+        # view(-1, 512) assumes a 512x1x1 feature map per sample, i.e. 32x32 inputs.
+        flattened = feature_maps.view(-1, 512 * 1 * 1)
+        return self.classifier(flattened)
+
+    def _init_weights(self):
+        """Call ``init_weights_`` on every module returned by ``self.modules()``."""
+        for module in self.modules():
+            init_weights_(module)
+
+
+class VGG_A_Light(nn.Module):
+    """Small two-stage relative of VGG-A for 32x32 inputs.
+
+    Only ``stage1`` (16 channels) and ``stage2`` (32 channels) exist. Each ends
+    in a 2x2 max-pool, so a 32x32 image reaches the classifier as a 32x8x8
+    feature map; the classifier uses 128 hidden units.
+
+    Args:
+        inp_ch: number of input image channels.
+        num_classes: size of the output layer.
+    """
+
+    def __init__(self, inp_ch=3, num_classes=10):
+        super().__init__()
+
+        def make_stage(c_in, c_out):
+            # size-preserving 3x3 convolution -> ReLU -> 2x2 max-pool
+            return nn.Sequential(
+                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=2, stride=2),
+            )
+
+        self.stage1 = make_stage(inp_ch, 16)
+        self.stage2 = make_stage(16, 32)
+        # Stages 3-5 are disabled in this variant: they are neither constructed
+        # here nor called in forward().
+
+        flat_dim = 32 * 8 * 8  # channels * height * width after stage 2 for 32x32 inputs
+        hidden = 128
+        self.classifier = nn.Sequential(
+            nn.Linear(flat_dim, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, num_classes),
+        )
+
+    def forward(self, x):
+        """Apply both stages and the classifier; return one score per class.
+
+        The flattening to (-1, 2048) assumes 32x8x8 feature maps, i.e. 32x32 inputs.
+        """
+        pooled = self.stage1(x)
+        pooled = self.stage2(pooled)
+        flattened = pooled.view(-1, 32 * 8 * 8)
+        return self.classifier(flattened)
+
+
+class VGG_A_Dropout(nn.Module):
+    """VGG-A for 32x32 inputs with dropout before the first two linear layers.
+
+    The five convolutional stages are stored as ``stage1`` .. ``stage5``; each
+    ends in a 2x2 max-pool, so a 32x32 image leaves ``stage5`` as a 512x1x1
+    feature map. ``nn.Dropout()`` is used with its default probability.
+
+    Args:
+        inp_ch: number of input image channels.
+        num_classes: size of the output layer.
+    """
+
+    def __init__(self, inp_ch=3, num_classes=10):
+        super().__init__()
+
+        def make_stage(*widths):
+            # widths = (in, out_1[, out_2]): one size-preserving 3x3 conv + in-place
+            # ReLU per consecutive pair of widths, closed by a 2x2 max-pool.
+            layers = []
+            for c_in, c_out in zip(widths, widths[1:]):
+                layers.append(nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1))
+                layers.append(nn.ReLU(True))
+            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
+            return nn.Sequential(*layers)
+
+        # Attribute names and creation order determine the state-dict keys and their order.
+        self.stage1 = make_stage(inp_ch, 64)
+        self.stage2 = make_stage(64, 128)
+        self.stage3 = make_stage(128, 256, 256)
+        self.stage4 = make_stage(256, 512, 512)
+        self.stage5 = make_stage(512, 512, 512)
+
+        # Dropout sits in front of the first and the second linear layer only.
+        flat_dim = 512 * 1 * 1  # channels * height * width after stage 5 for 32x32 inputs
+        self.classifier = nn.Sequential(
+            nn.Dropout(),
+            nn.Linear(flat_dim, 512),
+            nn.ReLU(True),
+            nn.Dropout(),
+            nn.Linear(512, 512),
+            nn.ReLU(True),
+            nn.Linear(512, num_classes),
+        )
+
+    def forward(self, x):
+        """Pass ``x`` through the five stages and the classifier; return one score per class.
+
+        The flattening to (-1, 512) assumes 512x1x1 feature maps, i.e. 32x32 inputs.
+        """
+        for stage in (self.stage1, self.stage2, self.stage3, self.stage4, self.stage5):
+            x = stage(x)
+        return self.classifier(x.view(-1, 512 * 1 * 1))
+
+
+class VGG_A_BatchNorm(nn.Module):
+    """VGG-A for 32x32 inputs with batch normalisation after every convolution.
+
+    Same layout as ``VGG_A`` except that each 3x3 convolution is followed by
+    ``nn.BatchNorm2d`` before the ReLU. Five stages, each closed by a 2x2
+    max-pool, turn a 32x32 image into a 512x1x1 feature map.
+    """
+
+    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
+        """Build the layers; run ``_init_weights`` when ``init_weights`` is true."""
+        super().__init__()
+
+        def conv_bn_relu(c_in, c_out):
+            # size-preserving 3x3 convolution -> batch norm -> in-place ReLU
+            return [nn.Conv2d(c_in, c_out, 3, padding=1), nn.BatchNorm2d(c_out), nn.ReLU(True)]
+
+        # Stage layout as (input width, output widths); one conv block per output width.
+        stage_widths = [
+            (inp_ch, [64]),
+            (64, [128]),
+            (128, [256, 256]),
+            (256, [512, 512]),
+            (512, [512, 512]),
+        ]
+        layers = []
+        for c_in, widths in stage_widths:
+            for c_out in widths:
+                layers.extend(conv_bn_relu(c_in, c_out))
+                c_in = c_out
+            layers.append(nn.MaxPool2d(2, 2))
+        # Positions in this list become the ``features.<index>`` state-dict keys.
+        self.features = nn.Sequential(*layers)
+
+        self.classifier = nn.Sequential(
+            nn.Linear(512 * 1 * 1, 512),
+            nn.ReLU(),
+            nn.Linear(512, 512),
+            nn.ReLU(),
+            nn.Linear(512, num_classes),
+        )
+
+        if init_weights:
+            self._init_weights()
+
+    def forward(self, x):
+        """Run the feature extractor and the classifier; return one score per class."""
+        feature_maps = self.features(x)
+        # view(-1, 512) assumes a 512x1x1 feature map per sample, i.e. 32x32 inputs.
+        return self.classifier(feature_maps.view(-1, 512 * 1 * 1))
+
+    def _init_weights(self):
+        """Call ``init_weights_`` on every module returned by ``self.modules()``."""
+        for module in self.modules():
+            init_weights_(module)
+
+
+if __name__ == '__main__':
+    # Print the parameter count of each model variant, one per line, in
+    # definition order.
+    for model_cls in (VGG_A, VGG_A_Light, VGG_A_Dropout, VGG_A_BatchNorm):
+        print(get_number_of_parameters(model_cls()))
\ No newline at end of file
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0001.pth b/VGG_BatchNorm/models/vgg_a_lr0.0001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4cce77cac1114aa5fa6bd16cebcde8d3dc4cea11
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:105171da7fd7bf680dd5d4c6d9484019a469849021e775edade244a84b139bfb
+size 39011786
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..0eea150ae98f23fc66a779819d45fb3cbb3a5b44
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40fd79ad6ea0064b44c91905f42549bac9f3e4439f2ac7b3e4f4b61681b7a81f
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..94e0ce2dd4f26dab4bc0f71c799676ed6bce9ebe
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc50830495cff83662ea2603a3b39b8f882f45341a6aa95ac8cc9a731c315196
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0005.pth b/VGG_BatchNorm/models/vgg_a_lr0.0005.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f3b7fdbaa22a4db6694633d1f31018ff113ee3e4
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0005.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3db4e136d92dc94a1f8601d7700e922e466a049e91891840f0374d6d27c98db
+size 39011786
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..d5d0486a7ef302a6a3220e986215af84b62beda2
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5703f3db77174dc12448202f46dac32866fa7ce47383c961b0c0d85bfc4c89b7
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..19ea4b1c264b737ac18d49fb4880a0f7acc42f5b
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3838ee654dddc55efa6b63f65594901803a410f17e90e2d2402ba51127c94db
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001.pth b/VGG_BatchNorm/models/vgg_a_lr0.001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0b628e9684a031e2570839ad834317b228c0b2af
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7bab30b087815d4470cb665e2aeadcb2c2c786f5f21746bcf07e93cef1e3669
+size 39011760
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..f6d8ecf087353ffc97ab87af0e34f9a077494079
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68a558422f2f2eb6233e061ed0e24fea94a7525074255f8f91dd5d6ed6b2dd1c
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..f2c45d9c98f5434404d2447d422f407a64cd2fdd
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7f74eebcff55116c216fbaca90a56280090d6a01ab7223cad91d8cb42c97747
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002.pth b/VGG_BatchNorm/models/vgg_a_lr0.002.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4fb0a3752bc1907b70bcd2be7b83c35f949d5697
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.002.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e8453a024e60b3bb8e680b0aeed1d17bcf1545b2de4ca030f57513e0c5f7be
+size 39011760
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy b/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..d2295f0d5bed212c83cadd726bd67d8745283181
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a22fc25871c1a180f423cc498b60f1083d07c2e45d605939889b5c90a35afcfa
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy b/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..8635ed4da9ee3b74dad76513bfe9be638e0e199e
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7868e3df3d4ae5b25889df52f98ef5e4bb98dcd741f62667b4671b3e16ae981
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth b/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4ed387e41b6a2805868919b6f4e4f6a8f86a1cb8
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9451b3372e6897dd96be45a1902a9629e8f313e9019162d8f9ce5251ffc542a3
+size 39068716
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..487e006e5e8f8fdf80b8c85200e74222604e201b
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8880316d60e582db12979390d79b6f5df7a1b0c3b31e90ff164de5f0930d0aef
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..39125903e15767e1db180d7a258a6641dfae4234
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a2ee1fe92633edf653d27d5139c882ffdd331319832aedf391a9d8f9a2c42e8
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth b/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3c42ba42723734274400869801f86ce4546a1144
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17b108a6b387ba7f8dbaf5f8192cebd96e753d3b2eec071fdb9047ec18ac56af
+size 39068716
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..022806d24048f2e3a75eb1e3ca2f5a52b97ec888
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75477b2424055e4773105ed1c3e30b3f865263d93620cdb0ab8ea8cc7ab90e5b
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..11b3269759d851bec462dc0a94c22597de6b583a
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3261b9564be6ae816ec048ac8189aec7e16eb0b9ed6a31a4851356a19f09ce04
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001.pth b/VGG_BatchNorm/models/vgg_bn_lr0.001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..51bad31a372d317f8a344a46c116ca5aed094c31
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9765324e222a9d6ff7b51ae6b5f3f97a1e1cc99197654f906f8e7611b9b8d193
+size 39068650
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..396d62a8670f1c611884aa321c3514bc906e2197
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2224404b4ba0e30fb143ddbaee1976528671afe3b2f25a14499dc7dd63a87d
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..05c5d8746c0d03d83998dfae8ccce157e62aee15
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50030e0c9e31a85a981d7b60d0094b758fa8efb37f89ca309ec7025cfa71787e
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002.pth b/VGG_BatchNorm/models/vgg_bn_lr0.002.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6db683741e511477d11759e9bc03b71a66b242f0
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.002.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1488c03104e9de23b07a9deb22ef6afb1ba3b7f6aaa5dad9ba5a46a4c51082c4
+size 39068650
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy
new file mode 100644
index 0000000000000000000000000000000000000000..57092ccccbeac1b9164e5dd1f36ce2e393333524
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf1d2594a3cd9783d29c71a69d124d00928c24ba50d143dfb58c72cd06f601b
+size 31408
diff --git a/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy
new file mode 100644
index 0000000000000000000000000000000000000000..11527236b3579f6d93ce0840ef0cb1fbac35e1be
--- /dev/null
+++ b/VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:215478b68ca6ea0590bc76f7353771a923a1f10422c5b9dfa022526d1ad0d41b
+size 31408
diff --git a/VGG_BatchNorm/utils/__init__.py b/VGG_BatchNorm/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d9731c28b513a4297ce41cea337733a6258eb14
--- /dev/null
+++ b/VGG_BatchNorm/utils/__init__.py
@@ -0,0 +1,4 @@
+'''
+Several utils, in particular for experiments
+'''
+from . import nn
\ No newline at end of file
diff --git a/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea57a0f5f86460d314f7e8be193acf9788313c9f
Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e25fdad670f50921969fc674905e3687f4b60bf6
Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc b/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..574e81e80af31a672a0437311a3902fc1a0d9664
Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc differ
diff --git a/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc b/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..05b0e61e89f08d724f5cee36e43d6482a7bb30f3
Binary files /dev/null and b/VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc differ
diff --git a/VGG_BatchNorm/utils/nn.py b/VGG_BatchNorm/utils/nn.py
new file mode 100644
index 0000000000000000000000000000000000000000..3945cb8e0b96a005aa593a0ba8652a8e1b81ec58
--- /dev/null
+++ b/VGG_BatchNorm/utils/nn.py
@@ -0,0 +1,31 @@
+"""
+Utils for neural networks
+"""
+
+from torch import nn
+
+
+def init_weights_(m):
+    """
+    Initializes the parameters of m in place.
+
+    Conv2d and Linear weights are drawn with the Xavier normal method and
+    BatchNorm1d/BatchNorm2d weights are set to one; every bias is set to
+    zero. Parameters that are None (``bias=False`` on a layer, or
+    ``affine=False`` on a BatchNorm) are skipped, and modules of any other
+    type are left untouched.
+
+    :param m: module
+    :return: None
+    """
+    if isinstance(m, (nn.Conv2d, nn.Linear)):
+        nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
+
+    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
+        # affine=False leaves weight and bias as None; nn.init would fail on them
+        if m.weight is not None:
+            nn.init.ones_(m.weight)
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
diff --git a/VGG_BatchNorm/vgg_bn_compare.png b/VGG_BatchNorm/vgg_bn_compare.png
new file mode 100644
index 0000000000000000000000000000000000000000..b1fb21faf3a58aa4cabc0b0e929535e3f416048f
Binary files /dev/null and b/VGG_BatchNorm/vgg_bn_compare.png differ
diff --git a/VGG_BatchNorm/vgg_bn_loss_landscape.png b/VGG_BatchNorm/vgg_bn_loss_landscape.png
new file mode 100644
index 0000000000000000000000000000000000000000..064534761285b05f22bfb9432f870cb408bad697
Binary files /dev/null and b/VGG_BatchNorm/vgg_bn_loss_landscape.png differ
diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8b36ecd26a3105ed63f46fa91b0b3a1e5ab95cd
Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png differ
diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png
new file mode 100644
index 0000000000000000000000000000000000000000..71032927e92f065d98399e65a8af937a89ff58fd
Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.0005.png differ
diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png
new file mode 100644
index 0000000000000000000000000000000000000000..4dc03fb9186699e3e50ed0bfff758000bcd7368c
Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.001.png differ
diff --git a/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png
new file mode 100644
index 0000000000000000000000000000000000000000..84ab1e2fed855b42dbebafff3bb2d67f05f10b69
Binary files /dev/null and b/VGG_BatchNorm/vgg_grad_norm_curve_lr0.002.png differ
diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png b/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png
new file mode 100644
index 0000000000000000000000000000000000000000..b75aead56d2ecfd80e1290d7d64e3da5bacbeafb
Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.0001.png differ
diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png b/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png
new file mode 100644
index 0000000000000000000000000000000000000000..47c45e3ec8cd63ca056184917ae24fa752238178
Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.0005.png differ
diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.001.png b/VGG_BatchNorm/vgg_loss_curve_lr0.001.png
new file mode 100644
index 0000000000000000000000000000000000000000..645a2a2825b50fef42dcaafabae83bbedeaddee7
Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.001.png differ
diff --git a/VGG_BatchNorm/vgg_loss_curve_lr0.002.png b/VGG_BatchNorm/vgg_loss_curve_lr0.002.png
new file mode 100644
index 0000000000000000000000000000000000000000..e26101d1c6ce67b66f93745c5b29451859e7507a
Binary files /dev/null and b/VGG_BatchNorm/vgg_loss_curve_lr0.002.png differ
diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png
new file mode 100644
index 0000000000000000000000000000000000000000..31ae8e2b3efc95342e9646fff42740159dc3d7e1
Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0001.png differ
diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png
new file mode 100644
index 0000000000000000000000000000000000000000..88ee29de2cd349fe9d777ad303e6e29c8f92e2ce
Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.0005.png differ
diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png
new file mode 100644
index 0000000000000000000000000000000000000000..08805778a252ce3ba08b63cdcef3443d58954df0
Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.001.png differ
diff --git a/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png b/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3b597355852798857601f976a7f04296c0ee8ba
Binary files /dev/null and b/VGG_BatchNorm/vgg_max_grad_diff_lr0.002.png differ
diff --git a/__pycache__/mymodel.cpython-310.pyc b/__pycache__/mymodel.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..615db8a57ddbbb3ba4653b06ecb14bbecd34bd01
Binary files /dev/null and b/__pycache__/mymodel.cpython-310.pyc differ
diff --git a/best_model.pth b/best_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..79358c4a96a2fa41e4233e95f5795ac5ea90f3ab
--- /dev/null
+++ b/best_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6382bea60097cd0a85ec970df6cea916256ac88a59298b9442b0132955f2ae4f
+size 2187810
diff --git a/data/cifar-10-batches-py/batches.meta b/data/cifar-10-batches-py/batches.meta
new file mode 100644
index 0000000000000000000000000000000000000000..4467a6ec2e886a9f14f25e31776fb0152d8ac64a
Binary files /dev/null and b/data/cifar-10-batches-py/batches.meta differ
diff --git a/data/cifar-10-batches-py/data_batch_1 b/data/cifar-10-batches-py/data_batch_1
new file mode 100644
index 0000000000000000000000000000000000000000..1b9ff789bbf08b02df98fea255e1343119eaa8d6
--- /dev/null
+++ b/data/cifar-10-batches-py/data_batch_1
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54636561a3ce25bd3e19253c6b0d8538147b0ae398331ac4a2d86c6d987368cd
+size 31035704
diff --git a/data/cifar-10-batches-py/data_batch_2 b/data/cifar-10-batches-py/data_batch_2
new file mode 100644
index 0000000000000000000000000000000000000000..da8acc0d33edbd9889f8a11226e2be1f53bdf1f5
--- /dev/null
+++ b/data/cifar-10-batches-py/data_batch_2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766b2cef9fbc745cf056b3152224f7cf77163b330ea9a15f9392beb8b89bc5a8
+size 31035320
diff --git a/data/cifar-10-batches-py/data_batch_3 b/data/cifar-10-batches-py/data_batch_3
new file mode 100644
index 0000000000000000000000000000000000000000..e98eb3e45d5a9778ad227d2703c7d4b1290a5d64
--- /dev/null
+++ b/data/cifar-10-batches-py/data_batch_3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f00d98ebfb30b3ec0ad19f9756dc2630b89003e10525f5e148445e82aa6a1f9
+size 31035999
diff --git a/data/cifar-10-batches-py/data_batch_4 b/data/cifar-10-batches-py/data_batch_4
new file mode 100644
index 0000000000000000000000000000000000000000..9b81f87873afbf46bdda4fa1ee82434857a58ecc
--- /dev/null
+++ b/data/cifar-10-batches-py/data_batch_4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f7bb240661948b8f4d53e36ec720d8306f5668bd0071dcb4e6c947f78e9682b
+size 31035696
diff --git a/data/cifar-10-batches-py/data_batch_5 b/data/cifar-10-batches-py/data_batch_5
new file mode 100644
index 0000000000000000000000000000000000000000..0428cfda4f34db9278991559bcbc322d4f79e6ac
--- /dev/null
+++ b/data/cifar-10-batches-py/data_batch_5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d91802434d8376bbaeeadf58a737e3a1b12ac839077e931237e0dcd43adcb154
+size 31035623
diff --git a/data/cifar-10-batches-py/readme.html b/data/cifar-10-batches-py/readme.html
new file mode 100644
index 0000000000000000000000000000000000000000..e377adef45c85dc91051edf2dee72c1d4d57732c
--- /dev/null
+++ b/data/cifar-10-batches-py/readme.html
@@ -0,0 +1 @@
+<meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
diff --git a/data/cifar-10-batches-py/test_batch b/data/cifar-10-batches-py/test_batch
new file mode 100644
index 0000000000000000000000000000000000000000..7cb1691b21c2eaf98ca33dc302ab6df2c2984121
--- /dev/null
+++ b/data/cifar-10-batches-py/test_batch
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f53d8d457504f7cff4ea9e021afcf0e0ad8e24a91f3fc42091b8adef61157831
+size 31035526
diff --git a/data/cifar-10-python.tar.gz b/data/cifar-10-python.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3026cc501ad5b158f4de147d06c95ebbc112ea48
--- /dev/null
+++ b/data/cifar-10-python.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce
+size 170498071
diff --git a/fig/all_filters_conv1.png b/fig/all_filters_conv1.png
new file mode 100644
index 0000000000000000000000000000000000000000..5b20480bdd8338ea69b84a49113d47ce468961b9
Binary files /dev/null and b/fig/all_filters_conv1.png differ
diff --git a/fig/all_filters_conv2.png b/fig/all_filters_conv2.png
new file mode 100644
index 0000000000000000000000000000000000000000..fc066550dff84eb811ad13afa9890fe439afef16
Binary files /dev/null and b/fig/all_filters_conv2.png differ
diff --git a/fig/feature_maps.png b/fig/feature_maps.png
new file mode 100644
index 0000000000000000000000000000000000000000..e7653e7093bde956985c378b0719baa652b3f9d1
Binary files /dev/null and b/fig/feature_maps.png differ
diff --git a/fig/feature_maps_conv1.png b/fig/feature_maps_conv1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9bacab659d428479f885969bbe447bbc802c6b4
--- /dev/null
+++ b/fig/feature_maps_conv1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa306749b1897efefd38d982852c9f562e84fe6d1da7db764e7869679cb2f333
+size 101683
diff --git a/fig/feature_maps_conv2.png b/fig/feature_maps_conv2.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ab54a6209d889a1368a34aa4b745ecf2fa00ba6
Binary files /dev/null and b/fig/feature_maps_conv2.png differ
diff --git a/fig/loss_curve.png b/fig/loss_curve.png
new file mode 100644
index 0000000000000000000000000000000000000000..bb3778e02353d1892b27ebceb7cd91b1bf1c59c6
Binary files /dev/null and b/fig/loss_curve.png differ
diff --git a/fig/loss_landscape.png b/fig/loss_landscape.png
new file mode 100644
index 0000000000000000000000000000000000000000..b6e495727a28245152617a99868e4aa85d05c042
Binary files /dev/null and b/fig/loss_landscape.png differ
diff --git a/fig/my_filters.png b/fig/my_filters.png
new file mode 100644
index 0000000000000000000000000000000000000000..83392e1e9847f35c5e67640066b94e17555371fa
Binary files /dev/null and b/fig/my_filters.png differ
diff --git a/init.py b/init.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ae957a7f11b9391d87a2e7d7ddbf311d2f66b3e
--- /dev/null
+++ b/init.py
@@ -0,0 +1 @@
+# NOTE: this file is named init.py; only a file named __init__.py marks a directory as a regular Python package.
diff --git a/model/best_model_1.pth b/model/best_model_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..993d41e0af5a602c2e216caca8cf9b1f752ca8d6
--- /dev/null
+++ b/model/best_model_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab2eecf0175932fa74fd2806269b07f355bd39b9b5b30248701220840a20fb2
+size 2188002
diff --git a/model/best_model_20250523-152515.pth b/model/best_model_20250523-152515.pth
new file mode 100644
index 0000000000000000000000000000000000000000..13b37a1a8d026a9f08d0666fa7d12d593c15afaa
--- /dev/null
+++ b/model/best_model_20250523-152515.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff90946d79f3a15174eb9c0d54a709dab5ee8a9fa36c83c15c1deeb96963ea21
+size 2183972
diff --git a/model/best_model_20250523-153037.pth b/model/best_model_20250523-153037.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9ca266031d111389f409c41802b0e5becaea761b
--- /dev/null
+++ b/model/best_model_20250523-153037.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1bdfe0cc12793c7f534e58982a100367015620d7a01342450dfe8171797787f
+size 2188418
diff --git a/model/best_model_20250523-153308.pth b/model/best_model_20250523-153308.pth
new file mode 100644
index 0000000000000000000000000000000000000000..05aeb2b32e70aa638f4e436a34b0eadb1bca2bf7
--- /dev/null
+++ b/model/best_model_20250523-153308.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dd89c66ec30ec81a73328f5145baa8698678faafc8f69cf784981e185f830d9
+size 2183972
diff --git a/model/best_model_20250523-153543.pth b/model/best_model_20250523-153543.pth
new file mode 100644
index 0000000000000000000000000000000000000000..29065fdbe80b7cd07d8cc8a0814140a8e4d19f29
--- /dev/null
+++ b/model/best_model_20250523-153543.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cd33c68761affe3d97e99ac8ce63a65a5a8808767bef7f6e431f3518d47c1e1
+size 2188418
diff --git a/model/best_model_20250523-153830.pth b/model/best_model_20250523-153830.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0dfdde827e0b30154feabef03fcb743df81e5767
--- /dev/null
+++ b/model/best_model_20250523-153830.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f8bccbb1f3dd266740be283c53b88fef4f376f93fa273c5f68965095302026
+size 2188418
diff --git a/model/best_model_20250523-154144.pth b/model/best_model_20250523-154144.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a231977646d83b989c67ac4c6d022afa5d47f1c3
--- /dev/null
+++ b/model/best_model_20250523-154144.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2327ea8eeff1a51e1171ac893dceeef751c2144e59389cd344e6cddc4e1097f5
+size 2188418
diff --git a/model/best_model_20250524-055438.pth b/model/best_model_20250524-055438.pth
new file mode 100644
index 0000000000000000000000000000000000000000..337bd95abd57ab684758dbee65019ce3ad679ed6
--- /dev/null
+++ b/model/best_model_20250524-055438.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f854a5352e4466a4821627e8e36e01d71f199e2519add0b4d27ed133c8bd6c0
+size 2188418
diff --git a/model/best_model_20250524-060548.pth b/model/best_model_20250524-060548.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9815d964cad16dc38b330c51d4fc54f8ca00824
--- /dev/null
+++ b/model/best_model_20250524-060548.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36dad5eeb4b8040386f5d11db8751a4213b3d00744f8624fa9b4ab8fa4a8ce19
+size 4907522
diff --git a/model/best_model_20250524-061343.pth b/model/best_model_20250524-061343.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cce4f154a3deaea546fbdef58d2d32d7887000e6
--- /dev/null
+++ b/model/best_model_20250524-061343.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68ef7e771b0909b839561713e64790ddde49c7c6559b4e97bca150368d59b7b3
+size 2325634
diff --git a/model/best_model_20250524-062209.pth b/model/best_model_20250524-062209.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1695c5b94604d3f6359dd7b6ccd4c7147e4e5351
--- /dev/null
+++ b/model/best_model_20250524-062209.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e96f5650e0c676b24336dd999c5afd47ad066d62849042358a9244757179a3b8
+size 2490274
diff --git a/model/best_model_20250524-062621.pth b/model/best_model_20250524-062621.pth
new file mode 100644
index 0000000000000000000000000000000000000000..506c0162c48c9093d9cd97e73b470f5f85087873
--- /dev/null
+++ b/model/best_model_20250524-062621.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41bce6d604ecf674e466fea87aba7b3f5b9b20048b5793270bc0f428130ffb5c
+size 12534946
diff --git a/model/best_model_20250524-065041.pth b/model/best_model_20250524-065041.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ea406d8639fc804276255645ee5432186b1e7c28
--- /dev/null
+++ b/model/best_model_20250524-065041.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bed43b184195b492c5180047cf7fc88c5da3bbb6dcbff7b8dbfc767bc8a39ec
+size 12534946
diff --git a/model/best_model_20250524-071202.pth b/model/best_model_20250524-071202.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9b97f0bec0e49e0553cfed07968827ad9047f9a3
--- /dev/null
+++ b/model/best_model_20250524-071202.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34a33d28cd512efa69a841d209f178429cbf9856fd6dba29cb932fc25ed479ee
+size 2188418
diff --git a/model/best_model_20250524-071415.pth b/model/best_model_20250524-071415.pth
new file mode 100644
index 0000000000000000000000000000000000000000..39170b7092e13cd5d218e0a34d86428e906e0401
--- /dev/null
+++ b/model/best_model_20250524-071415.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:691524c379e256802b670a857e3a7ecb8d79d6d809b313c6817c05b9c3dd545b
+size 2188418
diff --git a/model/best_model_20250524-073418.pth b/model/best_model_20250524-073418.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fdd6811b1c67a5e03a0d70da557761c629d95cf6
--- /dev/null
+++ b/model/best_model_20250524-073418.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:047a0db3710fb3c1d28e5ca6b76dbd116e9a337b1142c09cb72437c566853892
+size 2188418
diff --git a/model/best_model_20250524-073654.pth b/model/best_model_20250524-073654.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b1c2ac925cf047f0f4c1924ecfd95af6fb715088
--- /dev/null
+++ b/model/best_model_20250524-073654.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6639d12aa73241e276e62d7c063a4067b90572bab124dcbce72e19dbf54e632f
+size 2188418
diff --git a/model/best_model_20250524-073909.pth b/model/best_model_20250524-073909.pth
new file mode 100644
index 0000000000000000000000000000000000000000..25a5d01c6c8cb83c4f8b1f4716601891b8161178
--- /dev/null
+++ b/model/best_model_20250524-073909.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31ed0b2801cec50c3856d5b43d5661cb25bc5fc4a3c9f49fa43e27a61b976dab
+size 2188418
diff --git a/model/best_model_20250606-074637.pth b/model/best_model_20250606-074637.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5c725758e994f6283d2d340586763849c6b3144f
--- /dev/null
+++ b/model/best_model_20250606-074637.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34eefc6f51475e11aa8201e5875501a6ebb645a44abfeb295811caee9af56b89
+size 2188418
diff --git a/mymodel.py b/mymodel.py
new file mode 100644
index 0000000000000000000000000000000000000000..dde6bf784f6ba89f7e91c4487103bf5f372f9172
--- /dev/null
+++ b/mymodel.py
@@ -0,0 +1,48 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class MyCIFAR10Net(nn.Module):
+    """Small CNN: two conv -> norm -> activation -> max-pool stages, then two linear layers.
+
+    ``use_batchnorm`` and ``use_dropout`` replace the BatchNorm2d and
+    Dropout(0.25) layers with ``nn.Identity`` when False. ``activation`` is
+    'relu', 'leakyrelu', 'tanh' or 'sigmoid' and is only checked in ``forward``.
+    """
+
+    def __init__(self, num_classes=10, use_batchnorm=True, use_dropout=False, activation='relu'):
+        super().__init__()
+        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
+        self.bn1 = nn.BatchNorm2d(32) if use_batchnorm else nn.Identity()
+        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
+        self.bn2 = nn.BatchNorm2d(64) if use_batchnorm else nn.Identity()
+        self.pool = nn.MaxPool2d(2, 2)
+        self.dropout = nn.Dropout(0.25) if use_dropout else nn.Identity()
+        # 64 * 8 * 8 presumably assumes 32x32 inputs halved by each of the two pools - confirm
+        self.fc1 = nn.Linear(64 * 8 * 8, 128)
+        self.fc2 = nn.Linear(128, num_classes)
+        self.activation = activation
+
+    def forward(self, x):
+        """Return the logits, one row of ``num_classes`` values per input image."""
+        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
+            x = self.pool(self._activate(norm(conv(x))))
+        x = self.dropout(x)
+        x = x.view(x.size(0), -1)
+        x = self.dropout(self._activate(self.fc1(x)))
+        return self.fc2(x)
+
+    def _activate(self, x):
+        """Apply the function named by ``self.activation``; raise ValueError if unknown."""
+        table = (
+            ('relu', F.relu),
+            ('leakyrelu', F.leaky_relu),
+            ('tanh', torch.tanh),
+            ('sigmoid', torch.sigmoid),
+        )
+        for name, fn in table:
+            if self.activation == name:
+                return fn(x)
+        raise ValueError(f"Unknown activation: {self.activation}")
+
+# You can add more model variants or residual blocks here as needed.
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c82a1305634b32d5eda160f0c0758fd6453e6975
--- /dev/null
+++ b/test.py
@@ -0,0 +1,33 @@
+import torch
+import torchvision
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader
+from mymodel import MyCIFAR10Net
+
+# Data loading (test set)
+transform = transforms.Compose([
+    transforms.ToTensor(),
+    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+])
+testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
+testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)
+
+# Load model (NOTE(review): train.py trains with activation='leakyrelu'; confirm 'relu' matches this checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = MyCIFAR10Net(num_classes=10, use_batchnorm=True, use_dropout=True, activation='relu').to(device)
+model.load_state_dict(torch.load('model/best_model_1.pth', map_location=device))
+model.eval()  # inference mode: dropout off, BatchNorm uses its running statistics
+
+# Evaluate only when run as a script: worker processes started with "spawn" re-import this module
+if __name__ == "__main__":
+    correct = total = 0
+    with torch.no_grad():
+        for inputs, labels in testloader:
+            inputs, labels = inputs.to(device), labels.to(device)
+            outputs = model(inputs)
+            _, predicted = torch.max(outputs.data, 1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum().item()
+
+    print(f'Test Accuracy: {100 * correct / total:.2f}%')
+    print(f'Test Error: {100 - 100 * correct / total:.2f}%')
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd5d8d5a9c8eaa38eb77c7038ef1a9418a7c74b1
--- /dev/null
+++ b/train.py
@@ -0,0 +1,209 @@
+import torch
+import torchvision
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader, random_split
+from mymodel import MyCIFAR10Net
+import torch.optim as optim
+import torch.nn as nn
+import matplotlib.pyplot as plt
+import numpy as np
+
+# 1. Data loading
+transform = transforms.Compose([
+    transforms.ToTensor(),
+    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # per-channel mean 0.5 and std 0.5
+])
+trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
+train_size = int(0.8 * len(trainset))  # 80% of the training set for training, the rest for validation
+valid_size = len(trainset) - train_size
+train_subset, valid_subset = random_split(trainset, [train_size, valid_size])  # no generator is passed, so the split is not pinned here
+trainloader = DataLoader(train_subset, batch_size=128, shuffle=True, num_workers=2)
+valloader = DataLoader(valid_subset, batch_size=128, shuffle=False, num_workers=2)
+testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
+testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)
+
+# 2. Model, loss, optimizer
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = MyCIFAR10Net(num_classes=10, use_batchnorm=True, use_dropout=True, activation='leakyrelu').to(device)
+loss_fn = nn.CrossEntropyLoss()  # Try different loss functions here
+optimizer = optim.Adam(model.parameters(), lr=0.001)  # Try different optimizers here
+# Alternative optimizers to switch to
+# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
+# optimizer = optim.RMSprop(model.parameters(), lr=0.001, weight_decay=1e-4)
+
+epoch_losses = []  # mean training loss of each epoch, appended by train()
+
+def train(num_epochs=10):
+    """Train the global ``model`` for ``num_epochs`` epochs, calling validate() after each.
+
+    Appends every epoch's mean loss to ``epoch_losses``, then saves the final weights
+    to ``model/best_model_<timestamp>.pth`` and the loss curve to ``loss_curve.png``.
+    """
+    import os
+    import time
+    l2_lambda = 1e-4  # weight of the penalty: the sum of the (unsquared) 2-norms of all parameters
+    for epoch in range(num_epochs):
+        model.train()
+        running_loss = 0.0
+        for inputs, labels in trainloader:
+            inputs, labels = inputs.to(device), labels.to(device)
+            optimizer.zero_grad()
+            l2_reg = sum(torch.norm(param, 2) for param in model.parameters())
+            loss = loss_fn(model(inputs), labels) + l2_lambda * l2_reg
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+        avg_loss = running_loss / len(trainloader)
+        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")
+        epoch_losses.append(avg_loss)
+        validate()
+    # Saves the final-epoch weights; no best-so-far checkpoint is tracked.
+    os.makedirs('model', exist_ok=True)  # torch.save does not create missing directories
+    path = f"model/best_model_{time.strftime('%Y%m%d-%H%M%S')}.pth"
+    torch.save(model.state_dict(), path)
+    plt.figure()  # plot the per-epoch training loss
+    plt.plot(range(1, len(epoch_losses) + 1), epoch_losses, marker='o')
+    plt.xlabel('Epoch')
+    plt.ylabel('Loss')
+    plt.title('Training Loss per Epoch')
+    plt.savefig('loss_curve.png')
+    plt.close()
+
+def validate():
+    """Print the global ``model``'s accuracy on ``valloader``; leaves it in eval mode."""
+    model.eval()
+    n_correct, n_seen = 0, 0
+    with torch.no_grad():
+        for batch, targets in valloader:
+            batch, targets = batch.to(device), targets.to(device)
+            # the index of the largest output per sample is the predicted class
+            predicted = torch.max(model(batch).data, 1)[1]
+            n_seen += targets.size(0)
+            n_correct += (predicted == targets).sum().item()
+    print(f'Validation Accuracy: {100 * n_correct / n_seen:.2f}%')
+
+def save_all_conv_filters(model, filename='all_filters.png', layer='conv1'):
+    """Save a grid image (8 per row) of every filter of ``model.conv1`` or ``model.conv2``.
+
+    Each filter is min-max scaled to [0, 1]. Filters with 3 input channels are
+    drawn as RGB; any other filter is drawn in grayscale from its first input
+    channel only. Raises ValueError for any other ``layer``.
+    """
+    if layer not in ('conv1', 'conv2'):
+        raise ValueError("layer must be 'conv1' or 'conv2'")
+    filters = getattr(model, layer).weight.data.clone().cpu()
+    ncols = 8
+    nrows = (filters.shape[0] + ncols - 1) // ncols
+    # squeeze=False keeps axes 2-D even when all filters fit in a single row
+    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2), squeeze=False)
+    for ax, f in zip(axes.flat, filters):
+        span = f.max() - f.min()
+        # a constant filter would divide 0 by 0; draw it as zeros instead
+        f = (f - f.min()) / span if span > 0 else torch.zeros_like(f)
+        if f.shape[0] == 3:  # RGB
+            ax.imshow(f.permute(1, 2, 0))
+        else:  # single channel
+            ax.imshow(f[0], cmap='gray')
+    for ax in axes.flat:  # hide the frames of used and unused cells alike
+        ax.axis('off')
+    plt.tight_layout()
+    plt.savefig(filename)
+    plt.close()
+
+def visualize_all_feature_maps(model, image, filename='feature_maps.png', after='conv1'):
+    """Save a grid image (8 per row) of the activation maps one image produces.
+
+    ``image`` is a single unbatched input; a batch dimension is added here.
+    The maps are taken after the first conv/norm/activation stage, or, when
+    ``after == 'conv2'``, after pooling and the second such stage. Each map is
+    min-max scaled to [0, 1]. Leaves ``model`` in eval mode.
+    """
+    model.eval()
+    with torch.no_grad():
+        x = image.unsqueeze(0).to(next(model.parameters()).device)
+        x = model._activate(model.bn1(model.conv1(x)))
+        if after == 'conv2':
+            x = model._activate(model.bn2(model.conv2(model.pool(x))))
+        feature_maps = x.cpu().squeeze(0)
+    ncols = 8
+    nrows = (feature_maps.shape[0] + ncols - 1) // ncols
+    # squeeze=False keeps axes 2-D even when all maps fit in a single row
+    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2), squeeze=False)
+    for ax, fmap in zip(axes.flat, feature_maps):
+        span = fmap.max() - fmap.min()
+        # an all-equal map (e.g. an all-zero one) would divide 0 by 0; draw it as zeros
+        fmap = (fmap - fmap.min()) / span if span > 0 else torch.zeros_like(fmap)
+        ax.imshow(fmap, cmap='viridis')
+    # hide the frames of used and unused cells alike
+    for ax in axes.flat:
+        ax.axis('off')
+    plt.tight_layout()
+    plt.savefig(filename)
+    plt.close()
+
+def plot_loss_landscape(model, dataloader, loss_fn, steps=20, alpha=0.5):
+    """Save to loss_landscape.png the loss over a random 2-D slice of fc1's weights.
+
+    Runs in eval mode under no_grad; fc1's weights and the mode are restored afterwards.
+    """
+    w = model.fc1.weight.data.clone()
+    direction1, direction2 = torch.randn_like(w), torch.randn_like(w)
+    losses = np.zeros((steps, steps))
+    device = next(model.parameters()).device
+    offsets = np.linspace(-alpha, alpha, steps)
+    was_training = model.training
+    model.eval()  # in train mode the sweep would update BatchNorm statistics and apply dropout
+    with torch.no_grad():  # only loss values are needed, not gradients
+        for i, a in enumerate(offsets):
+            for j, b in enumerate(offsets):
+                model.fc1.weight.data = w + a * direction1 + b * direction2
+                batch_losses = [loss_fn(model(x.to(device)), y.to(device)).item()
+                                for _, (x, y) in zip(range(3), dataloader)]  # first 3 batches only, for speed
+                losses[i, j] = sum(batch_losses) / len(batch_losses)
+    model.fc1.weight.data = w  # restore the original weights
+    model.train(was_training)
+    plt.figure(figsize=(6,5))
+    plt.contourf(losses.T, levels=50, cmap='viridis')  # transposed so the x axis follows direction 1
+    plt.colorbar()
+    plt.title('Loss Landscape (fc1 weight)')
+    plt.xlabel('Direction 1')
+    plt.ylabel('Direction 2')
+    plt.savefig('loss_landscape.png')
+    plt.close()
+
+if __name__ == "__main__":
+    train(num_epochs=10)
+    # model.eval()
+    # save_conv1_filters(model)
+
+    # Load an existing model
+    # model.load_state_dict(torch.load('best_model.pth', map_location=device))
+    # model.eval()
+
+    # # Save all convolution filters
+    # save_all_conv_filters(model, filename='all_filters_conv1.png', layer='conv1')
+    # save_all_conv_filters(model, filename='all_filters_conv2.png', layer='conv2')
+
+    # # Take one batch from the validation set (its first image is used below)
+    # sample_img, _ = next(iter(valloader))
+
+    # # Visualize the feature maps
+    # visualize_all_feature_maps(model, sample_img[0], filename='feature_maps_conv1.png', after='conv1')
+    # visualize_all_feature_maps(model, sample_img[0], filename='feature_maps_conv2.png', after='conv2')
+
+    # # Loss landscape visualization
+    # plot_loss_landscape(model, valloader, loss_fn)
+
+    # Evaluate on test set after training (the last validate() call left the model in eval mode)
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for inputs, labels in testloader:
+            inputs, labels = inputs.to(device), labels.to(device)
+            outputs = model(inputs)
+            _, predicted = torch.max(outputs.data, 1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum().item()
+
+    print(f'Test Accuracy: {100 * correct / total:.2f}%')
+    print(f'Test Error: {100 - 100 * correct / total:.2f}%')
\ No newline at end of file
diff --git a/visualize.py b/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..734e3351761bd23f97d1259677b2b8b3a8a649ce
--- /dev/null
+++ b/visualize.py
@@ -0,0 +1,27 @@
+import torch
+import matplotlib.pyplot as plt
+from mymodel import MyCIFAR10Net
+
+# Example: Visualize first conv layer filters
+
+def visualize_filters(model_path='best_model.pth', save_path='filters.png'):
+    """Load a MyCIFAR10Net checkpoint and save its conv1 filters as one row of RGB images."""
+    model = MyCIFAR10Net(num_classes=10)
+    # map_location='cpu' lets a checkpoint saved from a GPU load on a CPU-only machine
+    model.load_state_dict(torch.load(model_path, map_location='cpu'))
+    conv1_weights = model.conv1.weight.data.cpu()
+    num_filters = conv1_weights.shape[0]
+    fig, axes = plt.subplots(1, num_filters, figsize=(num_filters*2, 2), squeeze=False)
+    for ax, w in zip(axes[0], conv1_weights):  # squeeze=False: axes is always a 1 x num_filters grid
+        # Normalize to [0,1] for visualization
+        w = (w - w.min()) / (w.max() - w.min())
+        ax.imshow(w.permute(1,2,0))
+        ax.axis('off')
+    plt.tight_layout()
+    plt.savefig(save_path)
+    plt.show()
+
+# You can add more visualization functions (e.g., loss landscape, feature maps, etc.)
+
+if __name__ == "__main__":
+    visualize_filters(save_path='my_filters.png')  # model_path keeps its default, 'best_model.pth'