Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +7 -0
- VGG_BatchNorm/.DS_Store +0 -0
- VGG_BatchNorm/VGG_Loss_Landscape.py +256 -0
- VGG_BatchNorm/data/__init__.py +5 -0
- VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc +0 -0
- VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc +0 -0
- VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc +0 -0
- VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc +0 -0
- VGG_BatchNorm/data/cifar-10-python.tar.gz +3 -0
- VGG_BatchNorm/data/loaders.py +53 -0
- VGG_BatchNorm/loss.txt +5 -0
- VGG_BatchNorm/models/__init__.py +5 -0
- VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc +0 -0
- VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc +0 -0
- VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc +0 -0
- VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc +0 -0
- VGG_BatchNorm/models/vgg.py +243 -0
- VGG_BatchNorm/models/vgg_a_lr0.0001.pth +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.0005.pth +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.001.pth +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.002.pth +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0001.pth +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0005.pth +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.001.pth +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.002.pth +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy +3 -0
- VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy +3 -0
- VGG_BatchNorm/utils/__init__.py +4 -0
- VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc +0 -0
- VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc +0 -0
- VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc +0 -0
- VGG_BatchNorm/utils/nn.py +31 -0
- VGG_BatchNorm/vgg_bn_compare.png +0 -0
- VGG_BatchNorm/vgg_bn_loss_landscape.png +0 -0
- VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
fig/feature_maps_conv1.png filter=lfs diff=lfs merge=lfs -text
|
VGG_BatchNorm/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
VGG_BatchNorm/VGG_Loss_Landscape.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib as mpl
|
| 2 |
+
mpl.use('Agg')
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from torch import nn
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import os
|
| 8 |
+
import random
|
| 9 |
+
from tqdm import tqdm as tqdm
|
| 10 |
+
from IPython import display
|
| 11 |
+
|
| 12 |
+
from models.vgg import VGG_A
|
| 13 |
+
from models.vgg import VGG_A_BatchNorm # you need to implement this network
|
| 14 |
+
from data.loaders import get_cifar_loader
|
| 15 |
+
|
| 16 |
+
# ## Constants (parameters) initialization
|
| 17 |
+
device_id = [0,1,2,3]
|
| 18 |
+
num_workers = 4
|
| 19 |
+
batch_size = 128
|
| 20 |
+
|
| 21 |
+
# add our package dir to path
|
| 22 |
+
module_path = os.path.dirname(os.getcwd())
|
| 23 |
+
home_path = module_path
|
| 24 |
+
figures_path = os.path.join(home_path, 'reports', 'figures')
|
| 25 |
+
models_path = os.path.join(home_path, 'reports', 'models')
|
| 26 |
+
|
| 27 |
+
# Make sure you are using the right device.
|
| 28 |
+
device_id = device_id
|
| 29 |
+
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
|
| 30 |
+
device = torch.device("cuda:{}".format(3) if torch.cuda.is_available() else "cpu")
|
| 31 |
+
print(device)
|
| 32 |
+
print(torch.cuda.get_device_name(3))
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Initialize your data loader and
|
| 37 |
+
# make sure that dataloader works
|
| 38 |
+
# as expected by observing one
|
| 39 |
+
# sample from it.
|
| 40 |
+
train_loader = get_cifar_loader(train=True)
|
| 41 |
+
val_loader = get_cifar_loader(train=False)
|
| 42 |
+
for X,y in train_loader:
|
| 43 |
+
## --------------------
|
| 44 |
+
# Add code as needed
|
| 45 |
+
#
|
| 46 |
+
#
|
| 47 |
+
#
|
| 48 |
+
#
|
| 49 |
+
## --------------------
|
| 50 |
+
break
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# This function is used to calculate the accuracy of model classification
|
| 55 |
+
def get_accuracy(model, data_loader):
|
| 56 |
+
model.eval()
|
| 57 |
+
correct = 0
|
| 58 |
+
total = 0
|
| 59 |
+
with torch.no_grad():
|
| 60 |
+
for x, y in data_loader:
|
| 61 |
+
x, y = x.to(device), y.to(device)
|
| 62 |
+
outputs = model(x)
|
| 63 |
+
_, predicted = torch.max(outputs, 1)
|
| 64 |
+
total += y.size(0)
|
| 65 |
+
correct += (predicted == y).sum().item()
|
| 66 |
+
return correct / total
|
| 67 |
+
|
| 68 |
+
# Set a random seed to ensure reproducible results
|
| 69 |
+
def set_random_seeds(seed_value=0, device='cpu'):
|
| 70 |
+
np.random.seed(seed_value)
|
| 71 |
+
torch.manual_seed(seed_value)
|
| 72 |
+
random.seed(seed_value)
|
| 73 |
+
if device != 'cpu':
|
| 74 |
+
torch.cuda.manual_seed(seed_value)
|
| 75 |
+
torch.cuda.manual_seed_all(seed_value)
|
| 76 |
+
torch.backends.cudnn.deterministic = True
|
| 77 |
+
torch.backends.cudnn.benchmark = False
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# We use this function to complete the entire
|
| 81 |
+
# training process. In order to plot the loss landscape,
|
| 82 |
+
# you need to record the loss value of each step.
|
| 83 |
+
# Of course, as before, you can test your model
|
| 84 |
+
# after drawing a training round and save the curve
|
| 85 |
+
# to observe the training
|
| 86 |
+
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
|
| 87 |
+
model.to(device)
|
| 88 |
+
learning_curve = []
|
| 89 |
+
train_accuracy_curve = []
|
| 90 |
+
val_accuracy_curve = []
|
| 91 |
+
for epoch in tqdm(range(epochs_n), unit='epoch'):
|
| 92 |
+
model.train()
|
| 93 |
+
running_loss = 0.0
|
| 94 |
+
for data in train_loader:
|
| 95 |
+
x, y = data
|
| 96 |
+
x = x.to(device)
|
| 97 |
+
y = y.to(device)
|
| 98 |
+
optimizer.zero_grad()
|
| 99 |
+
prediction = model(x)
|
| 100 |
+
loss = criterion(prediction, y)
|
| 101 |
+
loss.backward()
|
| 102 |
+
optimizer.step()
|
| 103 |
+
running_loss += loss.item()
|
| 104 |
+
avg_loss = running_loss / len(train_loader)
|
| 105 |
+
learning_curve.append(avg_loss)
|
| 106 |
+
train_acc = get_accuracy(model, train_loader)
|
| 107 |
+
val_acc = get_accuracy(model, val_loader)
|
| 108 |
+
train_accuracy_curve.append(train_acc)
|
| 109 |
+
val_accuracy_curve.append(val_acc)
|
| 110 |
+
print(f'Epoch {epoch+1}: loss={avg_loss:.4f}, train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')
|
| 111 |
+
return learning_curve, train_accuracy_curve, val_accuracy_curve
|
| 112 |
+
|
| 113 |
+
def train_stepwise(model, optimizer, criterion, train_loader, val_loader, epochs_n=10, save_prefix=''):
|
| 114 |
+
model.to(device)
|
| 115 |
+
model.train()
|
| 116 |
+
step_losses = []
|
| 117 |
+
step_grads = []
|
| 118 |
+
for epoch in range(epochs_n):
|
| 119 |
+
for i, (x, y) in enumerate(train_loader):
|
| 120 |
+
x, y = x.to(device), y.to(device)
|
| 121 |
+
optimizer.zero_grad()
|
| 122 |
+
prediction = model(x)
|
| 123 |
+
loss = criterion(prediction, y)
|
| 124 |
+
loss.backward()
|
| 125 |
+
# 记录loss
|
| 126 |
+
step_losses.append(loss.item())
|
| 127 |
+
# 记录梯度范数
|
| 128 |
+
total_norm = 0.0
|
| 129 |
+
for p in model.parameters():
|
| 130 |
+
if p.grad is not None:
|
| 131 |
+
param_norm = p.grad.data.norm(2)
|
| 132 |
+
total_norm += param_norm.item() ** 2
|
| 133 |
+
total_norm = total_norm ** 0.5
|
| 134 |
+
step_grads.append(total_norm)
|
| 135 |
+
optimizer.step()
|
| 136 |
+
# 保存loss和grad
|
| 137 |
+
np.save(f'{save_prefix}_step_losses.npy', np.array(step_losses))
|
| 138 |
+
np.save(f'{save_prefix}_step_grads.npy', np.array(step_grads))
|
| 139 |
+
return step_losses, step_grads
|
| 140 |
+
|
| 141 |
+
# Train your model
|
| 142 |
+
# feel free to modify
|
| 143 |
+
epo = 5
|
| 144 |
+
loss_save_path = ''
|
| 145 |
+
# grad_save_path = ''
|
| 146 |
+
|
| 147 |
+
set_random_seeds(seed_value=2020, device=device)
|
| 148 |
+
model = VGG_A()
|
| 149 |
+
lr = 0.001
|
| 150 |
+
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
|
| 151 |
+
criterion = nn.CrossEntropyLoss()
|
| 152 |
+
loss, train_acc, val_acc = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
|
| 153 |
+
np.savetxt(os.path.join(loss_save_path, 'loss.txt'), loss, fmt='%s', delimiter=' ')
|
| 154 |
+
# np.savetxt(os.path.join(grad_save_path, 'grads.txt'), grads, fmt='%s', delimiter=' ')
|
| 155 |
+
|
| 156 |
+
# Maintain two lists: max_curve and min_curve,
|
| 157 |
+
# select the maximum value of loss in all models
|
| 158 |
+
# on the same step, add it to max_curve, and
|
| 159 |
+
# the minimum value to min_curve
|
| 160 |
+
min_curve = []
|
| 161 |
+
max_curve = []
|
| 162 |
+
## --------------------
|
| 163 |
+
# Add your code
|
| 164 |
+
#
|
| 165 |
+
#
|
| 166 |
+
#
|
| 167 |
+
#
|
| 168 |
+
## --------------------
|
| 169 |
+
|
| 170 |
+
# Use this function to plot the final loss landscape,
|
| 171 |
+
# fill the area between the two curves can use plt.fill_between()
|
| 172 |
+
def plot_loss_landscape():
|
| 173 |
+
## --------------------
|
| 174 |
+
# Add your code
|
| 175 |
+
#
|
| 176 |
+
#
|
| 177 |
+
#
|
| 178 |
+
#
|
| 179 |
+
## --------------------
|
| 180 |
+
pass
|
| 181 |
+
|
| 182 |
+
if __name__ == "__main__":
|
| 183 |
+
epo = 10
|
| 184 |
+
set_random_seeds(seed_value=2020, device=device)
|
| 185 |
+
|
| 186 |
+
learning_rates = [1e-3, 2e-3, 1e-4, 5e-4]
|
| 187 |
+
criterion = nn.CrossEntropyLoss()
|
| 188 |
+
all_loss_a = []
|
| 189 |
+
all_loss_bn = []
|
| 190 |
+
all_grad_a = []
|
| 191 |
+
all_grad_bn = []
|
| 192 |
+
for lr in learning_rates:
|
| 193 |
+
# VGG-A
|
| 194 |
+
model_a = VGG_A()
|
| 195 |
+
optimizer_a = torch.optim.Adam(model_a.parameters(), lr=lr)
|
| 196 |
+
prefix_a = f'models/vgg_a_lr{lr}'
|
| 197 |
+
step_losses_a, step_grads_a = train_stepwise(model_a, optimizer_a, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_a)
|
| 198 |
+
torch.save(model_a.state_dict(), f'{prefix_a}.pth')
|
| 199 |
+
all_loss_a.append(step_losses_a)
|
| 200 |
+
all_grad_a.append(step_grads_a)
|
| 201 |
+
|
| 202 |
+
# VGG-A-BN
|
| 203 |
+
model_bn = VGG_A_BatchNorm()
|
| 204 |
+
optimizer_bn = torch.optim.Adam(model_bn.parameters(), lr=lr)
|
| 205 |
+
prefix_bn = f'models/vgg_bn_lr{lr}'
|
| 206 |
+
step_losses_bn, step_grads_bn = train_stepwise(model_bn, optimizer_bn, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_bn)
|
| 207 |
+
torch.save(model_bn.state_dict(), f'{prefix_bn}.pth')
|
| 208 |
+
all_loss_bn.append(step_losses_bn)
|
| 209 |
+
all_grad_bn.append(step_grads_bn)
|
| 210 |
+
|
| 211 |
+
# 分别为每个learning rate单独画图
|
| 212 |
+
for lr in learning_rates:
|
| 213 |
+
# 读取loss和grad
|
| 214 |
+
loss_a = np.load(f'models/vgg_a_lr{lr}_step_losses.npy')
|
| 215 |
+
loss_bn = np.load(f'models/vgg_bn_lr{lr}_step_losses.npy')
|
| 216 |
+
grad_a = np.load(f'models/vgg_a_lr{lr}_step_grads.npy')
|
| 217 |
+
grad_bn = np.load(f'models/vgg_bn_lr{lr}_step_grads.npy')
|
| 218 |
+
steps = np.arange(1, min(len(loss_a), len(loss_bn)) + 1)
|
| 219 |
+
|
| 220 |
+
# Loss对比
|
| 221 |
+
plt.figure(figsize=(8,5))
|
| 222 |
+
plt.plot(steps, loss_a[:len(steps)], 'r-', label='VGG-A (no BN)')
|
| 223 |
+
plt.plot(steps, loss_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
|
| 224 |
+
plt.xlabel('Step')
|
| 225 |
+
plt.ylabel('Training Loss')
|
| 226 |
+
plt.title(f'Loss Curve (lr={lr})')
|
| 227 |
+
plt.legend()
|
| 228 |
+
plt.tight_layout()
|
| 229 |
+
plt.savefig(f'vgg_loss_curve_lr{lr}.png')
|
| 230 |
+
plt.close()
|
| 231 |
+
|
| 232 |
+
# 梯度范数对比
|
| 233 |
+
plt.figure(figsize=(8,5))
|
| 234 |
+
plt.plot(steps, grad_a[:len(steps)], 'r-', label='VGG-A (no BN)')
|
| 235 |
+
plt.plot(steps, grad_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
|
| 236 |
+
plt.xlabel('Step')
|
| 237 |
+
plt.ylabel('Gradient Norm')
|
| 238 |
+
plt.title(f'Gradient Norm Curve (lr={lr})')
|
| 239 |
+
plt.legend()
|
| 240 |
+
plt.tight_layout()
|
| 241 |
+
plt.savefig(f'vgg_grad_norm_curve_lr{lr}.png')
|
| 242 |
+
plt.close()
|
| 243 |
+
|
| 244 |
+
# 最大梯度差
|
| 245 |
+
grad_diff_a = np.abs(np.diff(grad_a[:len(steps)]))
|
| 246 |
+
grad_diff_bn = np.abs(np.diff(grad_bn[:len(steps)]))
|
| 247 |
+
plt.figure(figsize=(8,5))
|
| 248 |
+
plt.plot(steps[1:], grad_diff_a, 'r-', label='VGG-A (no BN)')
|
| 249 |
+
plt.plot(steps[1:], grad_diff_bn, 'b-', label='VGG-A (with BN)')
|
| 250 |
+
plt.xlabel('Step')
|
| 251 |
+
plt.ylabel('Gradient Difference')
|
| 252 |
+
plt.title(f'Max Gradient Difference (lr={lr})')
|
| 253 |
+
plt.legend()
|
| 254 |
+
plt.tight_layout()
|
| 255 |
+
plt.savefig(f'vgg_max_grad_diff_lr{lr}.png')
|
| 256 |
+
plt.close()
|
VGG_BatchNorm/data/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
Scripts to download and generate data
|
| 3 |
+
'''
|
| 4 |
+
|
| 5 |
+
from . import loaders
|
VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (248 Bytes). View file
|
|
|
VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc
ADDED
|
Binary file (259 Bytes). View file
|
|
|
VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc
ADDED
|
Binary file (1.93 kB). View file
|
|
|
VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc
ADDED
|
Binary file (1.9 kB). View file
|
|
|
VGG_BatchNorm/data/cifar-10-python.tar.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f45163964244cea80d8b2367396f1a64e529767c1a4e2c0c91c67b8ac2f691e1
|
| 3 |
+
size 5226496
|
VGG_BatchNorm/data/loaders.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data loaders
|
| 3 |
+
"""
|
| 4 |
+
import matplotlib as mpl
|
| 5 |
+
mpl.use('Agg')
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import numpy as np
|
| 8 |
+
from torch.utils.data import DataLoader, Dataset
|
| 9 |
+
from torchvision import transforms
|
| 10 |
+
import torchvision.datasets as datasets
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class PartialDataset(Dataset):
|
| 15 |
+
def __init__(self, dataset, n_items=10):
|
| 16 |
+
self.dataset = dataset
|
| 17 |
+
self.n_items = n_items
|
| 18 |
+
|
| 19 |
+
def __getitem__(self):
|
| 20 |
+
return self.dataset.__getitem__()
|
| 21 |
+
|
| 22 |
+
def __len__(self):
|
| 23 |
+
return min(self.n_items, len(self.dataset))
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_cifar_loader(root='../data/', batch_size=128, train=True, shuffle=True, num_workers=4, n_items=-1):
|
| 27 |
+
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
|
| 28 |
+
std=[0.5, 0.5, 0.5])
|
| 29 |
+
|
| 30 |
+
data_transforms = transforms.Compose(
|
| 31 |
+
[transforms.ToTensor(),
|
| 32 |
+
normalize])
|
| 33 |
+
|
| 34 |
+
dataset = datasets.CIFAR10(root=root, train=train, download=True, transform=data_transforms)
|
| 35 |
+
if n_items > 0:
|
| 36 |
+
dataset = PartialDataset(dataset, n_items)
|
| 37 |
+
|
| 38 |
+
loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
|
| 39 |
+
|
| 40 |
+
return loader
|
| 41 |
+
|
| 42 |
+
if __name__ == '__main__':
|
| 43 |
+
train_loader = get_cifar_loader()
|
| 44 |
+
for X, y in train_loader:
|
| 45 |
+
print(X[0])
|
| 46 |
+
print(y[0])
|
| 47 |
+
print(X[0].shape)
|
| 48 |
+
img = np.transpose(X[0], [1,2,0])
|
| 49 |
+
plt.imshow(img*0.5 + 0.5)
|
| 50 |
+
plt.savefig('sample.png')
|
| 51 |
+
print(X[0].max())
|
| 52 |
+
print(X[0].min())
|
| 53 |
+
break
|
VGG_BatchNorm/loss.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1.7658132662248733
|
| 2 |
+
1.2146569554458189
|
| 3 |
+
0.9086841719839579
|
| 4 |
+
0.7367523306471002
|
| 5 |
+
0.6139206301678172
|
VGG_BatchNorm/models/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
Models implementation and training & evaluating functions
|
| 3 |
+
'''
|
| 4 |
+
|
| 5 |
+
from . import vgg
|
VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (266 Bytes). View file
|
|
|
VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc
ADDED
|
Binary file (277 Bytes). View file
|
|
|
VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc
ADDED
|
Binary file (4.93 kB). View file
|
|
|
VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc
ADDED
|
Binary file (3.96 kB). View file
|
|
|
VGG_BatchNorm/models/vgg.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VGG
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
from torch import nn
|
| 6 |
+
|
| 7 |
+
from utils.nn import init_weights_
|
| 8 |
+
|
| 9 |
+
# ## Models implementation
|
| 10 |
+
def get_number_of_parameters(model):
|
| 11 |
+
parameters_n = 0
|
| 12 |
+
for parameter in model.parameters():
|
| 13 |
+
parameters_n += np.prod(parameter.shape).item()
|
| 14 |
+
|
| 15 |
+
return parameters_n
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class VGG_A(nn.Module):
|
| 19 |
+
"""VGG_A model
|
| 20 |
+
|
| 21 |
+
size of Linear layers is smaller since input assumed to be 32x32x3, instead of
|
| 22 |
+
224x224x3
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
|
| 26 |
+
super().__init__()
|
| 27 |
+
|
| 28 |
+
self.features = nn.Sequential(
|
| 29 |
+
# stage 1
|
| 30 |
+
nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
|
| 31 |
+
nn.ReLU(True),
|
| 32 |
+
nn.MaxPool2d(kernel_size=2, stride=2),
|
| 33 |
+
|
| 34 |
+
# stage 2
|
| 35 |
+
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
|
| 36 |
+
nn.ReLU(True),
|
| 37 |
+
nn.MaxPool2d(kernel_size=2, stride=2),
|
| 38 |
+
|
| 39 |
+
# stage 3
|
| 40 |
+
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
|
| 41 |
+
nn.ReLU(True),
|
| 42 |
+
nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
|
| 43 |
+
nn.ReLU(True),
|
| 44 |
+
nn.MaxPool2d(kernel_size=2, stride=2),
|
| 45 |
+
|
| 46 |
+
# stage 4
|
| 47 |
+
nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
|
| 48 |
+
nn.ReLU(True),
|
| 49 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 50 |
+
nn.ReLU(True),
|
| 51 |
+
nn.MaxPool2d(kernel_size=2, stride=2),
|
| 52 |
+
|
| 53 |
+
# stage5
|
| 54 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 55 |
+
nn.ReLU(True),
|
| 56 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 57 |
+
nn.ReLU(True),
|
| 58 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 59 |
+
|
| 60 |
+
self.classifier = nn.Sequential(
|
| 61 |
+
nn.Linear(512 * 1 * 1, 512),
|
| 62 |
+
nn.ReLU(),
|
| 63 |
+
nn.Linear(512, 512),
|
| 64 |
+
nn.ReLU(),
|
| 65 |
+
nn.Linear(512, num_classes))
|
| 66 |
+
|
| 67 |
+
if init_weights:
|
| 68 |
+
self._init_weights()
|
| 69 |
+
|
| 70 |
+
def forward(self, x):
|
| 71 |
+
x = self.features(x)
|
| 72 |
+
x = self.classifier(x.view(-1, 512 * 1 * 1))
|
| 73 |
+
return x
|
| 74 |
+
|
| 75 |
+
def _init_weights(self):
|
| 76 |
+
for m in self.modules():
|
| 77 |
+
init_weights_(m)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class VGG_A_Light(nn.Module):
|
| 81 |
+
def __init__(self, inp_ch=3, num_classes=10):
|
| 82 |
+
super().__init__()
|
| 83 |
+
|
| 84 |
+
self.stage1 = nn.Sequential(
|
| 85 |
+
nn.Conv2d(in_channels=inp_ch, out_channels=16, kernel_size=3, padding=1),
|
| 86 |
+
nn.ReLU(),
|
| 87 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 88 |
+
|
| 89 |
+
self.stage2 = nn.Sequential(
|
| 90 |
+
nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
|
| 91 |
+
nn.ReLU(),
|
| 92 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 93 |
+
'''
|
| 94 |
+
self.stage3 = nn.Sequential(
|
| 95 |
+
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
|
| 96 |
+
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
|
| 97 |
+
nn.ReLU(),
|
| 98 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 99 |
+
|
| 100 |
+
self.stage4 = nn.Sequential(
|
| 101 |
+
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
|
| 102 |
+
nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
|
| 103 |
+
nn.ReLU(),
|
| 104 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 105 |
+
|
| 106 |
+
self.stage5 = nn.Sequential(
|
| 107 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 108 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 109 |
+
nn.ReLU(),
|
| 110 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 111 |
+
'''
|
| 112 |
+
self.classifier = nn.Sequential(
|
| 113 |
+
nn.Linear(32 * 8 * 8, 128),
|
| 114 |
+
nn.ReLU(),
|
| 115 |
+
nn.Linear(128, 128),
|
| 116 |
+
nn.ReLU(),
|
| 117 |
+
nn.Linear(128, num_classes))
|
| 118 |
+
|
| 119 |
+
def forward(self, x):
|
| 120 |
+
x = self.stage1(x)
|
| 121 |
+
x = self.stage2(x)
|
| 122 |
+
# x = self.stage3(x)
|
| 123 |
+
# x = self.stage4(x)
|
| 124 |
+
# x = self.stage5(x)
|
| 125 |
+
x = self.classifier(x.view(-1, 32 * 8 * 8))
|
| 126 |
+
return x
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class VGG_A_Dropout(nn.Module):
|
| 130 |
+
def __init__(self, inp_ch=3, num_classes=10):
|
| 131 |
+
super().__init__()
|
| 132 |
+
|
| 133 |
+
self.stage1 = nn.Sequential(
|
| 134 |
+
nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
|
| 135 |
+
nn.ReLU(True),
|
| 136 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 137 |
+
|
| 138 |
+
self.stage2 = nn.Sequential(
|
| 139 |
+
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
|
| 140 |
+
nn.ReLU(True),
|
| 141 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 142 |
+
|
| 143 |
+
self.stage3 = nn.Sequential(
|
| 144 |
+
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
|
| 145 |
+
nn.ReLU(True),
|
| 146 |
+
nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
|
| 147 |
+
nn.ReLU(True),
|
| 148 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 149 |
+
|
| 150 |
+
self.stage4 = nn.Sequential(
|
| 151 |
+
nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
|
| 152 |
+
nn.ReLU(True),
|
| 153 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 154 |
+
nn.ReLU(True),
|
| 155 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 156 |
+
|
| 157 |
+
self.stage5 = nn.Sequential(
|
| 158 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 159 |
+
nn.ReLU(True),
|
| 160 |
+
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
|
| 161 |
+
nn.ReLU(True),
|
| 162 |
+
nn.MaxPool2d(kernel_size=2, stride=2))
|
| 163 |
+
|
| 164 |
+
self.classifier = nn.Sequential(
|
| 165 |
+
nn.Dropout(),
|
| 166 |
+
nn.Linear(512 * 1 * 1, 512),
|
| 167 |
+
nn.ReLU(True),
|
| 168 |
+
nn.Dropout(),
|
| 169 |
+
nn.Linear(512, 512),
|
| 170 |
+
nn.ReLU(True),
|
| 171 |
+
nn.Linear(512, num_classes))
|
| 172 |
+
|
| 173 |
+
def forward(self, x):
|
| 174 |
+
x = self.stage1(x)
|
| 175 |
+
x = self.stage2(x)
|
| 176 |
+
x = self.stage3(x)
|
| 177 |
+
x = self.stage4(x)
|
| 178 |
+
x = self.stage5(x)
|
| 179 |
+
x = self.classifier(x.view(-1, 512 * 1 * 1))
|
| 180 |
+
return x
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
class VGG_A_BatchNorm(nn.Module):
|
| 184 |
+
def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
|
| 185 |
+
super().__init__()
|
| 186 |
+
self.features = nn.Sequential(
|
| 187 |
+
nn.Conv2d(inp_ch, 64, 3, padding=1),
|
| 188 |
+
nn.BatchNorm2d(64),
|
| 189 |
+
nn.ReLU(True),
|
| 190 |
+
nn.MaxPool2d(2, 2),
|
| 191 |
+
|
| 192 |
+
nn.Conv2d(64, 128, 3, padding=1),
|
| 193 |
+
nn.BatchNorm2d(128),
|
| 194 |
+
nn.ReLU(True),
|
| 195 |
+
nn.MaxPool2d(2, 2),
|
| 196 |
+
|
| 197 |
+
nn.Conv2d(128, 256, 3, padding=1),
|
| 198 |
+
nn.BatchNorm2d(256),
|
| 199 |
+
nn.ReLU(True),
|
| 200 |
+
nn.Conv2d(256, 256, 3, padding=1),
|
| 201 |
+
nn.BatchNorm2d(256),
|
| 202 |
+
nn.ReLU(True),
|
| 203 |
+
nn.MaxPool2d(2, 2),
|
| 204 |
+
|
| 205 |
+
nn.Conv2d(256, 512, 3, padding=1),
|
| 206 |
+
nn.BatchNorm2d(512),
|
| 207 |
+
nn.ReLU(True),
|
| 208 |
+
nn.Conv2d(512, 512, 3, padding=1),
|
| 209 |
+
nn.BatchNorm2d(512),
|
| 210 |
+
nn.ReLU(True),
|
| 211 |
+
nn.MaxPool2d(2, 2),
|
| 212 |
+
|
| 213 |
+
nn.Conv2d(512, 512, 3, padding=1),
|
| 214 |
+
nn.BatchNorm2d(512),
|
| 215 |
+
nn.ReLU(True),
|
| 216 |
+
nn.Conv2d(512, 512, 3, padding=1),
|
| 217 |
+
nn.BatchNorm2d(512),
|
| 218 |
+
nn.ReLU(True),
|
| 219 |
+
nn.MaxPool2d(2, 2)
|
| 220 |
+
)
|
| 221 |
+
self.classifier = nn.Sequential(
|
| 222 |
+
nn.Linear(512 * 1 * 1, 512),
|
| 223 |
+
nn.ReLU(),
|
| 224 |
+
nn.Linear(512, 512),
|
| 225 |
+
nn.ReLU(),
|
| 226 |
+
nn.Linear(512, num_classes)
|
| 227 |
+
)
|
| 228 |
+
if init_weights:
|
| 229 |
+
self._init_weights()
|
| 230 |
+
def forward(self, x):
|
| 231 |
+
x = self.features(x)
|
| 232 |
+
x = self.classifier(x.view(-1, 512 * 1 * 1))
|
| 233 |
+
return x
|
| 234 |
+
def _init_weights(self):
|
| 235 |
+
for m in self.modules():
|
| 236 |
+
init_weights_(m)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
if __name__ == '__main__':
|
| 240 |
+
print(get_number_of_parameters(VGG_A()))
|
| 241 |
+
print(get_number_of_parameters(VGG_A_Light()))
|
| 242 |
+
print(get_number_of_parameters(VGG_A_Dropout()))
|
| 243 |
+
print(get_number_of_parameters(VGG_A_BatchNorm()))
|
VGG_BatchNorm/models/vgg_a_lr0.0001.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:105171da7fd7bf680dd5d4c6d9484019a469849021e775edade244a84b139bfb
|
| 3 |
+
size 39011786
|
VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40fd79ad6ea0064b44c91905f42549bac9f3e4439f2ac7b3e4f4b61681b7a81f
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc50830495cff83662ea2603a3b39b8f882f45341a6aa95ac8cc9a731c315196
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.0005.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3db4e136d92dc94a1f8601d7700e922e466a049e91891840f0374d6d27c98db
|
| 3 |
+
size 39011786
|
VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5703f3db77174dc12448202f46dac32866fa7ce47383c961b0c0d85bfc4c89b7
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3838ee654dddc55efa6b63f65594901803a410f17e90e2d2402ba51127c94db
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.001.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7bab30b087815d4470cb665e2aeadcb2c2c786f5f21746bcf07e93cef1e3669
|
| 3 |
+
size 39011760
|
VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a558422f2f2eb6233e061ed0e24fea94a7525074255f8f91dd5d6ed6b2dd1c
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7f74eebcff55116c216fbaca90a56280090d6a01ab7223cad91d8cb42c97747
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.002.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5e8453a024e60b3bb8e680b0aeed1d17bcf1545b2de4ca030f57513e0c5f7be
|
| 3 |
+
size 39011760
|
VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a22fc25871c1a180f423cc498b60f1083d07c2e45d605939889b5c90a35afcfa
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7868e3df3d4ae5b25889df52f98ef5e4bb98dcd741f62667b4671b3e16ae981
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.0001.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9451b3372e6897dd96be45a1902a9629e8f313e9019162d8f9ce5251ffc542a3
|
| 3 |
+
size 39068716
|
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8880316d60e582db12979390d79b6f5df7a1b0c3b31e90ff164de5f0930d0aef
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a2ee1fe92633edf653d27d5139c882ffdd331319832aedf391a9d8f9a2c42e8
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.0005.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17b108a6b387ba7f8dbaf5f8192cebd96e753d3b2eec071fdb9047ec18ac56af
|
| 3 |
+
size 39068716
|
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75477b2424055e4773105ed1c3e30b3f865263d93620cdb0ab8ea8cc7ab90e5b
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3261b9564be6ae816ec048ac8189aec7e16eb0b9ed6a31a4851356a19f09ce04
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.001.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9765324e222a9d6ff7b51ae6b5f3f97a1e1cc99197654f906f8e7611b9b8d193
|
| 3 |
+
size 39068650
|
VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a2224404b4ba0e30fb143ddbaee1976528671afe3b2f25a14499dc7dd63a87d
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50030e0c9e31a85a981d7b60d0094b758fa8efb37f89ca309ec7025cfa71787e
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.002.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1488c03104e9de23b07a9deb22ef6afb1ba3b7f6aaa5dad9ba5a46a4c51082c4
|
| 3 |
+
size 39068650
|
VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebf1d2594a3cd9783d29c71a69d124d00928c24ba50d143dfb58c72cd06f601b
|
| 3 |
+
size 31408
|
VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:215478b68ca6ea0590bc76f7353771a923a1f10422c5b9dfa022526d1ad0d41b
|
| 3 |
+
size 31408
|
VGG_BatchNorm/utils/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
Several utils, in particular for experiments
|
| 3 |
+
'''
|
| 4 |
+
from . import nn
|
VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (251 Bytes). View file
|
|
|
VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc
ADDED
|
Binary file (262 Bytes). View file
|
|
|
VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc
ADDED
|
Binary file (788 Bytes). View file
|
|
|
VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc
ADDED
|
Binary file (771 Bytes). View file
|
|
|
VGG_BatchNorm/utils/nn.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utils for neural networks
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from torch import nn
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def init_weights_(m):
    """
    Re-initialize the parameters of module *m* in place.

    Conv2d and Linear layers get Xavier-normal weights, with their bias
    zeroed when one exists; BatchNorm1d/BatchNorm2d layers get weight
    (gamma) set to one and bias (beta) set to zero. Any other module
    type is left untouched, so this is safe to pass to ``Module.apply``.

    :param m: module whose parameters are (re-)initialized
    :return: None
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
        # NOTE(review): assumes affine=True (the default), i.e. weight/bias exist.
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)
|
VGG_BatchNorm/vgg_bn_compare.png
ADDED
|
VGG_BatchNorm/vgg_bn_loss_landscape.png
ADDED
|
VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png
ADDED
|