unfair221 committed on
Commit
0bae2fa
·
verified ·
1 Parent(s): bc57809

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. VGG_BatchNorm/.DS_Store +0 -0
  3. VGG_BatchNorm/VGG_Loss_Landscape.py +256 -0
  4. VGG_BatchNorm/data/__init__.py +5 -0
  5. VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc +0 -0
  6. VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc +0 -0
  7. VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc +0 -0
  8. VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc +0 -0
  9. VGG_BatchNorm/data/cifar-10-python.tar.gz +3 -0
  10. VGG_BatchNorm/data/loaders.py +53 -0
  11. VGG_BatchNorm/loss.txt +5 -0
  12. VGG_BatchNorm/models/__init__.py +5 -0
  13. VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc +0 -0
  14. VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc +0 -0
  15. VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc +0 -0
  16. VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc +0 -0
  17. VGG_BatchNorm/models/vgg.py +243 -0
  18. VGG_BatchNorm/models/vgg_a_lr0.0001.pth +3 -0
  19. VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy +3 -0
  20. VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy +3 -0
  21. VGG_BatchNorm/models/vgg_a_lr0.0005.pth +3 -0
  22. VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy +3 -0
  23. VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy +3 -0
  24. VGG_BatchNorm/models/vgg_a_lr0.001.pth +3 -0
  25. VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy +3 -0
  26. VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy +3 -0
  27. VGG_BatchNorm/models/vgg_a_lr0.002.pth +3 -0
  28. VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy +3 -0
  29. VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy +3 -0
  30. VGG_BatchNorm/models/vgg_bn_lr0.0001.pth +3 -0
  31. VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy +3 -0
  32. VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy +3 -0
  33. VGG_BatchNorm/models/vgg_bn_lr0.0005.pth +3 -0
  34. VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy +3 -0
  35. VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy +3 -0
  36. VGG_BatchNorm/models/vgg_bn_lr0.001.pth +3 -0
  37. VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy +3 -0
  38. VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy +3 -0
  39. VGG_BatchNorm/models/vgg_bn_lr0.002.pth +3 -0
  40. VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy +3 -0
  41. VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy +3 -0
  42. VGG_BatchNorm/utils/__init__.py +4 -0
  43. VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  44. VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc +0 -0
  45. VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc +0 -0
  46. VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc +0 -0
  47. VGG_BatchNorm/utils/nn.py +31 -0
  48. VGG_BatchNorm/vgg_bn_compare.png +0 -0
  49. VGG_BatchNorm/vgg_bn_loss_landscape.png +0 -0
  50. VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
37
+ data/cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
38
+ data/cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
39
+ data/cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
40
+ data/cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
41
+ data/cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
42
+ fig/feature_maps_conv1.png filter=lfs diff=lfs merge=lfs -text
VGG_BatchNorm/.DS_Store ADDED
Binary file (6.15 kB). View file
 
VGG_BatchNorm/VGG_Loss_Landscape.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from models.vgg import VGG_A
from models.vgg import VGG_A_BatchNorm  # you need to implement this network
from data.loaders import get_cifar_loader

# ## Constants (parameters) initialization
device_id = [0, 1, 2, 3]
num_workers = 4
batch_size = 128

# add our package dir to path
module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')

# Make sure you are using the right device.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
device = torch.device("cuda:{}".format(3) if torch.cuda.is_available() else "cpu")
print(device)
# FIX: the original unconditionally called torch.cuda.get_device_name(3),
# which raises on CPU-only machines even though `device` correctly fell
# back to "cpu" above. Only query the GPU name when CUDA is available.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(3))

# Initialize your data loader and make sure that the dataloader works
# as expected by observing one sample from it.
train_loader = get_cifar_loader(train=True)
val_loader = get_cifar_loader(train=False)
for X, y in train_loader:
    ## --------------------
    # Add code as needed to inspect one sample batch.
    ## --------------------
    break
51
+
52
+
53
+
54
# This function is used to calculate the accuracy of model classification
def get_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    Runs in eval mode with gradients disabled; batches are moved to the
    module-level ``device`` before the forward pass.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            preds = logits.argmax(dim=1)
            n_seen += targets.size(0)
            n_correct += (preds == targets).sum().item()
    return n_correct / n_seen
67
+
68
# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    """Seed numpy, torch and the stdlib RNGs for reproducibility.

    When ``device`` is not the CPU, additionally seeds all CUDA devices and
    forces cuDNN into deterministic mode (disabling benchmark autotuning).
    """
    for seeder in (np.random.seed, torch.manual_seed, random.seed):
        seeder(seed_value)
    if device != 'cpu':
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
78
+
79
+
80
# We use this function to complete the entire training process. In order
# to plot the loss landscape, you need to record the loss value of each
# step. As before, you can test your model after each training round and
# save the curves to observe the training.
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    """Train ``model`` for ``epochs_n`` epochs on ``train_loader``.

    Returns three per-epoch lists: mean training loss, training accuracy
    and validation accuracy. ``scheduler`` and ``best_model_path`` are
    accepted for API compatibility but not used here.
    """
    model.to(device)
    learning_curve = []
    train_accuracy_curve = []
    val_accuracy_curve = []
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        model.train()
        epoch_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()
            output = model(batch_x)
            batch_loss = criterion(output, batch_y)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += batch_loss.item()
        avg_loss = epoch_loss / len(train_loader)
        learning_curve.append(avg_loss)
        # Evaluate after every epoch so both accuracy curves stay aligned
        # with the loss curve.
        train_acc = get_accuracy(model, train_loader)
        val_acc = get_accuracy(model, val_loader)
        train_accuracy_curve.append(train_acc)
        val_accuracy_curve.append(val_acc)
        print(f'Epoch {epoch+1}: loss={avg_loss:.4f}, train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')
    return learning_curve, train_accuracy_curve, val_accuracy_curve
112
+
113
def train_stepwise(model, optimizer, criterion, train_loader, val_loader, epochs_n=10, save_prefix=''):
    """Train while recording the loss and gradient L2-norm of every step.

    The two series are saved to ``{save_prefix}_step_losses.npy`` and
    ``{save_prefix}_step_grads.npy`` and also returned. ``val_loader`` is
    accepted for API compatibility but not used here.
    """
    model.to(device)
    model.train()
    step_losses = []
    step_grads = []
    for _ in range(epochs_n):
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            # Record the per-step loss.
            step_losses.append(loss.item())
            # Record the global gradient L2 norm (taken before the update).
            sq_sum = sum(
                p.grad.data.norm(2).item() ** 2
                for p in model.parameters()
                if p.grad is not None
            )
            step_grads.append(sq_sum ** 0.5)
            optimizer.step()
    # Persist both series for later plotting.
    np.save(f'{save_prefix}_step_losses.npy', np.array(step_losses))
    np.save(f'{save_prefix}_step_grads.npy', np.array(step_grads))
    return step_losses, step_grads
140
+
141
# Train your model
# feel free to modify
epo = 5
loss_save_path = ''  # directory for loss.txt; '' means the current directory
# grad_save_path = ''

# Quick training run of plain VGG-A with Adam, then persist the loss curve.
set_random_seeds(seed_value=2020, device=device)
model = VGG_A()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()
loss, train_acc, val_acc = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
# One mean-training-loss value per line.
np.savetxt(os.path.join(loss_save_path, 'loss.txt'), loss, fmt='%s', delimiter=' ')
# np.savetxt(os.path.join(grad_save_path, 'grads.txt'), grads, fmt='%s', delimiter=' ')

# Maintain two lists: max_curve and min_curve,
# select the maximum value of loss in all models
# on the same step, add it to max_curve, and
# the minimum value to min_curve
min_curve = []
max_curve = []
## --------------------
# Add your code
#
#
#
#
## --------------------
169
+
170
# Use this function to plot the final loss landscape,
# fill the area between the two curves can use plt.fill_between()
def plot_loss_landscape(min_vals=None, max_vals=None, save_path='vgg_bn_loss_landscape.png'):
    """Plot the loss landscape as the band between two per-step curves.

    The original was an empty stub; this fills it in with the intended
    ``plt.fill_between`` visualisation.

    Args:
        min_vals: per-step minimum losses; defaults to the module-level
            ``min_curve`` (keeps the old zero-argument call working).
        max_vals: per-step maximum losses; defaults to ``max_curve``.
        save_path: path of the output image.
    """
    if min_vals is None:
        min_vals = min_curve
    if max_vals is None:
        max_vals = max_curve
    if not len(min_vals) or not len(max_vals):
        # Nothing recorded yet -- preserve the old no-op behaviour.
        return
    steps = np.arange(1, min(len(min_vals), len(max_vals)) + 1)
    plt.figure(figsize=(8, 5))
    plt.fill_between(steps,
                     np.asarray(min_vals)[:len(steps)],
                     np.asarray(max_vals)[:len(steps)],
                     color='steelblue', alpha=0.4, label='loss range')
    plt.xlabel('Step')
    plt.ylabel('Training Loss')
    plt.title('Loss Landscape')
    plt.legend()
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
181
+
182
if __name__ == "__main__":
    # Compare VGG-A with and without BatchNorm across several learning
    # rates, recording per-step losses and gradient norms for each run.
    epo = 10
    set_random_seeds(seed_value=2020, device=device)

    learning_rates = [1e-3, 2e-3, 1e-4, 5e-4]
    criterion = nn.CrossEntropyLoss()
    all_loss_a = []
    all_loss_bn = []
    all_grad_a = []
    all_grad_bn = []
    for lr in learning_rates:
        # VGG-A (no BatchNorm)
        model_a = VGG_A()
        optimizer_a = torch.optim.Adam(model_a.parameters(), lr=lr)
        prefix_a = f'models/vgg_a_lr{lr}'
        step_losses_a, step_grads_a = train_stepwise(model_a, optimizer_a, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_a)
        torch.save(model_a.state_dict(), f'{prefix_a}.pth')
        all_loss_a.append(step_losses_a)
        all_grad_a.append(step_grads_a)

        # VGG-A with BatchNorm
        model_bn = VGG_A_BatchNorm()
        optimizer_bn = torch.optim.Adam(model_bn.parameters(), lr=lr)
        prefix_bn = f'models/vgg_bn_lr{lr}'
        step_losses_bn, step_grads_bn = train_stepwise(model_bn, optimizer_bn, criterion, train_loader, val_loader, epochs_n=epo, save_prefix=prefix_bn)
        torch.save(model_bn.state_dict(), f'{prefix_bn}.pth')
        all_loss_bn.append(step_losses_bn)
        all_grad_bn.append(step_grads_bn)

    # Draw a separate set of figures for every learning rate.
    for lr in learning_rates:
        # Load the recorded losses and gradient norms from disk.
        loss_a = np.load(f'models/vgg_a_lr{lr}_step_losses.npy')
        loss_bn = np.load(f'models/vgg_bn_lr{lr}_step_losses.npy')
        grad_a = np.load(f'models/vgg_a_lr{lr}_step_grads.npy')
        grad_bn = np.load(f'models/vgg_bn_lr{lr}_step_grads.npy')
        # Truncate both runs to the shorter one so the x-axes line up.
        steps = np.arange(1, min(len(loss_a), len(loss_bn)) + 1)

        # Loss comparison (with vs. without BatchNorm).
        plt.figure(figsize=(8,5))
        plt.plot(steps, loss_a[:len(steps)], 'r-', label='VGG-A (no BN)')
        plt.plot(steps, loss_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Training Loss')
        plt.title(f'Loss Curve (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_loss_curve_lr{lr}.png')
        plt.close()

        # Gradient-norm comparison.
        plt.figure(figsize=(8,5))
        plt.plot(steps, grad_a[:len(steps)], 'r-', label='VGG-A (no BN)')
        plt.plot(steps, grad_bn[:len(steps)], 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Gradient Norm')
        plt.title(f'Gradient Norm Curve (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_grad_norm_curve_lr{lr}.png')
        plt.close()

        # Step-to-step gradient difference (smoothness proxy).
        grad_diff_a = np.abs(np.diff(grad_a[:len(steps)]))
        grad_diff_bn = np.abs(np.diff(grad_bn[:len(steps)]))
        plt.figure(figsize=(8,5))
        plt.plot(steps[1:], grad_diff_a, 'r-', label='VGG-A (no BN)')
        plt.plot(steps[1:], grad_diff_bn, 'b-', label='VGG-A (with BN)')
        plt.xlabel('Step')
        plt.ylabel('Gradient Difference')
        plt.title(f'Max Gradient Difference (lr={lr})')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'vgg_max_grad_diff_lr{lr}.png')
        plt.close()
VGG_BatchNorm/data/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ '''
2
+ Scripts to download and generate data
3
+ '''
4
+
5
+ from . import loaders
VGG_BatchNorm/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (248 Bytes). View file
 
VGG_BatchNorm/data/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (259 Bytes). View file
 
VGG_BatchNorm/data/__pycache__/loaders.cpython-310.pyc ADDED
Binary file (1.93 kB). View file
 
VGG_BatchNorm/data/__pycache__/loaders.cpython-37.pyc ADDED
Binary file (1.9 kB). View file
 
VGG_BatchNorm/data/cifar-10-python.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f45163964244cea80d8b2367396f1a64e529767c1a4e2c0c91c67b8ac2f691e1
3
+ size 5226496
VGG_BatchNorm/data/loaders.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data loaders
3
+ """
4
+ import matplotlib as mpl
5
+ mpl.use('Agg')
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ from torch.utils.data import DataLoader, Dataset
9
+ from torchvision import transforms
10
+ import torchvision.datasets as datasets
11
+
12
+
13
+
14
class PartialDataset(Dataset):
    """Wrap ``dataset`` and expose only its first ``n_items`` samples.

    Fixes the original ``__getitem__``, which accepted no index and called
    ``self.dataset.__getitem__()`` without one -- every access raised a
    TypeError, so the wrapper was unusable with a DataLoader.
    """

    def __init__(self, dataset, n_items=10):
        # dataset: any indexable dataset; n_items: cap on exposed length.
        self.dataset = dataset
        self.n_items = n_items

    def __getitem__(self, index):
        # Delegate to the wrapped dataset; indices are bounded by __len__.
        return self.dataset[index]

    def __len__(self):
        return min(self.n_items, len(self.dataset))
24
+
25
+
26
def get_cifar_loader(root='../data/', batch_size=128, train=True, shuffle=True, num_workers=4, n_items=-1):
    """Build a CIFAR-10 DataLoader, downloading the dataset if needed.

    Images are converted to tensors and normalised channel-wise from
    [0, 1] to [-1, 1]. When ``n_items`` is positive, only that many
    samples are exposed via ``PartialDataset``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    dataset = datasets.CIFAR10(root=root, train=train, download=True, transform=transform)
    if n_items > 0:
        dataset = PartialDataset(dataset, n_items)

    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
41
+
42
if __name__ == '__main__':
    # Smoke test: fetch one batch, print one sample and save it as an image.
    train_loader = get_cifar_loader()
    for X, y in train_loader:
        print(X[0])
        print(y[0])
        print(X[0].shape)
        # CHW -> HWC for matplotlib; *0.5 + 0.5 undoes the [-1, 1] normalisation.
        img = np.transpose(X[0], [1,2,0])
        plt.imshow(img*0.5 + 0.5)
        plt.savefig('sample.png')
        print(X[0].max())
        print(X[0].min())
        break
VGG_BatchNorm/loss.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 1.7658132662248733
2
+ 1.2146569554458189
3
+ 0.9086841719839579
4
+ 0.7367523306471002
5
+ 0.6139206301678172
VGG_BatchNorm/models/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ '''
2
+ Models implementation and training & evaluating functions
3
+ '''
4
+
5
+ from . import vgg
VGG_BatchNorm/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (266 Bytes). View file
 
VGG_BatchNorm/models/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (277 Bytes). View file
 
VGG_BatchNorm/models/__pycache__/vgg.cpython-310.pyc ADDED
Binary file (4.93 kB). View file
 
VGG_BatchNorm/models/__pycache__/vgg.cpython-37.pyc ADDED
Binary file (3.96 kB). View file
 
VGG_BatchNorm/models/vgg.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VGG
3
+ """
4
+ import numpy as np
5
+ from torch import nn
6
+
7
+ from utils.nn import init_weights_
8
+
9
+ # ## Models implementation
10
def get_number_of_parameters(model):
    """Return the total number of scalar parameters in ``model``."""
    return sum(np.prod(p.shape).item() for p in model.parameters())
16
+
17
+
18
class VGG_A(nn.Module):
    """VGG_A model

    size of Linear layers is smaller since input assumed to be 32x32x3, instead of
    224x224x3
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        # inp_ch: number of input channels (3 for RGB CIFAR-10 images).
        # num_classes: size of the final classification layer.
        # init_weights: apply the init_weights_ scheme when True.
        super().__init__()

        self.features = nn.Sequential(
            # stage 1
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # stage5
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        # Five 2x2 max-pools reduce 32x32 inputs to 1x1, hence 512*1*1 features.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes))

        if init_weights:
            self._init_weights()

    def forward(self, x):
        # Flatten the 512x1x1 feature map before the fully-connected head.
        x = self.features(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x

    def _init_weights(self):
        # Delegates to utils.nn.init_weights_ for every submodule.
        for m in self.modules():
            init_weights_(m)
78
+
79
+
80
class VGG_A_Light(nn.Module):
    """Lightweight two-stage variant of VGG_A (stages 3-5 are disabled below)."""

    def __init__(self, inp_ch=3, num_classes=10):
        # inp_ch: number of input channels; num_classes: output classes.
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        '''
        self.stage3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        '''
        # Two 2x2 max-pools reduce 32x32 inputs to 8x8, hence 32*8*8 features.
        self.classifier = nn.Sequential(
            nn.Linear(32 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes))

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        # x = self.stage3(x)
        # x = self.stage4(x)
        # x = self.stage5(x)
        x = self.classifier(x.view(-1, 32 * 8 * 8))
        return x
127
+
128
+
129
class VGG_A_Dropout(nn.Module):
    """VGG_A variant with Dropout layers in the fully-connected classifier."""

    def __init__(self, inp_ch=3, num_classes=10):
        # inp_ch: number of input channels; num_classes: output classes.
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(in_channels=inp_ch, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.stage5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2))

        # Dropout before each hidden Linear regularises the dense head.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes))

    def forward(self, x):
        # Five pooled stages reduce 32x32 inputs to 1x1 before flattening.
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x
181
+
182
+
183
class VGG_A_BatchNorm(nn.Module):
    """VGG_A with a BatchNorm2d layer after every convolution.

    Same architecture as VGG_A otherwise; used to compare training
    dynamics with and without batch normalisation.
    """

    def __init__(self, inp_ch=3, num_classes=10, init_weights=True):
        # inp_ch: number of input channels; num_classes: output classes;
        # init_weights: apply the init_weights_ scheme when True.
        super().__init__()
        self.features = nn.Sequential(
            # stage 1
            nn.Conv2d(inp_ch, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 2
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 3
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 4
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),

            # stage 5
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        # Five 2x2 max-pools reduce 32x32 inputs to 1x1, hence 512*1*1 features.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )
        if init_weights:
            self._init_weights()
    def forward(self, x):
        # Flatten the 512x1x1 feature map before the fully-connected head.
        x = self.features(x)
        x = self.classifier(x.view(-1, 512 * 1 * 1))
        return x
    def _init_weights(self):
        # Delegates to utils.nn.init_weights_ for every submodule.
        for m in self.modules():
            init_weights_(m)
237
+
238
+
239
if __name__ == '__main__':
    # Print the parameter count of each architecture variant, in the
    # same order as before.
    for arch in (VGG_A, VGG_A_Light, VGG_A_Dropout, VGG_A_BatchNorm):
        print(get_number_of_parameters(arch()))
VGG_BatchNorm/models/vgg_a_lr0.0001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105171da7fd7bf680dd5d4c6d9484019a469849021e775edade244a84b139bfb
3
+ size 39011786
VGG_BatchNorm/models/vgg_a_lr0.0001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40fd79ad6ea0064b44c91905f42549bac9f3e4439f2ac7b3e4f4b61681b7a81f
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc50830495cff83662ea2603a3b39b8f882f45341a6aa95ac8cc9a731c315196
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0005.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3db4e136d92dc94a1f8601d7700e922e466a049e91891840f0374d6d27c98db
3
+ size 39011786
VGG_BatchNorm/models/vgg_a_lr0.0005_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5703f3db77174dc12448202f46dac32866fa7ce47383c961b0c0d85bfc4c89b7
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.0005_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3838ee654dddc55efa6b63f65594901803a410f17e90e2d2402ba51127c94db
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bab30b087815d4470cb665e2aeadcb2c2c786f5f21746bcf07e93cef1e3669
3
+ size 39011760
VGG_BatchNorm/models/vgg_a_lr0.001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a558422f2f2eb6233e061ed0e24fea94a7525074255f8f91dd5d6ed6b2dd1c
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f74eebcff55116c216fbaca90a56280090d6a01ab7223cad91d8cb42c97747
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e8453a024e60b3bb8e680b0aeed1d17bcf1545b2de4ca030f57513e0c5f7be
3
+ size 39011760
VGG_BatchNorm/models/vgg_a_lr0.002_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a22fc25871c1a180f423cc498b60f1083d07c2e45d605939889b5c90a35afcfa
3
+ size 31408
VGG_BatchNorm/models/vgg_a_lr0.002_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7868e3df3d4ae5b25889df52f98ef5e4bb98dcd741f62667b4671b3e16ae981
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9451b3372e6897dd96be45a1902a9629e8f313e9019162d8f9ce5251ffc542a3
3
+ size 39068716
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8880316d60e582db12979390d79b6f5df7a1b0c3b31e90ff164de5f0930d0aef
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a2ee1fe92633edf653d27d5139c882ffdd331319832aedf391a9d8f9a2c42e8
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0005.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b108a6b387ba7f8dbaf5f8192cebd96e753d3b2eec071fdb9047ec18ac56af
3
+ size 39068716
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75477b2424055e4773105ed1c3e30b3f865263d93620cdb0ab8ea8cc7ab90e5b
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.0005_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3261b9564be6ae816ec048ac8189aec7e16eb0b9ed6a31a4851356a19f09ce04
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9765324e222a9d6ff7b51ae6b5f3f97a1e1cc99197654f906f8e7611b9b8d193
3
+ size 39068650
VGG_BatchNorm/models/vgg_bn_lr0.001_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2224404b4ba0e30fb143ddbaee1976528671afe3b2f25a14499dc7dd63a87d
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.001_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50030e0c9e31a85a981d7b60d0094b758fa8efb37f89ca309ec7025cfa71787e
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1488c03104e9de23b07a9deb22ef6afb1ba3b7f6aaa5dad9ba5a46a4c51082c4
3
+ size 39068650
VGG_BatchNorm/models/vgg_bn_lr0.002_step_grads.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf1d2594a3cd9783d29c71a69d124d00928c24ba50d143dfb58c72cd06f601b
3
+ size 31408
VGG_BatchNorm/models/vgg_bn_lr0.002_step_losses.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215478b68ca6ea0590bc76f7353771a923a1f10422c5b9dfa022526d1ad0d41b
3
+ size 31408
VGG_BatchNorm/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ '''
2
+ Several utils, in particular for experiments
3
+ '''
4
+ from . import nn
VGG_BatchNorm/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (251 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (262 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/nn.cpython-310.pyc ADDED
Binary file (788 Bytes). View file
 
VGG_BatchNorm/utils/__pycache__/nn.cpython-37.pyc ADDED
Binary file (771 Bytes). View file
 
VGG_BatchNorm/utils/nn.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utils for neural networks
3
+ """
4
+
5
+ from torch import nn
6
+
7
+
8
def init_weights_(m):
    """
    Initializes weights of m according to Xavier normal method.

    :param m: module
    :return:
    """
    # Conv and Linear layers share the same scheme: Xavier-normal weight,
    # zero bias (when a bias exists).
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    # BatchNorm (1d and 2d) starts as the identity transform.
    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)
VGG_BatchNorm/vgg_bn_compare.png ADDED
VGG_BatchNorm/vgg_bn_loss_landscape.png ADDED
VGG_BatchNorm/vgg_grad_norm_curve_lr0.0001.png ADDED