IvanBanny committed on
Commit
340ccea
·
0 Parent(s):

initialized repo

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. README.md +2 -0
  3. model.py +70 -0
  4. scene_classification.py +306 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ data
2
+ test.txt
3
+ venv
4
+ .idea
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Places-ResNet
2
+ My experiment training a ResNet-inspired model for image classification using PyTorch.
model.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm stages plus a skip branch.

    Both convolutions use ``padding=1`` with the default stride, so the
    spatial size of the input is preserved; only the channel count may change.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Main branch: conv -> bn -> relu -> conv -> bn.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip branch: an empty Sequential acts as the identity; a 1x1
        # convolution is used instead when the channel count has to change
        # so that the two branches can be summed.
        if in_channels == out_channels:
            self.skip_connection = nn.Sequential()
        else:
            self.skip_connection = nn.Conv2d(in_channels, out_channels,
                                             kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both branches."""
        shortcut = self.skip_connection(x)

        hidden = self.conv1(x)
        hidden = nn.functional.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))

        hidden += shortcut  # merge the skip branch into the main branch
        return nn.functional.relu(hidden)
25
+
26
+
27
class MyModel(nn.Module):
    """ResNet-inspired image classifier.

    Pipeline: a 7x7 stride-2 convolution with batch norm, ReLU and a 3x3
    stride-2 max pool, followed by three residual stages (64 -> 128 -> 256
    -> 512 channels), global average pooling and a linear classifier.

    Note:
        The stem, the stages and the pooling layer are registered both as
        direct attributes and as members of ``self.features``. Their tensors
        therefore appear in ``state_dict()`` under two prefixes (for example
        ``conv1.*`` and ``features.0.*``). That layout is kept as is so
        previously saved checkpoints continue to load.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes=100):
        super().__init__()

        # Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages. Each call yields one channel-changing block plus
        # ``num_blocks`` same-width blocks, i.e. three blocks per stage here.
        self.layer1 = self._resnet_layers(64, 128, num_blocks=2)
        self.layer2 = self._resnet_layers(128, 256, num_blocks=2)
        self.layer3 = self._resnet_layers(256, 512, num_blocks=2)

        # Collapse every feature map to a single value per channel
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

        # Full feature extractor, in execution order
        feature_stack = [
            self.conv1,
            self.bn1,
            nn.ReLU(),
            self.pool1,
            self.layer1,
            self.layer2,
            self.layer3,
            self.global_avg_pool,
        ]
        self.features = nn.Sequential(*feature_stack)

        # Classifier head
        self.fc = nn.Linear(512, num_classes)

    @staticmethod
    def _resnet_layers(in_channels, out_channels, num_blocks):
        """Build one residual stage.

        The stage starts with a block mapping ``in_channels`` to
        ``out_channels`` and is followed by ``num_blocks`` blocks that keep
        ``out_channels``, for a total of ``num_blocks + 1`` blocks.
        """
        blocks = [ResidualBlock(in_channels, out_channels)]
        for _ in range(num_blocks):
            blocks.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images ``x``."""
        pooled = self.features(x)
        flat = torch.flatten(pooled, 1)  # drop the 1x1 spatial dims before the linear layer
        return self.fc(flat)
scene_classification.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ from tqdm import tqdm
4
+ import torch
5
+ import argparse
6
+ from PIL import Image
7
+ from torchvision import transforms
8
+ from torch.utils.data import DataLoader, Dataset
9
+ from model import MyModel
10
+
11
+
12
class MiniPlaces(Dataset):
    """MiniPlaces images listed in a ``<root_dir>/<split>.txt`` index file.

    Each non-blank line of the index file is split on whitespace. The first
    field is the image path relative to ``<root_dir>/images``; for the
    'train' and 'val' splits the second field is the integer class label.
    For the 'test' split there is no label, so the image path itself is
    stored as the label (it is later used to identify the prediction).
    """

    def __init__(self, root_dir, split, transform=None, label_dict=None):
        """
        Read the index file of the requested split.

        Args:
            root_dir (str): Root directory for the MiniPlaces images.
            split (str): Split to use ('train', 'val', or 'test').
            transform (callable, optional): Optional data transformation to apply to the images.
            label_dict (dict, optional): Optional dictionary mapping integer labels to class names.
                When given, the same dict object is stored (and, for the
                'train' split, extended in place).
        """
        assert split in ['train', 'val', 'test']
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        self.filenames = []
        self.labels = []

        self.label_dict = label_dict if label_dict is not None else {}

        with open(os.path.join(self.root_dir, self.split + '.txt')) as index_file:
            for raw_line in index_file:
                fields = raw_line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at the
                    # end of the file) instead of failing with IndexError.
                    continue

                self.filenames.append(fields[0])
                if split == 'test':
                    label = fields[0]
                else:
                    label = int(fields[1])
                self.labels.append(label)

                if split == 'train':
                    # The class name is the third component of the image path.
                    text_label = fields[0].split('/')[2]
                    self.label_dict[label] = text_label

    def __len__(self):
        """
        Return the number of images in the dataset.

        Returns:
            int: Number of images in the dataset.
        """
        return len(self.labels)

    def __getitem__(self, idx):
        """
        Return a single image and its corresponding label when given an index.

        The image is always converted to RGB so that grayscale or RGBA files
        yield three channels like every other sample, and the underlying file
        is closed as soon as the pixel data has been read.

        Args:
            idx (int): Index of the image to retrieve.

        Returns:
            tuple: Tuple containing the image (transformed if a transform was
            given) and its label.
        """
        path = os.path.join(self.root_dir, "images", self.filenames[idx])
        with Image.open(path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[idx]
74
+
75
+
76
def evaluate(model, test_loader, criterion, device):
    """
    Compute the loss and accuracy of a classifier over a data loader.

    The model is switched to evaluation mode and gradients are disabled.
    The model is not moved to ``device`` here; only the batches are.

    Args:
        model (torch.nn.Module): Classifier to evaluate.
        test_loader (torch.utils.data.DataLoader): Loader yielding (inputs, labels) batches.
        criterion (callable): Loss function applied to (logits, labels).
        device (torch.device): Device the batches are moved to.

    Returns:
        float: Mean of the per-batch loss values (sum of batch losses divided
            by the number of batches).
        float: Fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: If the loader yields no batches.
    """
    model.eval()  # Set model to evaluation mode

    loss_sum = 0.0
    hits = 0
    seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Forward pass and loss for this batch
            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            # The predicted class is the index of the largest logit
            predicted = torch.max(logits, dim=1).indices
            hits += (predicted == batch_labels).sum().item()
            seen += len(batch_inputs)

    mean_loss = loss_sum / len(test_loader)
    hit_rate = hits / seen
    return mean_loss, hit_rate
117
+
118
+
119
def train(model, train_loader, val_loader, optimizer, criterion, device,
          num_epochs):
    """
    Train a classifier and report train/validation metrics after every epoch.

    Args:
        model (torch.nn.Module): Classifier to train; it is moved to ``device`` first.
        train_loader (torch.utils.data.DataLoader): Data loader for the training set.
        val_loader (torch.utils.data.DataLoader): Data loader for the validation set.
        optimizer (torch.optim.Optimizer): Optimizer to use for training.
        criterion (callable): Loss function to use for training.
        device (torch.device): Device to use for training.
        num_epochs (int): Number of epochs to train the model.
    """
    model = model.to(device)

    for epoch in range(num_epochs):
        model.train()  # evaluate() leaves the model in eval mode, so reset it each epoch

        num_batches = len(train_loader)
        epoch_loss = 0.0  # sum of the per-batch losses
        hits = 0
        seen = 0

        progress = tqdm(total=num_batches,
                        desc=f'Epoch {epoch + 1}/{num_epochs}',
                        position=0,
                        leave=True)
        with progress:
            for batch_inputs, batch_labels in train_loader:
                batch_inputs = batch_inputs.to(device)
                batch_labels = batch_labels.to(device)

                # One optimisation step: reset grads, forward, backward, update
                optimizer.zero_grad()
                logits = model(batch_inputs)
                loss = criterion(logits, batch_labels)
                loss.backward()
                optimizer.step()

                # Bookkeeping for the epoch summary
                batch_loss = loss.item()
                epoch_loss += batch_loss
                predicted = logits.max(1)[1]
                hits += (predicted == batch_labels).sum().item()
                seen += batch_labels.size(0)

                progress.update(1)
                progress.set_postfix(loss=batch_loss)

        train_loss = epoch_loss / num_batches
        train_accuracy = hits / seen

        val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)
        print(
            f"Train Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f} "
            f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}"
        )
187
+
188
+
189
+ def test(model, test_loader, device):
190
+ """
191
+ Get predictions for the test set.
192
+
193
+ Args:
194
+ model (CNN): classifier to evaluate.
195
+ test_loader (torch.utils.data.DataLoader): Data loader for the test set.
196
+ device (torch.device): Device to use for evaluation.
197
+
198
+ Returns:
199
+ float: Average loss on the test set.
200
+ float: Accuracy on the test set.
201
+ """
202
+ model = model.to(device)
203
+ model.eval() # Set model to evaluation mode
204
+
205
+ with torch.no_grad():
206
+ all_preds = []
207
+
208
+ for inputs, labels in test_loader:
209
+ # Move inputs and labels to device
210
+ inputs = inputs.to(device)
211
+
212
+ logits = model(inputs)
213
+
214
+ _, predictions = torch.max(logits, dim=1)
215
+ preds = list(zip(labels, predictions.tolist()))
216
+ all_preds.extend(preds)
217
+
218
+ return all_preds
219
+
220
+
221
def write_predictions(preds, filename):
    """
    Write predictions to a comma-separated file, one row per prediction.

    Args:
        preds (iterable): Rows to write, e.g. ``(image_name, predicted_label)``
            tuples as returned by ``test``.
        filename (str): Path of the CSV file to create or overwrite.
    """
    # The csv module emits its own line terminators; opening the file with
    # newline='' prevents them from being translated a second time (which
    # produces blank lines between rows on Windows).
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(preds)
226
+
227
+
228
def main(args):
    """
    Train the model on MiniPlaces, or write test-set predictions.

    In training mode (default) the model is trained for 25 epochs and the
    model/optimizer state is saved to ``model.ckpt``. If ``args.checkpoint``
    points to an existing file, training resumes from it; otherwise training
    starts from scratch. In test mode (``args.test``) the checkpoint is
    required and predictions are written to ``predictions.csv``.

    Args:
        args (argparse.Namespace): Parsed command-line arguments with the
            attributes ``test`` (bool) and ``checkpoint`` (str path).

    Raises:
        FileNotFoundError: In test mode, if the checkpoint file does not exist.
    """
    image_net_mean = torch.Tensor([0.485, 0.456, 0.406])
    image_net_std = torch.Tensor([0.229, 0.224, 0.225])

    # Define data transformation
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((128, 128)),
        transforms.Normalize(image_net_mean, image_net_std),
    ])

    data_root = 'data'

    # Create MiniPlaces dataset objects; the validation set reuses the
    # label -> class-name mapping collected from the training set.
    miniplaces_train = MiniPlaces(data_root,
                                  split='train',
                                  transform=data_transform)
    miniplaces_val = MiniPlaces(data_root,
                                split='val',
                                transform=data_transform,
                                label_dict=miniplaces_train.label_dict)

    # Define the batch size and number of workers
    batch_size = 64
    num_workers = 2

    # Create DataLoader for training and validation sets
    train_loader = DataLoader(miniplaces_train,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=True)
    val_loader = DataLoader(miniplaces_val,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=False)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # TODO: check cuda

    # Move the model to its final device BEFORE the optimizer state is
    # loaded: Optimizer.load_state_dict places the restored buffers on the
    # device the parameters live on at that moment.
    model = MyModel(num_classes=len(miniplaces_train.label_dict)).to(device)

    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-4, amsgrad=False)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, dampening=0, weight_decay=1e-4, nesterov=True)

    # Load the checkpoint once, and only if it is really there. The
    # --checkpoint option has a default value, so a missing file is the
    # normal situation on the very first training run.
    checkpoint = None
    if args.checkpoint and os.path.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint, map_location=device, weights_only=True)
        model.load_state_dict(checkpoint['model_state_dict'])

    criterion = torch.nn.CrossEntropyLoss(reduction='mean', label_smoothing=0.1)

    if not args.test:
        if checkpoint is not None:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            print(f"No checkpoint found at {args.checkpoint!r}; training from scratch.")

        train(model, train_loader, val_loader, optimizer, criterion,
              device, num_epochs=25)

        torch.save({'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()}, 'model.ckpt')

    else:
        if checkpoint is None:
            raise FileNotFoundError(f"Checkpoint not found: {args.checkpoint!r}")

        miniplaces_test = MiniPlaces(data_root,
                                     split='test',
                                     transform=data_transform)
        test_loader = DataLoader(miniplaces_test,
                                 batch_size=batch_size,
                                 num_workers=num_workers,
                                 shuffle=False)
        preds = test(model, test_loader, device)
        write_predictions(preds, 'predictions.csv')
299
+
300
+
301
if __name__ == "__main__":
    # Command-line entry point:
    #   --test        write predictions for the test split instead of training
    #   --checkpoint  path of the checkpoint file (defaults to 'model.ckpt')
    cli = argparse.ArgumentParser()
    cli.add_argument('--test', action='store_true')
    cli.add_argument('--checkpoint', default='model.ckpt')
    main(cli.parse_args())