Commit
·
ea78c09
1
Parent(s):
171643a
Final submission
Browse files- resnet_execute.py +0 -220
resnet_execute.py
DELETED
|
@@ -1,220 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import torchvision
|
| 3 |
-
import torchvision.transforms as transforms
|
| 4 |
-
from torch.utils.data import DataLoader
|
| 5 |
-
import torch.nn as nn
|
| 6 |
-
import torch.optim as optim
|
| 7 |
-
from resnet_model import ResNet50
|
| 8 |
-
from tqdm import tqdm
|
| 9 |
-
from torchvision import datasets
|
| 10 |
-
from checkpoint import save_checkpoint, load_checkpoint
|
| 11 |
-
import matplotlib.pyplot as plt
|
| 12 |
-
from torchvision.utils import make_grid
|
| 13 |
-
import albumentations as A
|
| 14 |
-
from albumentations.pytorch import ToTensorV2
|
| 15 |
-
import numpy as np
|
| 16 |
-
from torchsummary import summary
|
| 17 |
-
|
| 18 |
-
# Define transformations
|
| 19 |
-
# Training-time augmentation pipeline (albumentations). The steps run in the
# order listed; the final step converts the augmented array to a torch tensor.
train_transform = A.Compose(
    [
        # Random crop resized to 224x224; the crop's relative area and aspect
        # ratio are drawn from the `scale` and `ratio` ranges given here.
        A.RandomResizedCrop(
            height=224,
            width=224,
            scale=(0.08, 1.0),
            ratio=(3 / 4, 4 / 3),
            p=1.0,
        ),
        # Mirror half of the samples left/right.
        A.HorizontalFlip(p=0.5),
        # Photometric jitter, applied to 80% of the samples.
        A.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
            hue=0.1,
            p=0.8,
        ),
        # Per-channel standardisation (these are the commonly used ImageNet
        # mean/std values).
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

# Deterministic evaluation pipeline: resize, centre crop, then the same
# normalisation and tensor conversion as the training pipeline.
test_transform = A.Compose(
    [
        A.Resize(height=256, width=256),
        A.CenterCrop(height=224, width=224),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)
|
| 33 |
-
|
| 34 |
-
# Train dataset and loader
|
| 35 |
-
def _apply_train_transform(img):
    """Run the training augmentations on one image loaded by ImageFolder.

    Albumentations operates on NumPy arrays, so the loaded image is converted
    with ``np.array`` first. A named module-level function is used instead of
    an inline lambda because lambdas cannot be pickled, which breaks
    ``DataLoader`` worker processes on platforms that use the ``spawn`` start
    method.
    """
    return train_transform(image=np.array(img))['image']


def _apply_test_transform(img):
    """Run the evaluation preprocessing on one image loaded by ImageFolder."""
    return test_transform(image=np.array(img))['image']


# Training split: shuffled every epoch.
trainset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train',
    transform=_apply_train_transform,
)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True)

# Validation split: fixed order, larger batches since no gradients are kept.
testset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/val',
    transform=_apply_test_transform,
)
testloader = DataLoader(testset, batch_size=500, shuffle=False, num_workers=8, pin_memory=True)
|
| 40 |
-
|
| 41 |
-
# Initialize model, loss function, and optimizer
|
| 42 |
-
# Use the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Build the network, wrap it in DataParallel, and move it to the chosen
# device before printing the layer summary.
model = torch.nn.DataParallel(ResNet50()).to(device)
summary(model, input_size=(3, 224, 224))

# Classification loss and SGD with momentum and weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)
|
| 51 |
-
|
| 52 |
-
# Training function
|
| 53 |
-
from torch.amp import autocast
|
| 54 |
-
|
| 55 |
-
def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
    """Train ``model`` for one epoch with gradient accumulation.

    Args:
        model: Network to optimise; it is switched to training mode.
        device: Device (``torch.device`` or device string) that each batch is
            moved to.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped once every ``accumulation_steps`` batches
            and once more on the final batch of the epoch.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        epoch: Epoch number, used only in the progress-bar description.
        accumulation_steps: Number of batches whose gradients are summed
            before each optimizer step.

    Returns:
        Tuple ``(top1_accuracy, top5_accuracy, mean_loss)`` with accuracies in
        percent and the loss averaged over batches. All three are ``0.0`` when
        the loader yields no samples.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be >= 1")

    model.train()
    device_type = torch.device(device).type
    # Mixed precision is only enabled on CUDA; on other devices the forward
    # pass runs in full precision (requesting 'cuda' autocast there would
    # merely be disabled with a warning on every batch).
    # NOTE(review): fp16 autocast is used without torch.amp.GradScaler, so
    # small gradients may underflow - consider adding a scaler.
    use_amp = device_type == 'cuda'
    num_batches = len(train_loader)

    running_loss = 0.0
    correct1 = 0
    correct5 = 0
    total = 0

    # Start from clean gradients so nothing left over from earlier work is
    # folded into the first accumulated step.
    optimizer.zero_grad()

    pbar = tqdm(train_loader)
    for batch_idx, (inputs, targets) in enumerate(pbar):
        inputs, targets = inputs.to(device), targets.to(device)

        with autocast(device_type=device_type, enabled=use_amp):
            outputs = model(inputs)
            # Scale so the accumulated gradient matches the mean over the
            # accumulated batches.
            loss = criterion(outputs, targets) / accumulation_steps

        loss.backward()

        is_last_batch = (batch_idx + 1) == num_batches
        if (batch_idx + 1) % accumulation_steps == 0 or is_last_batch:
            optimizer.step()
            optimizer.zero_grad()

        # Undo the accumulation scaling so the reported loss is per batch.
        running_loss += loss.item() * accumulation_steps

        # Clamp k so models with fewer than five classes do not crash topk.
        top_k = min(5, outputs.size(1))
        _, predicted = outputs.detach().topk(top_k, 1, True, True)
        hits = predicted.eq(targets.view(-1, 1))
        total += targets.size(0)
        correct1 += hits[:, :1].sum().item()
        correct5 += hits.sum().item()

        seen = max(total, 1)  # guards the running accuracy against empty batches
        pbar.set_description(desc=f'Epoch {epoch} | Loss: {running_loss / (batch_idx + 1):.4f} | Top-1 Acc: {100. * correct1 / seen:.2f} | Top-5 Acc: {100. * correct5 / seen:.2f}')

        # Periodically release cached GPU memory (only meaningful on CUDA).
        if use_amp and (batch_idx + 1) % 50 == 0:
            torch.cuda.empty_cache()

    if total == 0 or num_batches == 0:
        return 0.0, 0.0, 0.0

    return 100. * correct1 / total, 100. * correct5 / total, running_loss / num_batches
|
| 88 |
-
|
| 89 |
-
# Testing function
|
| 90 |
-
def test(model, device, test_loader, criterion, max_misclassified=None):
    """Evaluate ``model`` and collect the samples whose top-1 prediction is wrong.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        device: Device (``torch.device`` or device string) that each batch is
            moved to.
        test_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        max_misclassified: Optional upper bound on how many misclassified
            samples are retained. ``None`` (the default) keeps all of them,
            which can use a lot of host memory on large datasets.

    Returns:
        Tuple ``(top1_accuracy, top5_accuracy, mean_loss, images, labels,
        preds)``. Accuracies are percentages and the loss is averaged over
        batches. ``images``, ``labels`` and ``preds`` are parallel lists of
        CPU tensors for the misclassified samples; each ``preds`` entry has
        shape ``(1,)``. The numeric values are ``0.0`` for an empty loader.
    """
    model.eval()
    test_loss = 0
    correct1 = 0
    correct5 = 0
    total = 0
    misclassified_images = []
    misclassified_labels = []
    misclassified_preds = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            # Clamp k so models with fewer than five classes do not crash topk.
            top_k = min(5, outputs.size(1))
            _, predicted = outputs.topk(top_k, 1, True, True)
            hits = predicted.eq(targets.view(-1, 1))
            total += targets.size(0)
            correct1 += hits[:, :1].sum().item()
            correct5 += hits.sum().item()

            # Collect misclassified samples with one vectorised selection per
            # batch instead of a per-sample loop, which would force a device
            # synchronisation for every single sample.
            wrong_idx = (~hits[:, 0]).nonzero(as_tuple=True)[0]
            if max_misclassified is not None:
                remaining = max(max_misclassified - len(misclassified_images), 0)
                wrong_idx = wrong_idx[:remaining]
            if wrong_idx.numel() > 0:
                misclassified_images.extend(inputs[wrong_idx].cpu().unbind(0))
                misclassified_labels.extend(targets[wrong_idx].cpu().unbind(0))
                misclassified_preds.extend(predicted[wrong_idx, :1].cpu().unbind(0))

    num_batches = len(test_loader)
    test_accuracy1 = 100. * correct1 / total if total else 0.0
    test_accuracy5 = 100. * correct5 / total if total else 0.0
    mean_loss = test_loss / num_batches if num_batches else 0.0
    print(f'Test Loss: {mean_loss:.4f}, Top-1 Accuracy: {test_accuracy1:.2f}, Top-5 Accuracy: {test_accuracy5:.2f}')
    return test_accuracy1, test_accuracy5, mean_loss, misclassified_images, misclassified_labels, misclassified_preds


# The name matches pytest's default ``test*`` collection pattern; mark the
# function so it is never collected as a test case when imported by a test
# module.
test.__test__ = False
|
| 123 |
-
|
| 124 |
-
# Main execution
|
| 125 |
-
if __name__ == '__main__':
    # Training length, early-stopping parameters and checkpoint path.
    num_epochs = 25
    checkpoint_path = "checkpoint.pth"
    best_loss = float('inf')
    patience = 5
    patience_counter = 0

    # Load checkpoint if it exists to resume training.
    # NOTE(review): the third value returned by load_checkpoint is never used,
    # so best_loss restarts at +inf and the first epoch after a resume always
    # overwrites the checkpoint. save_checkpoint below is given test_loss, so
    # this value is presumably the saved loss - confirm against checkpoint.py
    # before seeding best_loss with it.
    try:
        model, optimizer, best_test_accuracy = load_checkpoint(model, optimizer, checkpoint_path)
    except FileNotFoundError:
        print("No checkpoint found, starting from scratch.")

    # Per-epoch history: (epoch, train top-1, train top-5, test top-1,
    # test top-5, train loss, test loss) plus the learning rate in use.
    results = []
    learning_rates = []

    for epoch in range(1, num_epochs + 1):
        train_accuracy1, train_accuracy5, train_loss = train(model, device, trainloader, optimizer, criterion, epoch)
        test_accuracy1, test_accuracy5, test_loss, misclassified_images, misclassified_labels, misclassified_preds = test(model, device, testloader, criterion)
        print(f'Epoch {epoch} | Train Top-1 Acc: {train_accuracy1:.2f} | Train Top-5 Acc: {train_accuracy5:.2f} | Test Top-1 Acc: {test_accuracy1:.2f} | Test Top-5 Acc: {test_accuracy5:.2f}')

        # Append results for this epoch.
        results.append((epoch, train_accuracy1, train_accuracy5, test_accuracy1, test_accuracy5, train_loss, test_loss))
        learning_rates.append(optimizer.param_groups[0]['lr'])

        # Checkpoint on improvement; otherwise count towards early stopping.
        if test_loss < best_loss:
            best_loss = test_loss
            patience_counter = 0
            save_checkpoint(model, optimizer, epoch, test_loss, checkpoint_path)
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping triggered. Training terminated.")
            break

    # Show misclassified samples from the last epoch that actually ran. Doing
    # this after the loop means the grid is also shown when early stopping
    # ends training before the final scheduled epoch.
    if misclassified_images:
        print("\nDisplaying some misclassified samples from the last epoch:")
        misclassified_grid = make_grid(misclassified_images[:16], nrow=4, normalize=True, scale_each=True)
        plt.figure(figsize=(8, 8))
        plt.imshow(misclassified_grid.permute(1, 2, 0))
        plt.title("Misclassified Samples")
        plt.axis('off')
        plt.show()

    # Print the Top-1 accuracy results in a tab-separated format. Each result
    # row is (epoch, train top-1, train top-5, test top-1, ...), so the train
    # top-5 column must be skipped to reach the test top-1 value.
    print("\nEpoch\tTrain Top-1 Accuracy\tTest Top-1 Accuracy")
    for row_epoch, row_train1, _, row_test1, *_ in results:
        print(f"{row_epoch}\t{row_train1:.2f}\t{row_test1:.2f}")

    # Plotting: split the history into one series per metric.
    epochs = [r[0] for r in results]
    train_acc1 = [r[1] for r in results]
    train_acc5 = [r[2] for r in results]
    test_acc1 = [r[3] for r in results]
    test_acc5 = [r[4] for r in results]
    train_losses = [r[5] for r in results]
    test_losses = [r[6] for r in results]

    plt.figure(figsize=(12, 8))

    plt.subplot(2, 2, 1)
    plt.plot(epochs, train_acc1, label='Train Top-1 Acc')
    plt.plot(epochs, test_acc1, label='Test Top-1 Acc')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Top-1 Accuracy')

    plt.subplot(2, 2, 2)
    plt.plot(epochs, train_acc5, label='Train Top-5 Acc')
    plt.plot(epochs, test_acc5, label='Test Top-5 Acc')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Top-5 Accuracy')

    plt.subplot(2, 2, 3)
    plt.plot(epochs, train_losses, label='Train Loss')
    plt.plot(epochs, test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Loss')

    plt.subplot(2, 2, 4)
    plt.plot(epochs, learning_rates, label='Learning Rate')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.legend()
    plt.title('Learning Rate')

    plt.tight_layout()
    plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|