| import torch |
| import torch.nn as nn |
| from PIL import Image |
| from torchvision.transforms import ToTensor |
# Compute device chosen once at import time: CUDA when torch reports it as available, otherwise the CPU.
| device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') |
|
|
class MCNN(nn.Module):
    """Four-stage convolutional classifier followed by a five-layer dense head.

    Every convolutional stage is ``Conv2d(kernel 3, stride 1, padding 1) ->
    BatchNorm2d -> ReLU -> MaxPool2d(2, 2)`` with the channel plan
    3 -> 64 -> 128 -> 256 -> 512.  The 3x3/padding-1 convolutions keep the
    spatial size and each pooling step halves it (rounding down), so four
    stages shrink each side by a factor of 16.  The flattened feature map then
    passes through dense layers of width 2048, 1024, 512 and 256 (ReLU +
    dropout 0.2 after each) before the final ``num_classes``-way projection.

    Args:
        num_classes: Number of output logits.  Defaults to 2.
        input_size: Side length, in pixels, of the square images the model
            will be fed.  It only determines the input width of the first
            dense layer; the default of 224 gives 512 * 14 * 14 = 100352
            features.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1, or ``input_size`` is
            too small to leave at least one pixel after the four poolings.
    """

    # Four MaxPool2d(2, 2) stages => each spatial side is divided by 2 ** 4.
    _DOWNSCALE = 16
    # Output channels of the last convolution.
    _LAST_CHANNELS = 512

    def __init__(self, num_classes: int = 2, input_size: int = 224):
        super().__init__()

        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        feature_side = input_size // self._DOWNSCALE
        if feature_side < 1:
            raise ValueError(
                f"input_size must be >= {self._DOWNSCALE}, got {input_size}"
            )

        self.num_classes = num_classes
        self.input_size = input_size

        # Convolutional feature extractor.  Layers are registered in the same
        # order as before so seeded initialisation and state-dict keys match.
        self.conv1 = nn.Conv2d(3, 64, 3, 1, 1)
        self.bn1 = nn.BatchNorm2d(64)

        self.conv2 = nn.Conv2d(64, 128, 3, 1, 1)
        self.bn2 = nn.BatchNorm2d(128)

        self.conv3 = nn.Conv2d(128, 256, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(256)

        self.conv4 = nn.Conv2d(256, self._LAST_CHANNELS, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(self._LAST_CHANNELS)

        # One pooling module is shared by all four stages (it has no weights).
        self.pool = nn.MaxPool2d(2, 2)

        # Dense classification head.
        flat_features = self._LAST_CHANNELS * feature_side * feature_side
        self.fc1 = nn.Linear(flat_features, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 256)
        self.fc5 = nn.Linear(256, num_classes)

        # Stateless activation / regularisation modules reused by every layer.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, pixel_values, labels=None):
        """Compute class logits and, when labels are given, the loss value.

        Args:
            pixel_values: Image batch fed to the first convolution, which
                expects 3 input channels.  The spatial size must match the
                ``input_size`` the model was built with, otherwise the
                flattened features will not fit ``fc1``.
            labels: Optional class-index targets; flattened with ``view(-1)``
                before being passed to cross-entropy.

        Returns:
            A ``(logits, loss)`` tuple.  ``loss`` is ``None`` when ``labels``
            is ``None``; otherwise it is the mean cross-entropy as a plain
            Python ``float``.
        """
        x = pixel_values
        conv_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in conv_stages:
            x = self.pool(self.relu(norm(conv(x))))

        # Flatten everything except the batch dimension for the dense head.
        x = x.view(x.size(0), -1)
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.dropout(self.relu(dense(x)))
        logits = self.fc5(x)

        if labels is None:
            return logits, None

        loss = nn.functional.cross_entropy(
            logits.view(-1, self.num_classes), labels.view(-1)
        )
        # NOTE(review): ``.item()`` is kept for backward compatibility, but it
        # yields a detached float, so callers cannot call ``backward()`` on
        # the returned loss -- confirm this is intended.
        return logits, loss.item()
| |
def preprocess_image(img, desired_size=224):
    """Fit ``img`` onto a white square RGB canvas, preserving aspect ratio.

    The image is scaled so that its longest side equals ``desired_size`` and
    is then pasted, centred, onto a new ``desired_size`` x ``desired_size``
    white ``"RGB"`` image.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``resize`` method, and accepted by ``Image.paste`` (a PIL image).
        desired_size: Side length of the square output, in pixels.

    Returns:
        The new square RGB image containing the resized input.
    """
    old_width, old_height = img.size
    longest = max(old_width, old_height)

    # Integer arithmetic keeps the longest side at exactly ``desired_size``;
    # the float form ``int(side * (desired_size / longest))`` can land one
    # pixel short because of rounding (e.g. 49 * (1 / 49) < 1).  Clamping to 1
    # keeps very elongated images from getting a zero-length side, which
    # ``resize`` rejects.
    new_size = tuple(
        max(1, int(side * desired_size // longest))
        for side in (old_width, old_height)
    )
    resized = img.resize(new_size)

    canvas = Image.new("RGB", (desired_size, desired_size), "white")
    # Centre the resized image; any leftover border stays white.
    offset = (
        (desired_size - new_size[0]) // 2,
        (desired_size - new_size[1]) // 2,
    )
    canvas.paste(resized, offset)
    return canvas
|
|
def predict_image(image, model):
    """Run ``model`` on a single image and return softmax probabilities.

    The model is switched to evaluation mode (and left in it), the image is
    converted with torchvision's ``ToTensor`` and wrapped in a batch of one,
    and the forward pass runs with gradient tracking disabled.

    Args:
        image: Input accepted by ``ToTensor`` (e.g. a PIL image).
        model: Module whose call returns an indexable result with the logits
            at position 0; softmax is taken over ``dim=1`` of those logits.

    Returns:
        A NumPy array holding the softmax of the logits.
    """
    model.eval()

    input_batch = ToTensor()(image).unsqueeze(0)

    # Follow the model's own parameters so input and weights always share a
    # device; fall back to the module-level ``device`` for parameter-less
    # models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device
    input_batch = input_batch.to(target_device)

    # Inference only: skip autograd bookkeeping instead of building a graph
    # that would just be detached afterwards.
    with torch.no_grad():
        output = model(input_batch)
        probabilities = torch.nn.functional.softmax(output[0], dim=1)

    return probabilities.cpu().numpy()