"""Gradio demo serving image-classification predictions from a fine-tuned VGG16.

Loads a fully pickled model (``model.pth``) and its fitted label encoder
(``encoder.pkl``), then exposes a single-image classification endpoint.
"""

import pickle

import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F  # noqa: F401 -- kept: may be referenced by the pickled model
from PIL import Image, ImageFile  # noqa: F401 -- kept: PIL may be needed to unpickle/serve images
from torchvision import models, transforms

device = torch.device('cpu')


class FineTunedVGG(nn.Module):
    """VGG16 backbone (frozen) plus auxiliary depthwise-separable side branches.

    NOTE(review): ``torch.load("model.pth")`` re-instantiates this class by
    name, so its structure must stay identical to the one used at pickling
    time. Attribute names and module layout are therefore left unchanged.
    """

    def __init__(self, num_classes, input_size=224):
        """Build the model.

        Parameters
        ----------
        num_classes : int
            Number of output classes for the final linear head.
        input_size : int
            Side length of the square dummy input used to trace channel counts.
        """
        super(FineTunedVGG, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        self.st = 8  # pool kernel/stride for the first side branch; halved per branch
        self.blocks = []

        # Freeze the entire pretrained backbone.
        for param in self.vgg.parameters():
            param.requires_grad = False

        # Trace a dummy input through the Conv2d layers only (pooling layers
        # are skipped, so spatial size stays at input_size) to capture the
        # feature maps -- and hence channel counts -- at indices 12, 22, 32.
        x = torch.randn(1, 3, input_size, input_size)
        for idx, layer in enumerate(self.vgg.features):
            if isinstance(layer, nn.Conv2d):
                x = layer(x)
            if idx in [12, 22, 32]:
                self.blocks.append(x)

        # Replace each tapped feature map with a depthwise-separable conv
        # branch projecting to 128 channels, then pooling.
        # NOTE(review): self.blocks is a plain Python list (not nn.ModuleList),
        # so these sub-modules are NOT registered with this module; kept as-is
        # to match the pickled checkpoint.
        for idx, block in enumerate(self.blocks):
            filters = block.size(1)
            depthwise_conv = nn.Conv2d(filters, filters, kernel_size=3, padding=1, groups=filters)
            depthwise_sep_conv = nn.Conv2d(filters, 128, kernel_size=1, padding=0)
            bn = nn.BatchNorm2d(128)
            pooled_block = nn.MaxPool2d(kernel_size=self.st, stride=self.st)
            self.st = self.st // 2
            self.blocks[idx] = nn.Sequential(depthwise_conv, depthwise_sep_conv, bn, pooled_block)

        self.vgg.add_module('ConcatenatedBlocks', nn.Sequential(*self.blocks))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # vgg16's stock classifier emits 1000 logits; this head maps them to num_classes.
        self.fc = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return (batch, num_classes) logits.

        NOTE(review): ``self.vgg(x)`` runs the stock VGG16 forward pass, so
        the 'ConcatenatedBlocks' side branches built in __init__ are never
        executed here. Left unchanged to match the trained checkpoint.
        """
        x = self.vgg(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


# SECURITY: torch.load on a whole-model pickle executes arbitrary code during
# deserialization -- only load model.pth from a trusted source.
model = torch.load("model.pth", map_location='cpu')
model.eval()  # fixed: set eval mode once at startup, not on every request

with open("encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Zero-mean / unit-std normalization is an intentional no-op; kept so the
# pipeline matches whatever the checkpoint was trained with.
_MEAN = [0.0, 0.0, 0.0]
_STD = [1.0, 1.0, 1.0]

# Hoisted out of recognize_image: building the Compose per request was wasted work.
_INFER_TRANSFORM = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(_MEAN, _STD),
])


def preprocess_image(image):
    """Resize a PIL image to 224x224 and return a (1, 3, 224, 224) float tensor.

    Kept for API compatibility; the Gradio handler uses _INFER_TRANSFORM,
    which applies Resize after ToTensor (tensor-mode resize) instead.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_MEAN, std=_STD),
    ])
    return transform(image).unsqueeze(0)


def recognize_image(image):
    """Classify one image and return {class_label: probability} for gr.Label.

    Parameters
    ----------
    image : PIL image or HxWx3 numpy array, as delivered by the gr.Image input.

    Returns
    -------
    dict mapping every label-encoder class to its softmax probability.
    """
    img_normalized = _INFER_TRANSFORM(image).float().unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_normalized)
    probs = torch.softmax(output, dim=1)[0].tolist()
    class_labels = label_encoder.classes_
    return dict(zip(class_labels, map(float, probs)))


# Fixed: gr.inputs.* / gr.outputs.* (and the Image `shape=` argument) were
# removed in Gradio 3.x; use the top-level components instead.
image_input = gr.Image(height=224, width=224)
label_output = gr.Label(num_top_classes=10)

examples = [
    'test_imgs/bike.jpg',
    'test_imgs/boat.jpg',
    'test_imgs/boat_2.png',
    'test_imgs/easybike.jpg',
]

iface = gr.Interface(fn=recognize_image, inputs=image_input, outputs=label_output, examples=examples)

# Guard the server launch so the module can be imported without side effects.
if __name__ == "__main__":
    iface.launch(inline=False)