"""Gradio demo serving image-classification predictions from a fine-tuned VGG16.

Loads a fully pickled model (``model.pth``) and its fitted label encoder
(``encoder.pkl``), then exposes a single-image classification endpoint.
"""

import pickle

import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F  # noqa: F401 -- kept: may be referenced by the pickled model
from PIL import Image, ImageFile  # noqa: F401 -- kept: PIL may be needed to unpickle/serve images
from torchvision import models, transforms

device = torch.device('cpu')


class FineTunedVGG(nn.Module):
    """VGG16 backbone (frozen) plus auxiliary depthwise-separable side branches.

    NOTE(review): ``torch.load("model.pth")`` re-instantiates this class by
    name, so its structure must stay identical to the one used at pickling
    time. Attribute names and module layout are therefore left unchanged.
    """

    def __init__(self, num_classes, input_size=224):
        """Build the model.

        Parameters
        ----------
        num_classes : int
            Number of output classes for the final linear head.
        input_size : int
            Side length of the square dummy input used to trace channel counts.
        """
        super(FineTunedVGG, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        self.st = 8  # pool kernel/stride for the first side branch; halved per branch
        self.blocks = []

        # Freeze the entire pretrained backbone.
        for param in self.vgg.parameters():
            param.requires_grad = False

        # Trace a dummy input through the Conv2d layers only (pooling layers
        # are skipped, so spatial size stays at input_size) to capture the
        # feature maps -- and hence channel counts -- at indices 12, 22, 32.
        x = torch.randn(1, 3, input_size, input_size)
        for idx, layer in enumerate(self.vgg.features):
            if isinstance(layer, nn.Conv2d):
                x = layer(x)
            if idx in [12, 22, 32]:
                self.blocks.append(x)

        # Replace each tapped feature map with a depthwise-separable conv
        # branch projecting to 128 channels, then pooling.
        # NOTE(review): self.blocks is a plain Python list (not nn.ModuleList),
        # so these sub-modules are NOT registered with this module; kept as-is
        # to match the pickled checkpoint.
        for idx, block in enumerate(self.blocks):
            filters = block.size(1)
            depthwise_conv = nn.Conv2d(filters, filters, kernel_size=3, padding=1, groups=filters)
            depthwise_sep_conv = nn.Conv2d(filters, 128, kernel_size=1, padding=0)
            bn = nn.BatchNorm2d(128)
            pooled_block = nn.MaxPool2d(kernel_size=self.st, stride=self.st)
            self.st = self.st // 2
            self.blocks[idx] = nn.Sequential(depthwise_conv, depthwise_sep_conv, bn, pooled_block)

        self.vgg.add_module('ConcatenatedBlocks', nn.Sequential(*self.blocks))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # vgg16's stock classifier emits 1000 logits; this head maps them to num_classes.
        self.fc = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return (batch, num_classes) logits.

        NOTE(review): ``self.vgg(x)`` runs the stock VGG16 forward pass, so
        the 'ConcatenatedBlocks' side branches built in __init__ are never
        executed here. Left unchanged to match the trained checkpoint.
        """
        x = self.vgg(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


# SECURITY: torch.load on a whole-model pickle executes arbitrary code during
# deserialization -- only load model.pth from a trusted source.
model = torch.load("model.pth", map_location='cpu')
model.eval()  # fixed: set eval mode once at startup, not on every request

with open("encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Zero-mean / unit-std normalization is an intentional no-op; kept so the
# pipeline matches whatever the checkpoint was trained with.
_MEAN = [0.0, 0.0, 0.0]
_STD = [1.0, 1.0, 1.0]

# Hoisted out of recognize_image: building the Compose per request was wasted work.
_INFER_TRANSFORM = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(_MEAN, _STD),
])


def preprocess_image(image):
    """Resize a PIL image to 224x224 and return a (1, 3, 224, 224) float tensor.

    Kept for API compatibility; the Gradio handler uses _INFER_TRANSFORM,
    which applies Resize after ToTensor (tensor-mode resize) instead.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_MEAN, std=_STD),
    ])
    return transform(image).unsqueeze(0)


def recognize_image(image):
    """Classify one image and return {class_label: probability} for gr.Label.

    Parameters
    ----------
    image : PIL image or HxWx3 numpy array, as delivered by the gr.Image input.

    Returns
    -------
    dict mapping every label-encoder class to its softmax probability.
    """
    img_normalized = _INFER_TRANSFORM(image).float().unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_normalized)
    probs = torch.softmax(output, dim=1)[0].tolist()
    class_labels = label_encoder.classes_
    return dict(zip(class_labels, map(float, probs)))


# Fixed: gr.inputs.* / gr.outputs.* (and the Image `shape=` argument) were
# removed in Gradio 3.x; use the top-level components instead.
image_input = gr.Image(height=224, width=224)
label_output = gr.Label(num_top_classes=10)

examples = [
    'test_imgs/bike.jpg',
    'test_imgs/boat.jpg',
    'test_imgs/boat_2.png',
    'test_imgs/easybike.jpg',
]

iface = gr.Interface(fn=recognize_image, inputs=image_input, outputs=label_output, examples=examples)

# Guard the server launch so the module can be imported without side effects.
if __name__ == "__main__":
    iface.launch(inline=False)