import gradio as gr
import torch
from torchvision import models, transforms
from timeit import default_timer as timer
from typing import Tuple, Dict

# Setup class names: one label per line of class_names.txt, in the order of the
# model's output indices (predict() pairs class_names[i] with output i).
# The explicit UTF-8 encoding keeps decoding independent of the host locale, and
# blank lines are skipped so a trailing empty line cannot add an empty label.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [food_name.strip() for food_name in f if food_name.strip()]

# Load MobileNetV2 model
# NOTE(review): depending on the installed torch version's defaults, torch.load
# may unpickle arbitrary objects, so "model_state_dict.pth" must come from a
# trusted source; consider passing weights_only=True once the minimum supported
# torch version is confirmed to accept it.
_state_dict = torch.load(
    f="model_state_dict.pth",
    map_location=torch.device("cpu"),
)
# torchvision builds a 1000-class head unless told otherwise, which cannot load a
# checkpoint whose classifier was trained for a different number of classes.
# Size the head from the saved final Linear layer; if that key is absent, keep
# the default architecture exactly as before.
_head_weight = _state_dict.get("classifier.1.weight")
_head_kwargs = {} if _head_weight is None else {"num_classes": _head_weight.shape[0]}
mobilenetv2 = models.mobilenet_v2(weights=None, **_head_kwargs)
mobilenetv2.load_state_dict(_state_dict)
mobilenetv2.eval()  # switch to inference mode before serving predictions

# Define transforms: the preprocessing applied to every image before inference.
# Per-channel statistics handed to Normalize (one value per channel).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize((224, 224)),  # force a fixed 224x224 input size
    transforms.ToTensor(),  # convert the image to a tensor
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
mobilenetv2_transforms = transforms.Compose(_preprocess_steps)

# Predict function
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify one image and report how long the prediction took.

    Args:
        img: PIL image to classify (the Gradio input in this file is declared
            with type="pil"). Images whose mode is not RGB are converted to
            RGB before preprocessing.

    Returns:
        A tuple ``(probabilities, seconds)`` where ``probabilities`` maps each
        entry of ``class_names`` to its softmax probability and ``seconds`` is
        the elapsed time rounded to 5 decimal places.

    Raises:
        gr.Error: if no image was supplied (``img`` is None).
        IndexError: if ``class_names`` has more entries than the model has
            outputs.
    """
    if img is None:
        # Submitting the form without an upload passes None; fail with a
        # message the UI can display instead of a TypeError from the transforms.
        raise gr.Error("Please upload an image before submitting.")

    start_time = timer()
    # The normalization step is configured for three channels, so grayscale or
    # RGBA input has to be converted first.
    if img.mode != "RGB":
        img = img.convert("RGB")
    batch = mobilenetv2_transforms(img).unsqueeze(0)  # add the batch dimension
    mobilenetv2.eval()
    with torch.no_grad():
        pred_probs = torch.softmax(mobilenetv2(batch), dim=1)
    # Convert the single row to Python floats once rather than per element.
    probs = pred_probs[0].tolist()
    pred_labels_and_probs = {name: probs[i] for i, name in enumerate(class_names)}
    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time

# Gradio app
# Heading and introductory markdown passed to the interface.
title = "🍲 Food Image Classification with MobileNetV2 🍕"
description = """
Upload an image of your food, and this model will predict what it is! 🍽️

The model can identify the following 5 types of food:
1. 🍛 **Chicken Curry**
2. 🍚 **Fried Rice**
3. 🍦 **Ice Cream**
4. 🍕 **Pizza**
5. 🥟 **Samosa**

Just upload your image and get the probabilities for each class!
"""

# Input component: delivers the uploaded picture to predict() as a PIL image.
_image_input = gr.Image(type="pil")

# Output components, in the same order as the values returned by predict():
# the label/probability mapping first, then the elapsed time.
_prediction_outputs = [
    gr.Label(num_top_classes=5, label="Predictions"),
    gr.Number(label="Prediction time (s)"),
]

demo = gr.Interface(
    fn=predict,
    inputs=_image_input,
    outputs=_prediction_outputs,
    title=title,
    description=description,
)

# Start the app as soon as this module is executed.
demo.launch()