"""Gradio demo: classify a food photo with a MobileNetV2 checkpoint.

Running this script loads the class names and model weights from the current
working directory and then launches the Gradio web UI.
"""

from timeit import default_timer as timer
from typing import Tuple, Dict

import gradio as gr
import torch
from torchvision import models, transforms

# Setup class names (one label per line; blank lines are skipped so they do
# not turn into empty-string labels in the UI).
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

# Load the MobileNetV2 weights on CPU.
_state_dict = torch.load(
    f="model_state_dict.pth",
    map_location=torch.device("cpu"),
)

# Size the classifier head from the checkpoint itself. torchvision's
# MobileNetV2 stores its final Linear layer under "classifier.1"; a checkpoint
# fine-tuned with a smaller head would otherwise fail load_state_dict with a
# size mismatch against the default 1000-way head.
# NOTE(review): if the checkpoint really has a 1000-way head, the softmax in
# predict() is taken over all 1000 outputs and the probabilities shown for
# the listed classes will not sum to 1 -- confirm how the model was trained.
_head_weight = _state_dict.get("classifier.1.weight")
if _head_weight is not None:
    mobilenetv2 = models.mobilenet_v2(
        weights=None, num_classes=_head_weight.shape[0]
    )
else:
    # Unknown head layout: fall back to the default construction.
    mobilenetv2 = models.mobilenet_v2(weights=None)
mobilenetv2.load_state_dict(_state_dict)
mobilenetv2.eval()  # inference mode for dropout / batch-norm; set once here

# Define transforms: fixed 224x224 input, tensor conversion, then per-channel
# normalization with the 3-channel mean/std below.
mobilenetv2_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Predict function
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify one image and time the prediction.

    Args:
        img: PIL image supplied by the ``gr.Image(type="pil")`` input, or
            ``None`` when the user submits without uploading anything.

    Returns:
        A tuple ``(probabilities, seconds)`` where ``probabilities`` maps each
        entry of ``class_names`` to its softmax probability and ``seconds`` is
        the elapsed preprocessing + inference time rounded to 5 decimals.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        raise gr.Error("Please upload an image before requesting a prediction.")

    start_time = timer()

    # Normalize expects exactly 3 channels; RGBA or grayscale uploads would
    # otherwise fail with a channel-mismatch error.
    batch = mobilenetv2_transforms(img.convert("RGB")).unsqueeze(0)

    with torch.no_grad():
        pred_probs = torch.softmax(mobilenetv2(batch), dim=1)

    # One tensor -> list conversion instead of a float() call per class.
    probs = pred_probs[0].tolist()
    pred_labels_and_probs = {
        name: probs[i] for i, name in enumerate(class_names)
    }

    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time


# Gradio app
title = "🍲 Food Image Classification with MobileNetV2 🍕"
description = """
Upload an image of your food, and this model will predict what it is! 🍽️

The model can identify the following 5 types of food:

1. 🍛 **Chicken Curry**
2. 🍚 **Fried Rice**
3. 🍦 **Ice Cream**
4. 🍕 **Pizza**
5. 🥟 **Samosa**

Just upload your image and get the probabilities for each class!
"""

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    title=title,
    description=description,
)

demo.launch()