### 1. Imports and class names setup ###
import os
from timeit import default_timer as timer
from typing import Dict, Tuple

import gradio as gr
import numpy as np
import torch
from PIL import Image

from model import create_vit_model

# Setup class names: one label per line. Blank lines are skipped so that a
# trailing newline in the file cannot create an empty label and push the
# label index past the number of model outputs.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

### 2. Model and transforms preparation ###

# Create model and its matching preprocessing transforms.
vit, vit_transforms = create_vit_model(num_classes=121)

# Load saved weights onto the CPU.
# NOTE(review): torch.load unpickles the checkpoint; only load files you trust.
vit.load_state_dict(
    torch.load(
        f="vit_epoch_2.pth",
        map_location=torch.device("cpu"),
    )
)

# The app only runs inference, so switch to eval mode once here instead of
# on every request.
vit.eval()


### 3. Predict function ###
def predict(img) -> Tuple[Dict[str, float], float]:
    """Transform ``img``, run the ViT model on it and time the prediction.

    Args:
        img: The input image, either a ``PIL.Image.Image`` or a NumPy array.
            Arrays are cast to ``uint8`` before conversion to a PIL image.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps every entry of ``class_names`` to its
        softmax probability and ``pred_time`` is the elapsed time in seconds,
        rounded to 5 decimal places.

    Raises:
        gr.Error: If ``img`` is ``None`` (no image was provided).
        IndexError: If ``class_names`` has more entries than the model has
            outputs.
    """
    start_time = timer()

    # Guard against a submit with no image so the user gets a readable
    # message instead of a failure inside the transform pipeline.
    if img is None:
        raise gr.Error("Please provide an image before submitting.")

    if isinstance(img, np.ndarray):
        # NOTE(review): float arrays scaled to [0, 1] would be truncated to 0
        # by this cast -- confirm callers pass 0-255 data.
        # Let Pillow infer the mode from the array shape; the explicit
        # ``mode=`` argument of ``fromarray`` is deprecated.
        img = Image.fromarray(img.astype(np.uint8))

    # Normalise greyscale / RGBA / palette inputs to three channels.
    # Presumably the transforms and model expect RGB -- TODO confirm.
    img = img.convert("RGB")

    # Add a leading batch dimension for the model.
    batch = vit_transforms(img).unsqueeze(0)

    with torch.inference_mode():
        pred_probs = torch.softmax(vit(batch), dim=1)

    # Pull the single row of probabilities out as plain Python floats once,
    # rather than indexing the tensor per class.
    probs = pred_probs[0].tolist()
    pred_labels_and_probs = {
        name: probs[i] for i, name in enumerate(class_names)
    }

    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time


### 4. Gradio app ###
title = "VisionBite 🍔👁"
description = "A ViT feature extractor computer vision model to classify images of food into 121 categories."
article = "The model has been trained on the Food121 dataset using ViT Base 16."

# Sorted so the examples appear in a stable order in the UI; the extension
# check is case-insensitive so files such as "photo.JPG" are included too.
example_list = [
    [os.path.join("examples", example)]
    for example in sorted(os.listdir("examples"))
    if example.lower().endswith((".jpg", ".png", ".jpeg"))
]

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

demo.launch()