"""Gradio demo app: classify food images into 101 classes with EfficientNetB2."""

import os
from timeit import default_timer as timer
from typing import Tuple, Dict

import gradio as gr
import torch
import torchvision
from PIL import Image  # Used to verify example images before listing them
from torch import nn


def create_effnetb2_model(num_classes: int = 3, seed: int = 42):
    """Create an EfficientNetB2 feature extractor model and its transforms.

    All parameters of the pretrained network are frozen, then the classifier
    head is replaced by a fresh ``Dropout(p=0.3)`` + ``Linear`` layer with
    ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the new classifier head.
        seed: Value passed to ``torch.manual_seed`` right before the new
            head is created, so its initial weights are reproducible.

    Returns:
        A ``(model, transforms)`` tuple: the modified EfficientNetB2 model
        and the preprocessing transforms bundled with the default weights.
    """
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    transforms = weights.transforms()
    model = torchvision.models.efficientnet_b2(weights=weights)

    # Freeze the pretrained layers; only the new head below stays trainable.
    for param in model.parameters():
        param.requires_grad = False

    # Seed immediately before building the head so its init is deterministic.
    torch.manual_seed(seed)
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=num_classes),
    )
    return model, transforms


# Load class names, one per line. Blank lines are skipped so that a trailing
# newline cannot introduce an empty label or make the list longer than the
# model's output.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

# Create model and transforms
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes=101)

# Load pretrained weights
# NOTE(review): torch.load unpickles the checkpoint, so only load trusted
# files; consider weights_only=True if the installed torch version supports it.
effnetb2.load_state_dict(
    torch.load(
        f="pretrained_effnetb2_feature_extractor_food101_20_percent.pth",
        map_location=torch.device("cpu"),
    )
)

# The model is only used for inference here, so switch to eval mode once
# instead of on every request.
effnetb2.eval()


def predict(img) -> Tuple[Dict, float]:
    """Transform ``img``, run a prediction and time it.

    Args:
        img: PIL image supplied by the Gradio image input (configured with
            ``type="pil"``). A ``None`` value (presumably what Gradio sends
            when nothing was uploaded) is rejected with a user-facing error.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where the first item
        maps every class name to its softmax probability and the second is
        the elapsed time in seconds, rounded to 5 decimal places.

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    if img is None:
        raise gr.Error("Please upload an image before submitting.")

    start_time = timer()

    # Convert to RGB to avoid dtype issues
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Apply transforms and add a leading batch dimension
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Inference
    with torch.inference_mode():
        pred_probs = torch.softmax(effnetb2(batch), dim=1)

    # Convert the single row of probabilities to Python floats in one call
    # rather than indexing the tensor once per class.
    probs = pred_probs[0].tolist()
    pred_labels_and_probs = {
        name: probs[i] for i, name in enumerate(class_names)
    }

    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time


# Verify examples directory and images
example_list = []
if os.path.isdir("examples"):
    # Sorted so the examples appear in a deterministic order.
    for example in sorted(os.listdir("examples")):
        example_path = os.path.join("examples", example)
        try:
            # The context manager closes the file handle after verification.
            with Image.open(example_path) as example_img:
                example_img.verify()  # Verify image is not corrupted
        except Exception as e:
            # Deliberately broad: unreadable or corrupt files are skipped
            # (and reported) rather than preventing the app from starting.
            print(f"Skipping example {example}: {e}")
        else:
            example_list.append([example_path])

# Gradio Interface Setup
title = "FoodVision Big 🍔👁"
description = "An EfficientNetB2 feature extractor computer vision model to classify images of food into 101 different classes."
article = "Created by [Ali Khalaji](https://github.com/codali-ml)."

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

demo.launch()