# Spaces:
# Sleeping
# Sleeping
# File size: 2,922 Bytes
import gradio as gr
import os
import torch
from torch import nn
import torchvision
from timeit import default_timer as timer
from typing import Tuple, Dict
from PIL import Image # Added for image verification and conversion
def create_effnetb2_model(num_classes: int = 3, seed: int = 42):
    """Build a frozen EfficientNetB2 backbone topped with a new classifier head.

    Args:
        num_classes: Number of output units of the replacement classifier.
        seed: Value handed to ``torch.manual_seed`` immediately before the
            new head is created.

    Returns:
        A ``(model, transforms)`` tuple: the EfficientNetB2 model with its
        classifier replaced, and the preprocessing transforms that ship with
        the default EfficientNetB2 weights.
    """
    pretrained = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    backbone = torchvision.models.efficientnet_b2(weights=pretrained)
    preprocess = pretrained.transforms()

    # Freeze all existing parameters; the head assigned below is created
    # afterwards, so its parameters keep their default requires_grad setting.
    for weight in backbone.parameters():
        weight.requires_grad = False

    # Seed right before building the head so its initial weights are repeatable.
    torch.manual_seed(seed)
    dropout = nn.Dropout(p=0.3, inplace=True)
    head = nn.Linear(in_features=1408, out_features=num_classes)
    backbone.classifier = nn.Sequential(dropout, head)

    return backbone, preprocess
# Load class names: one label per line. predict() pairs the i-th label with the
# i-th model output, so the file order has to match the order used in training.
# Explicit UTF-8 keeps decoding independent of the platform's default locale,
# and blank lines are skipped so that a stray/trailing empty line cannot add an
# empty label and shift or overrun the label-to-output mapping.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

# Create model and transforms (101 output units, matching the checkpoint below)
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes=101)

# Load pretrained weights
# map_location forces every tensor onto the CPU regardless of where the
# checkpoint was saved.
# NOTE(review): torch.load unpickles the file; only ship checkpoints from a
# trusted source alongside this app.
effnetb2.load_state_dict(
    torch.load(
        f="pretrained_effnetb2_feature_extractor_food101_20_percent.pth",
        map_location=torch.device("cpu"),
    )
)
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify one image and report how long the prediction took.

    Args:
        img: The image to classify. The code relies on its ``mode`` attribute
            and ``convert`` method (a PIL image, as requested by
            ``gr.Image(type="pil")``). ``None`` is rejected with a
            user-facing error.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps each entry of ``class_names`` to its
        softmax probability and ``pred_time`` is the elapsed time in seconds,
        rounded to 5 decimal places.

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    # Gradio hands the function None when the form is submitted without an
    # image; fail with a readable message instead of an AttributeError.
    if img is None:
        raise gr.Error("Please provide an image to classify.")

    start_time = timer()

    # Convert to RGB to avoid dtype issues
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Apply transforms and add a leading batch dimension
    img = effnetb2_transforms(img).unsqueeze(0)

    # Inference
    effnetb2.eval()
    with torch.inference_mode():
        pred_probs = torch.softmax(effnetb2(img), dim=1)

    # Pull the single row of probabilities out as plain floats in one call,
    # then pair them with the labels by position. zip stops at the shorter of
    # the two sequences.
    probabilities = pred_probs[0].tolist()
    pred_labels_and_probs = dict(zip(class_names, probabilities))

    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time
# Verify examples directory and images
# Builds example_list as a list of single-element lists ([path]), one per file
# in ./examples that Pillow can open and verify. Unreadable entries are
# reported on stdout and skipped rather than aborting start-up.
example_list = []
if os.path.isdir("examples"):
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # examples in the same order on every start-up.
    for example in sorted(os.listdir("examples")):
        example_path = os.path.join("examples", example)
        try:
            # The context manager closes the underlying file handle as soon as
            # verification is done instead of leaving it open.
            with Image.open(example_path) as example_img:
                example_img.verify()  # Verify image is not corrupted
        except Exception as e:  # best-effort: any failure just skips the entry
            print(f"Skipping example {example}: {e}")
        else:
            example_list.append([example_path])
# Gradio Interface Setup
# Text shown on the page: heading, short description, and footer article.
title = "FoodVision Big 🍔👁"
description = (
    "An EfficientNetB2 feature extractor computer vision model "
    "to classify images of food into 101 different classes."
)
article = "Created by [Ali Khalaji](https://github.com/codali-ml)."

# Gather the whole interface configuration in one place, then build the app.
# Input: a single image delivered to predict() as a PIL image.
# Outputs: a label widget limited to the top 5 classes, plus the timing value.
interface_config = dict(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)
demo = gr.Interface(**interface_config)

# Start the web server for the demo.
demo.launch()
|