Spaces:
Runtime error
Runtime error
| ### Imports and class names setup ---------------------------------------------------- ### | |
| import os | |
| import torch | |
| import torchvision | |
| import gradio as gr | |
| from model import create_vit | |
| from timeit import default_timer as timer | |
| from typing import Tuple, Dict | |
# Setup class names: one label per non-blank line of class_names.txt.
# predict() pairs class_names[i] with the model's i-th output, so blank lines
# (e.g. a trailing newline at the end of the file) are skipped; otherwise they
# would become empty labels and push the label count past the model's outputs.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [food for food in (line.strip() for line in f) if food]
# Device agnostic code: pick "mps" when the MPS backend is available,
# otherwise "cuda" when CUDA is available, otherwise fall back to "cpu".
# NOTE(review): in the visible code the model is still created and loaded on
# the CPU explicitly, so `device` appears to be unused - confirm before relying on it.
if torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
### Model and transforms preparation ---------------------------------------------------- ###
# create_vit (imported from model.py) returns the model together with its transforms.
# The model is requested on the CPU; out_features=101 presumably matches the number
# of labels in class_names.txt - verify against that file.
vit_model, vit_transforms = create_vit(
    model=torchvision.models.vit_b_16,
    pretrained_weights=torchvision.models.ViT_B_16_Weights.DEFAULT,
    in_features=768,
    out_features=101,
    device='cpu',
)

# Load the saved weights, mapping every stored tensor onto the CPU
vit_model.load_state_dict(
    torch.load(
        f="pretrained_vit_feature_extractor_food101.pth",
        map_location=torch.device("cpu"),
    )
)
| ### Predict function ---------------------------------------------------- ### | |
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify a single image with the ViT model and time the prediction.

    Args:
        img: The input image. It is passed straight to ``vit_transforms``; the
            Gradio interface in this file is configured to supply a PIL image.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps every name in ``class_names`` to its
        softmax probability and ``pred_time`` is the elapsed time in seconds,
        rounded to 4 decimal places.

    Raises:
        IndexError: If ``class_names`` holds more labels than the model has outputs.
    """
    # Start a timer
    start_time = timer()

    # Transform the input image for use with the ViT model and add a batch
    # dimension on the 0th index, e.g. (3, 224, 224) -> (1, 3, 224, 224)
    batch = vit_transforms(img).unsqueeze(0)

    # Put model into eval mode, make prediction
    vit_model.eval()
    with torch.inference_mode():
        # Pass the transformed image through the model and turn the logits into probabilities
        pred_logits = vit_model(batch)
        pred_probs = torch.softmax(pred_logits, dim=1)

    # Convert the single batch row to Python floats in one call instead of
    # indexing the tensor once per class.
    probs = pred_probs[0].tolist()

    # Create a prediction label and prediction probability dictionary.
    # Indexing (rather than zip) keeps a label/output count mismatch loud.
    pred_labels_and_probs = {name: probs[i] for i, name in enumerate(class_names)}

    # Calculate pred time
    pred_time = round(timer() - start_time, 4)

    # Return pred dict and pred time
    return pred_labels_and_probs, pred_time
### Gradio interface and launch ------------------------------------------------------------------ ###
# Create title and description
title = "FoodVision: ViT Model"
description = "A ViT model trained on 20% of the Food101 dataset to classify Food images"

# Create example list: one single-item list per entry in the examples/ directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# example gallery order stable between runs.
example_list = [[os.path.join("examples", example)] for example in sorted(os.listdir("examples"))]

# Create the Gradio demo: predict() receives a PIL image and returns the
# label/probability dict (top 5 shown) plus the prediction time.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time(s)"),
    ],
    title=title,
    description=description,
    examples=example_list,
)

demo.launch()