Spaces:

Harsh72AI
/

Food-Vision-Mini

Sleeping

App Files Files Community

Food-Vision-Mini / app.py

Harsh72AI

Uploaded Project Files

deb67e9 about 2 years ago

raw

history blame contribute delete

2.37 kB


	import gradio as gr
	import torch
	import os

	from PIL import Image
	from typing import Tuple, Dict, List
	from timeit import default_timer as timer

	from model import create_vit_b_16_swag

	# Labels the classifier can predict; a label's position in this list is the
	# index used to look up its probability in the model output.
	class_names = ['Pizza', 'Steak', 'Sushi']

	# Build a fresh model with one output per label, together with the image
	# transformation pipeline returned alongside it.
	vit_swag_model, vit_swag_transforms = create_vit_b_16_swag(
	    num_classes=len(class_names),
	)

	# Restore the trained weights from disk, mapping every tensor onto the CPU.
	vit_swag_model.load_state_dict(
	    torch.load(
	        'foodvision_mini_vit_swag_model.pt',
	        map_location=torch.device('cpu'),
	    )
	)


	# -------------- Model Predicting Function --------------

	# Create Predicting Function
	def predict(img) -> Tuple[Dict[str, float], float]:
	    """Classify an image and report how long the prediction took.

	    Args:
	        img: The image to classify. It is passed unchanged to
	            ``vit_swag_transforms``; the Gradio input component in this file
	            is configured with ``type="pil"``.

	    Returns:
	        A ``(pred_probs, pred_time)`` tuple. ``pred_probs`` maps each entry
	        of ``class_names`` to its softmax probability, and ``pred_time`` is
	        the elapsed time in seconds rounded to 5 decimal places.

	    Raises:
	        gr.Error: If ``img`` is ``None`` (for example, the form was submitted
	            without an image), so the UI shows a readable message.
	    """
	    if img is None:
	        raise gr.Error("Please provide an image to classify.")

	    # Start the timer
	    start_time = timer()

	    # Transform the image and add a leading batch dimension for the model
	    model_input = vit_swag_transforms(img).to("cpu").unsqueeze(dim=0)

	    # Inference only: switch to eval mode and disable autograd tracking
	    vit_swag_model.eval()
	    with torch.inference_mode():
	        class_probs = torch.softmax(vit_swag_model(model_input), dim=1)[0]

	    # Pair every label with its probability as a plain Python float
	    pred_probs = dict(zip(class_names, class_probs.tolist()))

	    # Calculate the prediction time
	    pred_time = round(timer() - start_time, 5)

	    return pred_probs, pred_time


	# -------------- Building Gradio App --------------

	# Text shown on the demo page: heading, short summary and footer
	title = "FoodVision Mini 🍕🥩🍣"
	description = "A ViT (Vision Transformer) SWAG weighted feature extractor computer vision model to classify images of food as pizza, steak or sushi."
	article = "Created by Harsh Singh [-Github-](https://github.com/HarshSingh2009/)"

	# Example images offered in the UI.
	# NOTE(review): the first file name uses an underscore ("pizza_img") while
	# the other two use a hyphen -- confirm it matches the file on disk.
	example_list = ['example-pizza_img.jpeg', 'example-steak-img.jpeg', 'example-sushi-img.jpeg']

	# Create the Gradio demo
	demo = gr.Interface(
	    fn=predict,  # maps the input image to the two outputs below
	    inputs=gr.Image(type="pil"),  # the image is handed to `predict` as a PIL image
	    outputs=[
	        gr.Label(num_top_classes=3, label="Predictions"),  # first return value of `predict`
	        gr.Number(label="Prediction time (s)"),  # second return value of `predict`
	    ],
	    examples=example_list,
	    title=title,
	    description=description,
	    article=article,
	)

	# Launch the demo!
	demo.launch()