# Cunha / app.py
# Arpit1234's picture
# Create app.py
# 2ea9cf6 verified
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import torch
import gradio as gr
# Load the pretrained ViT checkpoint: first its input preprocessor, then the
# classifier itself. Both are fetched by the same checkpoint name.
# NOTE(review): recent transformers releases appear to deprecate
# ViTFeatureExtractor in favour of ViTImageProcessor — confirm against the
# installed version before upgrading.
model_name = "google/vit-base-patch16-224"
feature_extractor, model = (
    loader.from_pretrained(model_name)
    for loader in (ViTFeatureExtractor, ViTForImageClassification)
)
# Function to load and preprocess the image
def preprocess_image(image):
    """Turn an input image into the pixel tensor fed to the ViT model.

    Args:
        image: The image to classify. A PIL image that is not already in
            RGB mode (for example grayscale, palette or RGBA) is converted
            to RGB first; any other input is handed to the feature
            extractor unchanged.

    Returns:
        The ``pixel_values`` entry of the feature extractor's output,
        requested as PyTorch tensors (``return_tensors="pt"``).
    """
    # Presumably the checkpoint expects 3-channel input, so normalise the
    # colour mode up front instead of letting 1- or 4-channel images fail
    # further down the pipeline.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")
    inputs = feature_extractor(images=image, return_tensors="pt")
    return inputs["pixel_values"]
# Functions to predict the class of the image
def _class_probabilities(image):
    """Run the classifier on one image and return its class probabilities.

    Returns:
        A 1-D tensor: the softmax over the last dimension of the model's
        logits, taken from the first row of the batch.
    """
    pixel_values = preprocess_image(image)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(pixel_values).logits
    return logits.softmax(dim=-1)[0]


def predict_image(image):
    """Return the label of the single most likely class for ``image``.

    Args:
        image: The image to classify, forwarded to ``preprocess_image``.

    Returns:
        The ``model.config.id2label`` entry for the highest-scoring class.
    """
    best_idx = _class_probabilities(image).argmax(-1).item()
    return model.config.id2label[best_idx]


def _predict_confidences(image, top_k=3):
    """Return a ``{label: probability}`` mapping for the ``top_k`` best classes.

    A mapping (rather than a bare label string) is what lets the Label
    output component display several ranked classes with their scores.

    Args:
        image: The image to classify, forwarded to ``preprocess_image``.
        top_k: How many of the highest-probability classes to report;
            capped at the number of classes the model outputs.

    Returns:
        A dict mapping label text to probability (Python floats).
    """
    probabilities = _class_probabilities(image)
    k = min(top_k, probabilities.numel())
    scores, indices = torch.topk(probabilities, k)
    confidences = {}
    for score, idx in zip(scores.tolist(), indices.tolist()):
        label = model.config.id2label[idx]
        # Accumulate in case two class ids share the same label text, so
        # neither score is silently overwritten.
        confidences[label] = confidences.get(label, 0.0) + score
    return confidences


# Define the Gradio interface
# Components are taken from the top-level gradio namespace; the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are not available in current
# Gradio releases.
image_input = gr.Image(type="pil")
label_output = gr.Label(num_top_classes=3)
interface = gr.Interface(
    # The confidence-returning helper is used here so that the Label
    # component actually has three ranked classes to show.
    fn=_predict_confidences,
    inputs=image_input,
    outputs=label_output,
    title="Image Classification with ViT",
    description="Upload an image and get the predicted label using Vision Transformer (ViT).",
)
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()