ImageNet1k / app.py
dhairyashil's picture
acc in bold letters
0036610
import gradio as gr
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
# Load the ImageNet class labels
import json
import urllib.request
# Download ImageNet class labels (one class name per line, indexed by class id)
labels_url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
# Use a context manager so the HTTP response is closed as soon as it is read.
with urllib.request.urlopen(labels_url) as response:
    labels_text = response.read().decode('utf-8')
# splitlines() handles both '\n' and '\r\n' endings, and dropping blank lines
# avoids the empty trailing entry that split('\n') yields when the file ends
# with a newline.
labels = [line.strip() for line in labels_text.splitlines() if line.strip()]
def _strip_module_prefix(state_dict):
    """Return a copy of *state_dict* with a leading 'module.' removed from keys.

    Models trained with DataParallel store their parameters under keys that
    start with 'module.'. Only that leading prefix is removed; a 'module.'
    substring occurring later in a key is left untouched.
    """
    prefix = 'module.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


# Initialize model
num_classes = 1000  # ImageNet1k classes
# Pass the head size explicitly so the architecture is tied to num_classes.
model = models.resnet50(num_classes=num_classes)

# Load your trained weights
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
checkpoint = torch.load('model_best.pth.tar', map_location=torch.device('cpu'))
# The checkpoint is either a dict wrapping the weights under 'state_dict'
# or the state dict itself.
state_dict = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint
# Remove 'module.' prefix if model was trained with DataParallel
model.load_state_dict(_strip_module_prefix(state_dict))
# Switch to inference mode.
model.eval()
# Preprocessing pipeline applied to every input image before inference.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics - confirm they match the training pipeline.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.Resize(256),        # resize before cropping
    transforms.CenterCrop(224),    # keep the central 224x224 region
    transforms.ToTensor(),         # PIL image -> tensor
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)
def predict(image):
    """Classify an image and return its five most probable class labels.

    Args:
        image: A PIL-style image (must expose ``mode`` and ``convert``), or
            ``None`` when the caller has no image to classify.

    Returns:
        A dict mapping class name to probability for the top five classes,
        or an empty dict when ``image`` is ``None``.
    """
    # The UI can invoke this callback without an uploaded image; return an
    # empty result instead of failing on ``image.mode``.
    if image is None:
        return {}

    # Ensure image is in RGB format
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Apply transforms and add the leading batch dimension the model expects
    input_batch = transform(image).unsqueeze(0)

    # Get prediction without tracking gradients
    with torch.no_grad():
        output = model(input_batch)

    # Convert the logits of the single batch item into probabilities
    probabilities = torch.nn.functional.softmax(output[0], dim=0)

    # Get top 5 predictions
    top5_prob, top5_indices = torch.topk(probabilities, 5)

    # tolist() yields plain Python floats/ints, so the labels list is indexed
    # with real integers and the values are directly serialisable.
    return {
        labels[class_index]: probability
        for probability, class_index in zip(top5_prob.tolist(), top5_indices.tolist())
    }
# Build and launch the Gradio UI around predict().
title = "ImageNet1k Classification"
description = """Accuracy for this model is top@1: **75.212%**, top@5: 92.668% after 80 epochs. Upload an image and the model will predict its category using the ImageNet1k classification system.
Tips for best results:
- Use clear, well-lit images; ensure the main subject is centered and clearly visible
- The model works best with common objects, animals, and scenes
- Images can be any size or color - they'll be automatically resized to 224x224
The model will show the top 5 most likely categories with confidence scores.
Link to github repo: [https://github.com/dhairyag/ImageNet1k_ResNet50](https://github.com/dhairyag/ImageNet1k_ResNet50)
"""

# Input arrives as a PIL image; output is a label widget showing five classes.
image_input = gr.Image(type="pil")
label_output = gr.Label(num_top_classes=5)

# Sample images offered in the UI, one single-input row per example.
example_images = [
    ["examples/dog.jpg"],
    ["examples/cat.jpg"],
]

iface = gr.Interface(
    title=title,
    description=description,
    fn=predict,
    inputs=image_input,
    outputs=label_output,
    examples=example_images,
)

# NOTE(review): share=True requests a public share link; some hosted
# environments may ignore it - confirm for the deployment target.
iface.launch(share=True)