"""Gradio demo: classify an uploaded image with a ResNet-50 trained on ImageNet-1k.

At import time the script downloads the class-name list, builds a ResNet-50,
loads trained weights from ``model_best.pth.tar`` on CPU, and launches a
Gradio interface that shows the five most probable classes for an image.
"""

import json
import urllib.request

import gradio as gr
import torch
import torchvision.models as models
from PIL import Image
from torchvision import transforms

# Text file with the ImageNet class names, one per line.
# Assumes line order matches the model's output index order, since predict()
# looks names up by output index -- TODO confirm against the training labels.
labels_url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"


def _load_labels(url):
    """Download the class-name file at *url* and return its lines as a list."""
    # The context manager closes the HTTP response; the timeout keeps a dead
    # network from hanging app start-up forever.
    with urllib.request.urlopen(url, timeout=30) as response:
        text = response.read().decode('utf-8')
    # splitlines() does not produce a phantom empty entry for a trailing newline.
    return text.splitlines()


def _strip_module_prefix(state_dict):
    """Return a copy of *state_dict* with a leading ``module.`` removed from keys.

    Models trained with DataParallel save their parameters under a
    ``module.`` prefix. Only a leading prefix is stripped, so a key that merely
    contains ``module.`` somewhere in the middle is left untouched.
    """
    prefix = 'module.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def _load_model(checkpoint_path, num_classes):
    """Build a ResNet-50 and load weights from *checkpoint_path* onto the CPU.

    The checkpoint may be either a dict with a ``'state_dict'`` entry or a bare
    state dict. The model is returned in eval mode.
    """
    net = models.resnet50(num_classes=num_classes)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint
    net.load_state_dict(_strip_module_prefix(state_dict))
    net.eval()
    return net


labels = _load_labels(labels_url)

num_classes = 1000  # ImageNet1k classes

# Load your trained weights
model = _load_model('model_best.pth.tar', num_classes)

# Define image transforms
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def predict(image):
    """Return the top-5 class names and their probabilities for *image*.

    Args:
        image: A PIL image (the interface is configured with ``type="pil"``),
            or ``None`` when the user submits without providing an image.

    Returns:
        A dict mapping class name to softmax probability (a Python float) for
        the five highest-scoring classes, or an empty dict if *image* is
        ``None``.
    """
    if image is None:
        return {}

    # Ensure image is in RGB format
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Apply transforms and add a leading batch dimension
    input_batch = transform(image).unsqueeze(0)

    # Inference only: no gradients needed
    with torch.no_grad():
        output = model(input_batch)

    # Softmax over the class scores of the single image in the batch
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    top5_prob, top5_indices = torch.topk(probabilities, 5)

    # tolist() yields plain Python floats/ints for dict values and list indexing
    return {
        labels[idx]: prob
        for prob, idx in zip(top5_prob.tolist(), top5_indices.tolist())
    }


# Create Gradio interface
title = "ImageNet1k Classification"
description = """Accuracy for this model is top@1: **75.212%**, top@5: 92.668% after 80 epochs.

Upload an image and the model will predict its category using the ImageNet1k classification system.

Tips for best results:
- Use clear, well-lit images; ensure the main subject is centered and clearly visible
- The model works best with common objects, animals, and scenes
- Images can be any size or color - they'll be automatically resized to 224x224

The model will show the top 5 most likely categories with confidence scores.

Link to github repo: [https://github.com/dhairyag/ImageNet1k_ResNet50](https://github.com/dhairyag/ImageNet1k_ResNet50)
"""

iface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Label(num_top_classes=5),
    title=title,
    description=description,
    examples=[
        ["examples/dog.jpg"],
        ["examples/cat.jpg"],
    ],
)

iface.launch(share=True)