import gradio as gr
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image

# Load the ImageNet class labels
import json
import urllib.request

# Download ImageNet class labels: one class name per line; `predict` looks
# names up by class index, so line order must be preserved.
labels_url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
# Use a context manager so the HTTP response (and its socket) is closed
# deterministically instead of being left to the garbage collector.
with urllib.request.urlopen(labels_url) as response:
    # splitlines() does not produce the empty trailing entry that
    # split('\n') yields when the file ends with a newline, and it also
    # tolerates '\r\n' line endings.
    labels = response.read().decode('utf-8').splitlines()

# Initialize model
model = models.resnet50()
num_classes = 1000  # ImageNet1k classes

# Load your trained weights
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source (or pass
# weights_only=True on PyTorch versions that support it).
checkpoint = torch.load('model_best.pth.tar', map_location=torch.device('cpu'))

# Accept both a training checkpoint that wraps the weights under
# 'state_dict' and a file that is itself a bare state dict.
state_dict = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint

# Remove the 'module.' prefix if the model was trained with DataParallel.
# Only a *leading* prefix is stripped: str.replace would also rewrite any key
# that merely contains 'module.' somewhere in the middle. Keys without the
# prefix pass through unchanged, so this is safe for both checkpoint layouts.
_dp_prefix = 'module.'
state_dict = {
    (key[len(_dp_prefix):] if key.startswith(_dp_prefix) else key): value
    for key, value in state_dict.items()
}
model.load_state_dict(state_dict)

# Switch to inference mode (affects layers such as batch norm and dropout).
model.eval()

# Preprocessing applied to every input image, in order:
# resize to 256 -> center-crop to 224 -> convert to tensor -> normalize
# each channel with the mean/std below.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform = transforms.Compose(_preprocess_steps)

def predict(image):
    """Classify an image and return its most likely class labels.

    Args:
        image: A PIL image (the Gradio input is declared with type="pil"),
            or None when no image was supplied.

    Returns:
        A dict mapping class name to probability (Python float) for the
        top predictions, at most five entries. Empty when ``image`` is None.
    """
    # The callback can be invoked without an image (e.g. submit on an empty
    # input); return an empty result instead of failing on `image.mode`.
    if image is None:
        return {}

    # The normalization in `transform` is defined for three channels, so
    # grayscale / RGBA / palette images are converted to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Preprocess and add the leading batch dimension the model expects.
    input_batch = transform(image).unsqueeze(0)

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model(input_batch)

    # Turn the logits of the single batch item into probabilities.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)

    # Never ask topk for more entries than there are classes.
    k = min(5, probabilities.numel())
    top_prob, top_indices = torch.topk(probabilities, k)

    # tolist() yields plain Python floats/ints, so labels are indexed with
    # real ints rather than 0-dim tensors.
    results = {}
    for prob, idx in zip(top_prob.tolist(), top_indices.tolist()):
        class_name = labels[idx]
        # Label names are not guaranteed to be unique; accumulate rather
        # than silently overwrite an earlier entry with the same name.
        results[class_name] = results.get(class_name, 0.0) + prob

    return results

# Gradio UI: page title and markdown description, followed by the
# classification interface itself.
title = "ImageNet1k Classification"
description = """Accuracy for this model is top@1: **75.212%**, top@5: 92.668% after 80 epochs. Upload an image and the model will predict its category using the ImageNet1k classification system.
Tips for best results:
- Use clear, well-lit images; ensure the main subject is centered and clearly visible
- The model works best with common objects, animals, and scenes
- Images can be any size or color - they'll be automatically resized to 224x224

The model will show the top 5 most likely categories with confidence scores.
Link to github repo: [https://github.com/dhairyag/ImageNet1k_ResNet50](https://github.com/dhairyag/ImageNet1k_ResNet50) 
"""

# Input is handed to `predict` as a PIL image; output is a label widget
# limited to the five highest-scoring classes.
image_input = gr.Image(type="pil")
label_output = gr.Label(num_top_classes=5)

# Example inputs passed to the interface: one single-element row per image.
example_inputs = [[path] for path in ("examples/dog.jpg", "examples/cat.jpg")]

iface = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=label_output,
    title=title,
    description=description,
    examples=example_inputs,
)

# Start the app and request a public share link.
iface.launch(share=True)