import torch
import torchvision.transforms as transforms
import gradio as gr
from torchvision import models
from PIL import Image

# Human-readable label for each model output index: position i in this list
# names the class predicted when the classifier's argmax is i.
# NOTE(review): the published Clothing1M benchmark is usually described with
# 14 categories -- confirm that these 21 labels, and their order, match the
# label set used to train the checkpoint loaded below.
clothing1m_classes = [
    "T-shirt",
    "Shirt",
    "Knitwear",
    "Chiffon",
    "Sweater",
    "Hoodie",
    "Windbreaker",
    "Jacket",
    "Down Coat",
    "Suits",
    "Shawl",
    "Dress",
    "Vest",
    "Underwear",
    "Shorts",
    "Trousers",
    "Jeans",
    "Leather Shoes",
    "Casual Shoes",
    "Sport Shoes",
    "Sandals",
]

# ✅ Set device
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load model
# Build a ResNet-50 without pretrained weights; every parameter is overwritten
# by the checkpoint loaded below.
model = models.resnet50(weights=None)
num_ftrs = model.fc.in_features
# Size the classification head from the label list rather than a hard-coded
# number, so the head and the index -> name lookup cannot drift apart. If the
# checkpoint was trained with a different class count, load_state_dict fails
# loudly on the shape mismatch instead of silently mislabelling predictions.
model.fc = torch.nn.Linear(num_ftrs, len(clothing1m_classes))
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source; on PyTorch versions that
# support it, consider passing weights_only=True.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.to(device)
# Switch to inference behaviour (e.g. batch-norm layers use running statistics).
model.eval()

# ✅ Define image preprocessing
def preprocess_image(image):
    """Turn a PIL image into a normalized, batched tensor on ``device``.

    Args:
        image: A ``PIL.Image.Image`` in any mode. Non-RGB images (grayscale,
            palette, RGBA, ...) are converted to RGB before transformation.

    Returns:
        A float tensor of shape ``(1, 3, 224, 224)`` placed on the
        module-level ``device``.
    """
    # Normalize below is given three per-channel statistics, so the tensor
    # must have exactly three channels; 1- or 4-channel inputs would fail.
    if image.mode != "RGB":
        image = image.convert("RGB")

    transform = transforms.Compose([
        # Fixed 224x224 input size (aspect ratio is not preserved).
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Standard ImageNet channel mean/std.
        # NOTE(review): confirm the checkpoint was trained with these values.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # unsqueeze(0) adds the batch dimension the model expects.
    return transform(image).unsqueeze(0).to(device)

# ✅ Define inference function
def classify_image(image):
    """Classify a clothing image and return a human-readable result string.

    Args:
        image: A ``PIL.Image.Image`` to classify, or ``None`` when no image
            was supplied (e.g. the form was submitted without an upload).

    Returns:
        ``"Predicted Class: <name>"`` for a valid image, where ``<name>`` is
        looked up in ``clothing1m_classes`` (``"Unknown"`` if the predicted
        index falls outside that list), or a short prompt asking for an image
        when ``image`` is ``None``.
    """
    # Guard against an empty submission instead of crashing in preprocessing.
    if image is None:
        return "No image provided. Please upload an image."

    image_tensor = preprocess_image(image)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        output = model(image_tensor)
        predicted_class_idx = output.argmax(dim=1).item()

    # Defensive lookup in case the model head has more outputs than labels.
    if predicted_class_idx < len(clothing1m_classes):
        predicted_class_name = clothing1m_classes[predicted_class_idx]
    else:
        predicted_class_name = "Unknown"
    return f"Predicted Class: {predicted_class_name}"

# ✅ Create Gradio Interface
# Single-image input (handed to classify_image as a PIL image) and a plain
# text output showing the predicted label.
interface = gr.Interface(
    title="Clothing1M Classifier",
    description="Upload an image of clothing and get the predicted category.",
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# Start the web UI only when this file is executed as a script, so importing
# the module does not launch a server.
if __name__ == "__main__":
    interface.launch()