"""Gradio demo that classifies a clothing image with a fine-tuned ResNet-50."""

from typing import Optional

import gradio as gr
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision import models

# Clothing1M class labels, indexed by the model's output logit position.
# NOTE(review): the order must match the label indices used when "model.pth"
# was trained -- verify against the training script.
clothing1m_classes = [
    "T-shirt", "Shirt", "Knitwear", "Chiffon", "Sweater", "Hoodie",
    "Windbreaker", "Jacket", "Down Coat", "Suits", "Shawl", "Dress",
    "Vest", "Underwear", "Shorts", "Trousers", "Jeans", "Leather Shoes",
    "Casual Shoes", "Sport Shoes", "Sandals",
]

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a ResNet-50 without pretrained weights and replace the final layer so
# its output size equals the number of labels (21), then load the checkpoint.
model = models.resnet50(weights=None)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(clothing1m_classes))
# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source (consider weights_only=True on
# PyTorch versions that support it).
model.load_state_dict(torch.load("model.pth", map_location=device))
model.to(device)
model.eval()

# Preprocessing pipeline, built once at import time instead of on every call.
_PREPROCESS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def preprocess_image(image: Image.Image) -> torch.Tensor:
    """Convert a PIL image into a normalized batch tensor on ``device``.

    The image is converted to RGB if necessary, resized to 224x224, turned
    into a tensor, normalized with the mean/std constants above, and given a
    leading batch dimension of size 1.

    Args:
        image: The PIL image to preprocess.

    Returns:
        A tensor holding the single preprocessed image, moved to ``device``.
    """
    # Normalize is configured for exactly three channels, so grayscale,
    # palette and RGBA inputs must be converted first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    return _PREPROCESS(image).unsqueeze(0).to(device)


def classify_image(image: Optional[Image.Image]) -> str:
    """Predict the clothing category for ``image``.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

    Returns:
        ``"Predicted Class: <name>"`` for the highest-scoring class, or a
        short prompt asking for an image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image."

    image_tensor = preprocess_image(image)
    with torch.no_grad():
        output = model(image_tensor)

    # The classifier head has len(clothing1m_classes) outputs, so the argmax
    # is always a valid index into the label list.
    predicted_class_idx = output.argmax(dim=1).item()
    predicted_class_name = clothing1m_classes[predicted_class_idx]
    return f"Predicted Class: {predicted_class_name}"


# Gradio UI: one image input, one text output.
interface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Clothing1M Classifier",
    description="Upload an image of clothing and get the predicted category.",
)

if __name__ == "__main__":
    interface.launch()