import gradio as gr
from transformers import CLIPProcessor, CLIPModel
import torch
import requests
from PIL import Image
from io import BytesIO

# Candidate labels for zero-shot classification
fashion_items = ['top', 'trousers', 'bottom', 'jumper']

# Load the standard CLIP model and its processor
model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

# CLIP processes text and images together, so no need for separate text preprocessing

# Prediction function: fetch an image from a URL and score it against the labels
def predict_from_url(url):
    # Check if the URL is empty
    if not url:
        return {"Error": "Please input a URL"}
    try:
        # Fetch with a timeout; convert to RGB so palette/RGBA images don't trip up the processor
        image = Image.open(BytesIO(requests.get(url, timeout=10).content)).convert("RGB")
    except Exception as e:
        return {"Error": f"Failed to load image: {str(e)}"}
    # Preprocess the image and tokenize the candidate labels in a single call
    inputs = processor(images=image, text=fashion_items, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax over the image-text similarity logits yields one probability per label
    logits_per_image = outputs.logits_per_image
    text_probs = logits_per_image.softmax(dim=-1)
    return {fashion_items[i]: float(text_probs[0, i]) for i in range(len(fashion_items))}
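# Quick sanity check (a sketch, not part of the app): uncomment to try the
# classifier without the UI. The URL below is a hypothetical placeholder --
# substitute any publicly reachable image of a garment.
# example_url = "https://example.com/fashion-item.jpg"  # hypothetical URL
# print(predict_from_url(example_url))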
# Gradio interface
demo = gr.Interface(
    fn=predict_from_url,
    inputs=gr.Textbox(label="Enter Image URL"),
    outputs=gr.Label(label="Classification Results"),
    title="Fashion Item Classifier",
    allow_flagging="never"
)

# Launch the interface
demo.launch()
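# To run locally (assuming gradio, transformers, torch, requests, and Pillow
# are installed): save this file as app.py, run `python app.py`, and open the
# local URL that Gradio prints. On Hugging Face Spaces, an app.py with these
# contents is served automatically once the Space builds.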