Spaces:
Sleeping
Sleeping
# app.py — Gradio app that classifies phone photos as front/back/top/bottom
# by feeding CLIP image embeddings to a pre-trained scikit-learn SVM.

import os
import torch
import numpy as np
from PIL import Image
import joblib
import gradio as gr
from transformers import CLIPProcessor, CLIPModel

# Single source of truth for the CLIP checkpoint (was duplicated in the
# two from_pretrained calls below).
CLIP_MODEL_NAME = "openai/clip-vit-base-patch32"

# --- Load CLIP Model and Processor ---
clip_model = CLIPModel.from_pretrained(CLIP_MODEL_NAME)
clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_NAME)

# --- Load Trained SVM Model ---
# NOTE(review): assumes svm_phone_view_model.joblib is in the working
# directory and was trained on CLIP ViT-B/32 image embeddings — confirm.
svm_model = joblib.load("svm_phone_view_model.joblib")

# --- Label Mapping: SVM class index -> human-readable view name ---
label_map = {0: "back", 1: "bottom", 2: "front", 3: "top"}
# --- Function to Extract CLIP Embedding ---
def extract_clip_embedding(image):
    """Embed a PIL image with CLIP and return the features as a 1-D numpy array.

    Uses the module-level clip_processor/clip_model; inference runs with
    gradients disabled since we only need the forward pass.
    """
    processed = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        image_features = clip_model.get_image_features(**processed)
    return image_features.squeeze().numpy()
# --- Gradio prediction function ---
def predict_image_view(image):
    """Classify the phone view in *image* and return a formatted result string.

    Embeds the image via CLIP, scores it with the SVM, and reports the
    highest-probability class with its confidence percentage.
    """
    feature_vec = extract_clip_embedding(image)
    class_probs = svm_model.predict_proba([feature_vec])[0]
    best = int(np.argmax(class_probs))
    view_name = label_map[best]
    return f"View: {view_name.upper()} ({class_probs[best] * 100:.2f}%)"
# --- Launch Gradio interface ---
# Single-image input, plain-text output; prediction logic lives in
# predict_image_view above.
demo = gr.Interface(
    fn=predict_image_view,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Phone View Classifier (4-class)",
    description="Upload an image of a phone and classify it as one of: Front, Back, Top, Bottom",
)

if __name__ == "__main__":
    demo.launch()