# Hugging Face Space: Phone View Classifier (CLIP embeddings + SVM head)
import gradio as gr
import joblib
import numpy as np
import torch
from PIL import Image
from huggingface_hub import hf_hub_download
from transformers import CLIPModel, CLIPProcessor

# All artifacts (CLIP weights, processor config, SVM head) live in one Hub repo.
_REPO_ID = "Ut14/clip-phone-view"

# --- CLIP backbone and its preprocessing pipeline ---
clip_model = CLIPModel.from_pretrained(_REPO_ID, subfolder="clip_model")
clip_processor = CLIPProcessor.from_pretrained(_REPO_ID, subfolder="clip_processor")

# --- Linear classifier trained on CLIP image embeddings ---
svm_model_path = hf_hub_download(repo_id=_REPO_ID, filename="svm_phone_view_model.joblib")
svm_model = joblib.load(svm_model_path)

# --- Maps SVM class indices to human-readable view names ---
label_map = {0: "Front", 1: "Back", 2: "Side"}
# --- Feature extraction ---
def extract_clip_embedding(image: Image.Image) -> np.ndarray:
    """Encode a PIL image into a 1-D CLIP image-embedding vector.

    The image is forced to RGB (CLIP expects 3 channels), run through the
    processor/backbone without gradient tracking, and returned as a flat
    NumPy array suitable for the SVM head.
    """
    rgb = image.convert("RGB")
    batch = clip_processor(images=rgb, return_tensors="pt")
    with torch.no_grad():
        embedding = clip_model.get_image_features(**batch)
    return embedding.squeeze().numpy()
# --- Prediction Function for Gradio ---
def predict_view(image: Image.Image):
    """Classify a phone photo as Front, Back, or Side.

    Args:
        image: PIL image from the Gradio input, or None when the user
            submits without uploading anything.

    Returns:
        A human-readable label string for the gr.Label output.
    """
    # Gradio delivers None when no image was uploaded; without this guard
    # image.convert(...) raises AttributeError and the UI shows a stack trace.
    if image is None:
        return "Please upload an image."
    embedding = extract_clip_embedding(image)
    pred = svm_model.predict([embedding])[0]
    # int(...) normalizes numpy integer classes to plain ints for the dict lookup.
    return label_map[int(pred)]
# --- Gradio Interface ---
# Single-image input, single-label output; prediction logic lives in predict_view.
iface = gr.Interface(
    fn=predict_view,
    inputs=gr.Image(type="pil", label="Upload Phone Image"),
    outputs=gr.Label(num_top_classes=1, label="Predicted View"),
    title="📱 Phone View Classifier",
    description=(
        "Upload an image of a phone (front, back, or side) and get the "
        "predicted view using CLIP + SVM."
    ),
)

if __name__ == "__main__":
    iface.launch()