# app.py — Phone View Classifier (CLIP + SVM)
# Author: Ut14 — commit c8a6c88 (verified)
import gradio as gr
import torch
import numpy as np
from PIL import Image
import joblib
from transformers import CLIPProcessor, CLIPModel
from huggingface_hub import hf_hub_download
# --- Load CLIP Model & Processor from Hugging Face Hub ---
# Model weights and the matching preprocessor live in subfolders of the same repo.
clip_model = CLIPModel.from_pretrained("Ut14/clip-phone-view", subfolder="clip_model")
clip_processor = CLIPProcessor.from_pretrained("Ut14/clip-phone-view", subfolder="clip_processor")
# --- Download SVM model from Hugging Face Hub ---
# hf_hub_download caches the file locally and returns its path; the SVM
# classifies CLIP image embeddings produced below.
svm_model_path = hf_hub_download(repo_id="Ut14/clip-phone-view", filename="svm_phone_view_model.joblib")
svm_model = joblib.load(svm_model_path)
# --- Label Mapping ---
# Maps the SVM's integer class ids to human-readable phone-view names.
label_map = {0: "Front", 1: "Back", 2: "Side"}
# --- Extract Features ---
def extract_clip_embedding(image: Image.Image) -> np.ndarray:
    """Encode a PIL image into a CLIP image-embedding vector.

    Args:
        image: Input image in any PIL mode; it is converted to RGB first.

    Returns:
        A numpy array of CLIP image features (batch dim squeezed out).
    """
    rgb = image.convert("RGB")
    batch = clip_processor(images=rgb, return_tensors="pt")
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        embedding = clip_model.get_image_features(**batch)
    return embedding.squeeze().numpy()
# --- Prediction Function for Gradio ---
def predict_view(image: Image.Image):
    """Classify a phone photo and return its view name ("Front"/"Back"/"Side").

    Args:
        image: PIL image of a phone, as supplied by the Gradio input widget.

    Returns:
        The human-readable view label for the SVM's predicted class.
    """
    features = extract_clip_embedding(image)
    # SVM expects a 2-D batch; wrap the single embedding and take the first result.
    predicted_class = svm_model.predict([features])[0]
    return label_map[predicted_class]
# --- Gradio Interface ---
# --- Gradio Interface ---
# Single image in, single predicted label out.
image_input = gr.Image(type="pil", label="Upload Phone Image")
label_output = gr.Label(num_top_classes=1, label="Predicted View")

iface = gr.Interface(
    fn=predict_view,
    inputs=image_input,
    outputs=label_output,
    title="📱 Phone View Classifier",
    description="Upload an image of a phone (front, back, or side) and get the predicted view using CLIP + SVM.",
)

if __name__ == "__main__":
    # Launch the web app only when executed as a script.
    iface.launch()