"""
CLIP Image Embedding Generator

A simple Gradio-based application for generating CLIP embeddings from uploaded images.
Uses OpenAI's CLIP model with proper preprocessing.
"""
| import gradio as gr | |
| from transformers import CLIPProcessor, CLIPModel | |
| from PIL import Image | |
| import torch | |
| import numpy as np | |
| import spaces | |
# Load model and processor once at import time so every request reuses the
# same weights.  The repo id was previously duplicated in two call sites;
# keep it in a single constant so the checkpoint can be swapped in one place.
_MODEL_NAME = "openai/clip-vit-large-patch14"
model: CLIPModel = CLIPModel.from_pretrained(_MODEL_NAME)
processor: CLIPProcessor = CLIPProcessor.from_pretrained(_MODEL_NAME)
model.eval()  # inference only: disables dropout and other train-time behavior
@spaces.GPU  # `spaces` was imported but never used; on ZeroGPU Spaces this
# decorator is what actually allocates a GPU for the call — without it
# torch.cuda.is_available() stays False and inference silently runs on CPU.
def get_embedding(image: Image.Image) -> str:
    """
    Generate an L2-normalized CLIP embedding for an image.

    Args:
        image (Image.Image): PIL Image object to process.

    Returns:
        str: The full embedding vector rendered as a Python list string.
    """
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    # CLIP's own processor performs the resize / center-crop / normalization
    # the checkpoint was trained with.
    inputs = processor(images=image, return_tensors="pt").to(device)
    # nn.Module.to() moves the module in place and returns it, so repeated
    # calls after the first are cheap no-ops.
    model.to(device)
    with torch.no_grad():
        emb: torch.Tensor = model.get_image_features(**inputs)
        # L2-normalize so downstream cosine similarity reduces to a dot product.
        emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
    # Squeeze the batch dimension and serialize the vector as a list string.
    return str(emb.cpu().numpy().squeeze().tolist())
# Create the Gradio interface: single image input -> text box showing the
# serialized embedding returned by get_embedding().
demo: gr.Interface = gr.Interface(
    fn=get_embedding,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="Embedding", lines=20, max_lines=30),
    # NOTE(review): `allow_flagging` is deprecated in Gradio 4.x (renamed
    # `flagging_mode` in 5.x) — confirm the pinned gradio version accepts it.
    allow_flagging="never",
    title="CLIP Image Embedding Generator",
    description="Upload an image to generate its CLIP embedding vector.",
    theme=gr.themes.Soft()
)

if __name__ == "__main__":
    # mcp_server=True additionally exposes the function as an MCP tool endpoint.
    demo.launch(mcp_server=True)