from transformers import AutoProcessor, AutoModel
from PIL import Image
from io import BytesIO
import torch
import requests

# Load the CLIP checkpoint once at module level and move it to the GPU if one
# is available; the processor handles both image and text preprocessing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained("openai/clip-vit-large-patch14-336").to(device)
model.eval()  # inference only: disable dropout and other training-time behavior
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14-336")
def get_image_embedding(image_url):
    """Download an image and return its L2-normalized CLIP embedding, or None on failure."""
    try:
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()  # surface HTTP errors instead of parsing an error page
        img = Image.open(BytesIO(response.content)).convert("RGB")
        inputs = processor(images=img, return_tensors="pt").to(device)
        with torch.no_grad():
            emb = model.get_image_features(**inputs)
        # Normalize so that cosine similarity reduces to a plain dot product.
        emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
        return emb[0].cpu().numpy().tolist()
    except Exception as e:
        print(f"Image loading failed: {e}")
        return None
def get_text_embedding_clip(text_query):
    """Return the L2-normalized CLIP embedding for a text query."""
    inputs = processor(text=[text_query], return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        emb = model.get_text_features(**inputs)
    # Normalize into the same unit-length space as the image embeddings.
    emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
    return emb[0].cpu().numpy().tolist()
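
# --- Usage sketch (illustrative; not part of the original snippet) ---
# Because both embeddings are unit-length, cosine similarity is just a dot
# product. The image URL below is a hypothetical placeholder.
import numpy as np

if __name__ == "__main__":
    image_emb = get_image_embedding("https://example.com/sample.jpg")  # placeholder URL
    text_emb = get_text_embedding_clip("a photo of a cat")
    if image_emb is not None:
        print(f"cosine similarity: {np.dot(image_emb, text_emb):.4f}")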