# clip-embeddings / handler.py
# Uploaded by sbarouti (page avatar: "sbarouti's picture")
# Commit message: Update handler.py
# Commit: 4434581 (verified)
import base64
import logging
from io import BytesIO
from typing import Any, Dict, List

import numpy as np
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
class EndpointHandler():
    """Endpoint handler that returns CLIP embeddings for text and/or an image.

    Expected request shape::

        {"inputs": {"text": "<string>", "image": "<base64-encoded image bytes>"}}

    Either field may be omitted, but at least one must be present.
    """

    # Checkpoint used for both the model and its processor.
    MODEL_ID = "openai/clip-vit-base-patch32"

    def __init__(self, path=""):
        """Preload everything needed at inference time.

        Args:
            path: Accepted for compatibility with the handler interface.
                It is not used; the checkpoint is always loaded by
                ``MODEL_ID``.
        """
        self.model = CLIPModel.from_pretrained(self.MODEL_ID)
        self.processor = CLIPProcessor.from_pretrained(self.MODEL_ID)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Embed the text and/or image contained in a request.

        Args:
            data: Request body; ``data["inputs"]`` must be a dict with a
                ``"text"`` string and/or an ``"image"`` base64 string.

        Returns:
            A dict with:
              * ``"embeddings"``: the image embedding when an image was
                supplied, otherwise the text embedding (flat list of floats).
              * ``"image_embeddings"`` / ``"text_embeddings"``: present for
                each modality that was supplied.

        Raises:
            ValueError: If ``inputs`` is missing or malformed, if the image
                cannot be decoded, or if neither text nor image is given.
        """
        log = logging.getLogger(__name__)

        payload = data.get("inputs") if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            raise ValueError(
                'Request must contain an "inputs" object with a "text" '
                'and/or "image" field.'
            )

        text = payload.get("text")
        image = self._decode_image(payload.get("image"))
        if not text and image is None:
            raise ValueError("Both text and image cannot be None. Provide at least one.")

        # Log only the request shape; never dump the (potentially huge)
        # base64 payload into the logs.
        log.debug("embedding request: text=%s image=%s", bool(text), image is not None)

        result: Dict[str, Any] = {}
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            if text:
                result["text_embeddings"] = self._embed_text(text)
            if image is not None:
                result["image_embeddings"] = self._embed_image(image)

        # Keep the historical contract: "embeddings" is the image embedding
        # whenever an image was supplied; otherwise fall back to the text one.
        result["embeddings"] = result.get("image_embeddings", result.get("text_embeddings"))
        return result

    @staticmethod
    def _decode_image(encoded):
        """Decode a base64 string into an RGB PIL image, or return None if empty."""
        if not encoded:
            return None
        try:
            # convert() forces the lazy Image.open() to actually read the
            # pixel data, so corrupt payloads fail here and not in the model.
            return Image.open(BytesIO(base64.b64decode(encoded))).convert("RGB")
        except Exception as e:
            raise ValueError(f"Error decoding image: {e}") from e

    def _embed_text(self, text):
        """Run only the text tower and return the projected, normalized embedding."""
        # truncation=True keeps over-long prompts within the model's context length.
        batch = self.processor(text=text, return_tensors="pt", padding=True, truncation=True)
        pooled = self.model.text_model(**batch).pooler_output
        return self._normalized_list(self.model.text_projection(pooled))

    def _embed_image(self, image):
        """Run only the vision tower and return the projected, normalized embedding."""
        batch = self.processor(images=image, return_tensors="pt")
        pooled = self.model.vision_model(pixel_values=batch["pixel_values"]).pooler_output
        return self._normalized_list(self.model.visual_projection(pooled))

    @staticmethod
    def _normalized_list(features):
        """L2-normalize along the last axis and flatten to a list of floats.

        NOTE(review): normalization is meant to match the ``image_embeds`` /
        ``text_embeds`` of the combined CLIP forward pass used previously;
        confirm against the installed transformers version.
        """
        features = features / features.norm(p=2, dim=-1, keepdim=True)
        return features.detach().cpu().flatten().tolist()