roemmele committed on
Commit
0e2d235
·
1 Parent(s): a5f3f45

Added handler.py to support running model in Inference Endpoints.

Browse files

The call to the handler takes a single text and image as input.
It returns the embeddings of the text and image as well as their cosine similarity.

Files changed (1) hide show
  1. handler.py +27 -0
handler.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ import requests
3
+ from transformers import CLIPProcessor, CLIPModel
4
+ from PIL import Image
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+
7
+
8
class EndpointHandler:
    """Inference Endpoints handler that embeds one text and one image with CLIP.

    Calling the handler returns the text embedding, the image embedding, and
    the cosine similarity between the two.
    """

    # Seconds to wait for the image host before giving up; without a timeout
    # an unresponsive host would block the request indefinitely.
    image_fetch_timeout = 30

    def __init__(self, path=""):
        """Load the CLIP processor and model.

        Args:
            path: Location handed to ``from_pretrained`` for both the
                processor and the model.
        """
        self.processor = CLIPProcessor.from_pretrained(path)
        self.model = CLIPModel.from_pretrained(path)

    def __call__(self, data: Dict) -> Dict:
        """Embed the text and image in *data* and compare them.

        Args:
            data: Request payload. Must contain ``"text"`` and either
                ``"image_url"`` (downloaded over HTTP) or ``"image"``
                (passed to the processor as-is; presumably a PIL image,
                verify against the caller). ``"image_url"`` takes
                precedence when both are present. The payload is not
                modified.

        Returns:
            A dict with ``"text_embedding"`` and ``"image_embedding"``
            (lists of floats for the first text / first image) and
            ``"embedding_similarity"`` (their cosine similarity as a float).

        Raises:
            KeyError: If ``"text"`` is missing, or if neither ``"image_url"``
                nor ``"image"`` is present.
            requests.RequestException: If the image download times out or
                the server answers with an error status.
        """
        # Imported here so the file's top-level import block stays untouched.
        from io import BytesIO

        import torch

        text = data["text"]
        if "image_url" in data:
            response = requests.get(
                data["image_url"], timeout=self.image_fetch_timeout
            )
            # Fail loudly on 4xx/5xx instead of feeding an error page to PIL.
            response.raise_for_status()
            # ``response.content`` is fully read and content-decoded, unlike
            # the raw socket stream, and lets the connection be released.
            image = Image.open(BytesIO(response.content))
        else:
            image = data["image"]

        inputs = self.processor(
            text=text,
            images=image,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)

        text_embeds = outputs.text_embeds
        image_embeds = outputs.image_embeds
        similarity_matrix = cosine_similarity(
            text_embeds.numpy(), image_embeds.numpy()
        )
        embedding_similarity = similarity_matrix[0][0].item()

        return {
            "text_embedding": text_embeds[0].tolist(),
            "image_embedding": image_embeds[0].tolist(),
            "embedding_similarity": embedding_similarity,
        }