oscarfu0501 committed on
Commit
fef199d
·
verified ·
1 Parent(s): f04e030

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +9 -1
handler.py CHANGED
@@ -4,7 +4,9 @@ from transformers import CLIPProcessor, CLIPModel
4
  from PIL import Image
5
  from io import BytesIO
6
  import base64
 
7
 
 
8
  class EndpointHandler():
9
  def __init__(self, path=""):
10
  # Preload all the elements we need at inference.
@@ -13,10 +15,16 @@ class EndpointHandler():
13
 
14
 
15
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
 
 
 
 
16
  inputs = data.get("inputs")
17
  text = inputs.get("text")
18
  imageData = inputs.get("image")
19
- image = Image.open(BytesIO(base64.b64decode(imageData)))
 
 
20
  inputs = self.processor(text=text, images=image, return_tensors="pt", padding=True)
21
  outputs = self.model(**inputs)
22
  embeddings = outputs.image_embeds.detach().numpy().flatten().tolist()
 
4
  from PIL import Image
5
  from io import BytesIO
6
  import base64
7
+ import requests
8
 
9
+ # handle clip embeddings by utilizing openAI CLIP pretrained model
10
  class EndpointHandler():
11
  def __init__(self, path=""):
12
  # Preload all the elements we need at inference.
 
15
 
16
 
17
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
18
+ # inputs = self.processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
19
+ # logits_per_image = outputs.logits_per_image # this is the image-text similarity score
20
+ # probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
21
+
22
  inputs = data.get("inputs")
23
  text = inputs.get("text")
24
  imageData = inputs.get("image")
25
+ url = inputs.get("image")
26
+ image = Image.open(requests.get(url, stream=True).raw)
27
+ # image = Image.open(BytesIO(base64.b64decode(imageData)))
28
  inputs = self.processor(text=text, images=image, return_tensors="pt", padding=True)
29
  outputs = self.model(**inputs)
30
  embeddings = outputs.image_embeds.detach().numpy().flatten().tolist()