initial commit
Browse files- .gitignore +4 -0
- handler.py +45 -0
- requirements.txt +2 -0
- sign_ids.npy +3 -0
- vanilla_large-patch14_image_embeddings_normalized.npy +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
.ipynb_checkpoints/
|
| 3 |
+
local_test.ipynb
|
| 4 |
+
hosted_test.ipynb
|
handler.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from typing import Any, Dict, List

import numpy as np
import torch
from transformers import CLIPModel, CLIPTokenizer
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class EndpointHandler:
    """Text-to-sign retrieval endpoint backed by CLIP embeddings.

    On construction it loads a fixed catalogue of precomputed, L2-normalized
    CLIP image embeddings (one row per sign) plus the matching sign ids, and
    the "openai/clip-vit-large-patch14" model/tokenizer pair used to embed
    queries.  Each call embeds a text query and returns the ids and scores of
    the signs whose (scaled) cosine similarity clears a threshold.
    """

    def __init__(self, path: str = "", weight: float = 2.5, threshold: float = 0.475):
        """Load the sign catalogue and the CLIP model.

        Args:
            path: Directory containing ``sign_ids.npy`` and
                ``vanilla_large-patch14_image_embeddings_normalized.npy``.
            weight: Arbitrary scale applied to the cosine similarities; it
                only makes the scores easier to read.  Default matches the
                previously hard-coded ``2.5``.
            threshold: Cut-off applied to the *scaled* similarity; signs
                scoring at or below it are dropped.  Default matches the
                previously hard-coded ``0.475``.
        """
        self.weight = weight
        self.threshold = threshold
        self.sign_ids = np.load(os.path.join(path, "sign_ids.npy"))
        # Rows are presumed already L2-normalized (per the filename) — the
        # similarity math below relies on that.  TODO confirm upstream.
        self.sign_embeddings = np.load(
            os.path.join(path, "vanilla_large-patch14_image_embeddings_normalized.npy")
        )

        hf_model_path = "openai/clip-vit-large-patch14"
        self.model = CLIPModel.from_pretrained(hf_model_path)
        self.tokenizer = CLIPTokenizer.from_pretrained(hf_model_path)

    def __call__(self, data: Dict[str, Any]) -> List[List[float]]:
        """Rank the sign catalogue against a text query.

        Args:
            data: Request payload; ``data["inputs"]`` must be the query text
                (a single string — images are never handled here: the value
                goes straight to the CLIP tokenizer).

        Returns:
            ``[ids, scores]`` where ``ids`` are the matching sign ids and
            ``scores`` the corresponding scaled cosine similarities, both
            sorted best-first and truncated to entries above the threshold.
        """
        token_inputs = self.tokenizer([data["inputs"]], padding=True, return_tensors="pt")
        # Inference only — no_grad avoids building an autograd graph per call.
        with torch.no_grad():
            query_embed = self.model.get_text_features(**token_inputs)
        np_query_embed = query_embed.cpu().numpy()[0]

        # Normalize the query so the dot products below are cosine
        # similarities; guard a degenerate zero vector instead of emitting NaN.
        norm = np.linalg.norm(np_query_embed)
        if norm > 0.0:
            np_query_embed = np_query_embed / norm

        # Catalogue rows are normalized, so a single matvec yields cosines.
        # The weight is arbitrary but makes the results easier to think about;
        # note the threshold is expressed on the SCALED value.
        cos_similarities = self.weight * (self.sign_embeddings @ np_query_embed)
        count_above_threshold = int(np.sum(cos_similarities > self.threshold))

        # Best-first ranking, truncated to the entries that beat the threshold.
        top = np.argsort(cos_similarities)[::-1][:count_above_threshold]
        return [self.sign_ids[top].tolist(), cos_similarities[top].tolist()]
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.23.1
|
| 2 |
+
transformers==4.21.1
|
sign_ids.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e282e229c3af38c7c0ee6ce5cf15317d5c6f83b7c44a18fe04f0239a0bbd8bde
|
| 3 |
+
size 465400
|
vanilla_large-patch14_image_embeddings_normalized.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f70fc1bcba9555a00344cb21132276955645a7b78c54de1a1efcb17f776f033
|
| 3 |
+
size 357329024
|