Update handler.py
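This commit switches the handler from loading a local checkpoint (`weights = f"{path}/mobileclip_b.pt"`) to OpenCLIP's `pretrained="datacompdr"` tag, with the model name lowercased from `"MobileCLIP-B"` to `"mobileclip_b"` to match; per the new inline comments, the local `mobileclip_b.pt` file is no longer needed. The previous implementation is kept as a comment block at the bottom of the file, and the request/response contract is unchanged. For reference, the expected request and the shape of the sorted response (scores here are illustrative, not real output):

{
  "inputs": {
    "image": "<base64 PNG/JPEG>",
    "candidate_labels": ["a photo of a cat", "a photo of a dog"]
  }
}

→ [{"label": "a photo of a cat", "score": 0.97}, {"label": "a photo of a dog", "score": 0.03}]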
handler.py CHANGED (+96 -32)

@@ -1,81 +1,145 @@
 # handler.py (repo root)

-
-
-import io, base64, torch
+import io, base64, torch, open_clip
 from PIL import Image
-import open_clip
-

 class EndpointHandler:
     """
-    Zero‑shot classifier for MobileCLIP‑B (OpenCLIP) with a text‑embedding cache.
-
-    Client JSON:
-    {
-      "inputs": {
-          "image": "<base64 PNG/JPEG>",
-          "candidate_labels": ["cat", "dog", ...]
+    MobileCLIP‑B zero‑shot (OpenCLIP, pretrained = 'datacompdr')
+    Expects JSON:
+    {
+      "inputs": {
+          "image": "<base64 PNG/JPEG>",
+          "candidate_labels": ["a photo of a cat", ...]
+      }
     }
-    }
     """

-    # ------------------------------------------------- #
-    #                 INITIALISATION                     #
-    # ------------------------------------------------- #
-    def __init__(self, path: str = ""):
-        weights = f"{path}/mobileclip_b.pt"
-
+    # ---------- initialisation (once per container) ----------
+    def __init__(self, path=""):
+        # • Use the same checkpoint as your local workflow
+        # • No need for the local mobileclip_b.pt file
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
-            "MobileCLIP-B", pretrained=weights
+            "mobileclip_b", pretrained="datacompdr"
         )
         self.model.eval()

-        self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
+        self.tokenizer = open_clip.get_tokenizer("mobileclip_b")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)

-        # cache: {prompt -> 1×512 tensor on device}
+        # Cache: {prompt -> 1×512 tensor}
         self.label_cache: dict[str, torch.Tensor] = {}

-    # ------------------------------------------------- #
-    #                    INFERENCE                       #
-    # ------------------------------------------------- #
+    # -------------------- inference --------------------------
    def __call__(self, data):
         payload = data.get("inputs", data)
-
         img_b64 = payload["image"]
         labels = payload.get("candidate_labels", [])
         if not labels:
             return {"error": "candidate_labels list is empty"}

-        # --- image ----
+        # image → tensor
         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)

-        # --- text (with cache) ----
+        # text → cached embeddings
         missing = [l for l in labels if l not in self.label_cache]
         if missing:
-            tokens = self.tokenizer(missing).to(self.device)
+            tok = self.tokenizer(missing).to(self.device)
             with torch.no_grad():
-                emb = self.model.encode_text(tokens)
+                emb = self.model.encode_text(tok)
                 emb = emb / emb.norm(dim=-1, keepdim=True)
             for l, e in zip(missing, emb):
                 self.label_cache[l] = e
         txt_feat = torch.stack([self.label_cache[l] for l in labels])

-        # --- forward & softmax ----
+        # forward
         with torch.no_grad(), torch.cuda.amp.autocast():
             img_feat = self.model.encode_image(img_tensor)
             img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()

-        # --- sorted output ----
+        # sorted result
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
         ]

+# import io, base64, torch
+# from PIL import Image
+# import open_clip
+
+
+# class EndpointHandler:
+#     """
+#     Zero‑shot classifier for MobileCLIP‑B (OpenCLIP) with a text‑embedding cache.
+
+#     Client JSON:
+#     {
+#         "inputs": {
+#             "image": "<base64 PNG/JPEG>",
+#             "candidate_labels": ["cat", "dog", ...]
+#         }
+#     }
+#     """
+
+#     # ------------------------------------------------- #
+#     #                 INITIALISATION                     #
+#     # ------------------------------------------------- #
+#     def __init__(self, path: str = ""):
+#         weights = f"{path}/mobileclip_b.pt"
+
+#         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
+#             "MobileCLIP-B", pretrained=weights
+#         )
+#         self.model.eval()
+
+#         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
+#         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+#         self.model.to(self.device)
+
+#         # cache: {prompt -> 1×512 tensor on device}
+#         self.label_cache: dict[str, torch.Tensor] = {}
+
+#     # ------------------------------------------------- #
+#     #                    INFERENCE                       #
+#     # ------------------------------------------------- #
+#     def __call__(self, data):
+#         payload = data.get("inputs", data)
+
+#         img_b64 = payload["image"]
+#         labels = payload.get("candidate_labels", [])
+#         if not labels:
+#             return {"error": "candidate_labels list is empty"}
+
+#         # --- image ----
+#         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
+#         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+
+#         # --- text (with cache) ----
+#         missing = [l for l in labels if l not in self.label_cache]
+#         if missing:
+#             tokens = self.tokenizer(missing).to(self.device)
+#             with torch.no_grad():
+#                 emb = self.model.encode_text(tokens)
+#                 emb = emb / emb.norm(dim=-1, keepdim=True)
+#             for l, e in zip(missing, emb):
+#                 self.label_cache[l] = e
+#         txt_feat = torch.stack([self.label_cache[l] for l in labels])
+
+#         # --- forward & softmax ----
+#         with torch.no_grad(), torch.cuda.amp.autocast():
+#             img_feat = self.model.encode_image(img_tensor)
+#             img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
+#             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
+
+#         # --- sorted output ----
+#         return [
+#             {"label": l, "score": float(p)}
+#             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
+#         ]
+
 # # handler.py (repo root)
 # import io, base64, torch
 # from PIL import Image
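For anyone deploying this, a minimal client-side sketch of a call against the endpoint; the URL, token, and image file are placeholders for illustration, not values from this repo, and the payload shape comes from the handler's docstring above.

# smoke_test.py — hypothetical client for the handler above
import base64
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder token with access to the endpoint

# Raw image bytes -> base64 string, exactly what the handler b64decodes
with open("cat.jpg", "rb") as f:  # placeholder image
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={
        "inputs": {
            "image": img_b64,
            "candidate_labels": ["a photo of a cat", "a photo of a dog"],
        }
    },
)
resp.raise_for_status()

# The handler returns the labels already sorted by score, highest first
for item in resp.json():
    print(f"{item['label']}: {item['score']:.4f}")

One caveat worth noting: `torch.cuda.amp.autocast()` in `__call__` is a CUDA-specific context; in recent PyTorch versions it warns and disables itself when CUDA is unavailable, so a CPU-only instance should still run, just without mixed precision.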