testmobileclip

mobileclip

Model card · Files and versions

xet

Community

finhdev committed on Aug 4, 2025

Commit

652b877

verified ·

1 Parent(s): 98ef5e5

Update handler.py

Browse files

Files changed (1) hide show

handler.py +16 -23

handler.py CHANGED Viewed

@@ -1,60 +1,53 @@
-import io, base64, torch
 from PIL import Image
 import open_clip
-# Make sure the mobileclip library is installed in your Hugging Face environment
-# You might need to add it to your requirements.txt
 from mobileclip.modules.common.mobileone import reparameterize_model
-class EndpointHandler:
-    """
-    Zero-shot classifier for MobileCLIP-B (OpenCLIP).
-    """
     def __init__(self, path: str = ""):
         weights = f"{path}/mobileclip_b.pt"
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "MobileCLIP-B", pretrained=weights
         )
         self.model.eval()
-        # *** THIS IS THE CRUCIAL ADDITION ***
-        self.model = reparameterize_model(self.model)
-        self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
     def __call__(self, data):
-        # ... (the rest of your __call__ method remains the same)
-        # ── unwrap Hugging Face's `inputs` envelope ───────────
         payload = data.get("inputs", data)
         img_b64 = payload["image"]
         labels  = payload.get("candidate_labels", [])
         if not labels:
             return {"error": "candidate_labels list is empty"}
-        # Decode & preprocess image
         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
-        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
-        # Tokenise labels
         text_tokens = self.tokenizer(labels).to(self.device)
-        # Forward pass
-        with torch.no_grad(), torch.cuda.amp.autocast():
             img_feat = self.model.encode_image(img_tensor)
             txt_feat = self.model.encode_text(text_tokens)
-            img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
-            txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True)
             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
-        # Sorted output
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
         ]
 # # handler.py  (repo root)
 # import io, base64, torch
 # from PIL import Image

+import contextlib, io, base64, torch
 from PIL import Image
 import open_clip
 from mobileclip.modules.common.mobileone import reparameterize_model
+class EndpointHandler:
     def __init__(self, path: str = ""):
+        # You can also pass pretrained='datacompdr' to let OpenCLIP download
         weights = f"{path}/mobileclip_b.pt"
         self.model, _, self.preprocess = open_clip.create_model_and_transforms(
             "MobileCLIP-B", pretrained=weights
         )
         self.model.eval()
+        self.model = reparameterize_model(self.model)   # *** fuse branches ***
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model.to(self.device)
+        self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
     def __call__(self, data):
         payload = data.get("inputs", data)
         img_b64 = payload["image"]
         labels  = payload.get("candidate_labels", [])
         if not labels:
             return {"error": "candidate_labels list is empty"}
+        # ---------------- decode inputs ----------------
         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
+        img_tensor  = self.preprocess(image).unsqueeze(0).to(self.device)
         text_tokens = self.tokenizer(labels).to(self.device)
+        # ---------------- forward pass -----------------
+        autocast_ctx = (
+            torch.cuda.amp.autocast if self.device.startswith("cuda") else contextlib.nullcontext
+        )
+        with torch.no_grad(), autocast_ctx():
             img_feat = self.model.encode_image(img_tensor)
             txt_feat = self.model.encode_text(text_tokens)
+            img_feat /= img_feat.norm(dim=-1, keepdim=True)
+            txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()
         return [
             {"label": l, "score": float(p)}
             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
         ]
 # # handler.py  (repo root)
 # import io, base64, torch
 # from PIL import Image