| import io, base64, torch | |
| from PIL import Image | |
| import open_clip | |
| # Make sure the mobileclip library is installed in your Hugging Face environment | |
| # You might need to add it to your requirements.txt | |
| from mobileclip.modules.common.mobileone import reparameterize_model | |
class EndpointHandler:
    """Zero-shot image classifier for MobileCLIP-B (OpenCLIP).

    Expected request JSON sent to the endpoint::

        {
            "inputs": {
                "image": "<base64 PNG/JPEG>",
                "candidate_labels": ["cat", "dog", ...]
            }
        }

    The ``inputs`` envelope is optional: a payload that carries ``image`` and
    ``candidate_labels`` at the top level is accepted as well.

    On success the handler returns a list of ``{"label": str, "score": float}``
    dicts sorted by descending score. Invalid requests yield a single
    ``{"error": "<message>"}`` dict instead of raising.
    """

    MODEL_NAME = "MobileCLIP-B"
    WEIGHTS_FILENAME = "mobileclip_b.pt"

    def __init__(self, path: str = ""):
        """Load the model, preprocessing transform and tokenizer.

        Args:
            path: Directory that contains ``mobileclip_b.pt``. An empty string
                means the current working directory.
        """
        # Local import keeps this class self-contained without touching the
        # file-level import block.
        import os

        # os.path.join avoids resolving to "/mobileclip_b.pt" (filesystem
        # root) when `path` is left at its default of "".
        weights = os.path.join(path, self.WEIGHTS_FILENAME)
        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            self.MODEL_NAME, pretrained=weights
        )
        self.model.eval()
        # Re-parameterize the model for inference; the original author marked
        # this step as crucial for this checkpoint.
        self.model = reparameterize_model(self.model)
        self.tokenizer = open_clip.get_tokenizer(self.MODEL_NAME)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def __call__(self, data):
        """Score an image against the candidate labels.

        Args:
            data: Request dict, optionally wrapped in an ``inputs`` envelope,
                holding a base64-encoded ``image`` and ``candidate_labels``.

        Returns:
            A list of ``{"label", "score"}`` dicts sorted from most to least
            likely, or ``{"error": message}`` when the request is invalid.
        """
        # Unwrap Hugging Face's `inputs` envelope when present.
        payload = data.get("inputs", data)

        labels = payload.get("candidate_labels") or []
        if isinstance(labels, str):
            # A bare string would otherwise be zipped character by character
            # against a single probability; treat it as one label.
            labels = [labels]
        else:
            labels = list(labels)
        if not labels:
            return {"error": "candidate_labels list is empty"}

        img_b64 = payload.get("image")
        if not img_b64:
            return {"error": "image is missing"}

        # Decode in two steps so each failure mode gets its own message.
        try:
            raw_bytes = base64.b64decode(img_b64)
        except (ValueError, TypeError):  # binascii.Error subclasses ValueError
            return {"error": "image is not valid base64"}
        try:
            image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
        except OSError:  # includes PIL's "cannot identify image file" error
            return {"error": "image could not be decoded"}

        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
        text_tokens = self.tokenizer(labels).to(self.device)

        # Only enable mixed precision on CUDA; the previous unconditional
        # torch.cuda.amp.autocast() was also entered on CPU-only hosts.
        use_amp = self.device == "cuda"
        with torch.no_grad(), torch.autocast(
            device_type=self.device, enabled=use_amp
        ):
            img_feat = self.model.encode_image(img_tensor)
            txt_feat = self.model.encode_text(text_tokens)
            # L2-normalise so the dot product below is a cosine similarity.
            img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
            txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True)
            # Similarities are scaled by the constant 100 before the softmax.
            probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist()

        ranked = sorted(zip(labels, probs), key=lambda pair: pair[1], reverse=True)
        return [{"label": label, "score": float(prob)} for label, prob in ranked]
| # # handler.py (repo root) | |
| # import io, base64, torch | |
| # from PIL import Image | |
| # import open_clip | |
| # class EndpointHandler: | |
| # """ | |
| # Zero‑shot classifier for MobileCLIP‑B (OpenCLIP). | |
| # Expected client JSON *to the endpoint*: | |
| # { | |
| # "inputs": { | |
| # "image": "<base64 PNG/JPEG>", | |
| # "candidate_labels": ["cat", "dog", ...] | |
| # } | |
| # } | |
| # """ | |
| # def __init__(self, path: str = ""): | |
| # weights = f"{path}/mobileclip_b.pt" | |
| # self.model, _, self.preprocess = open_clip.create_model_and_transforms( | |
| # "MobileCLIP-B", pretrained=weights | |
| # ) | |
| # self.model.eval() | |
| # self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B") | |
| # self.device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # self.model.to(self.device) | |
| # def __call__(self, data): | |
| # # ── unwrap Hugging Face's `inputs` envelope ─────────── | |
| # payload = data.get("inputs", data) | |
| # img_b64 = payload["image"] | |
| # labels = payload.get("candidate_labels", []) | |
| # if not labels: | |
| # return {"error": "candidate_labels list is empty"} | |
| # # Decode & preprocess image | |
| # image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB") | |
| # img_tensor = self.preprocess(image).unsqueeze(0).to(self.device) | |
| # # Tokenise labels | |
| # text_tokens = self.tokenizer(labels).to(self.device) | |
| # # Forward pass | |
| # with torch.no_grad(), torch.cuda.amp.autocast(): | |
| # img_feat = self.model.encode_image(img_tensor) | |
| # txt_feat = self.model.encode_text(text_tokens) | |
| # img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True) | |
| # txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True) | |
| # probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].tolist() | |
| # # Sorted output | |
| # return [ | |
| # {"label": l, "score": float(p)} | |
| # for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True) | |
| # ] | |