| | import io, base64, torch |
| | from PIL import Image |
| | from transformers import CLIPProcessor, CLIPModel |
| |
|
class EndpointHandler:
    """Custom inference-endpoint handler: zero-shot image classification with CLIP.

    Loads a CLIP model + processor from a local path and, per request, scores a
    base64-encoded image against a caller-supplied list of candidate labels.
    """

    def __init__(self, path=""):
        """Load the CLIP model/processor from *path*, moving the model to GPU if available.

        Args:
            path: model directory (or hub id) understood by ``from_pretrained``.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = CLIPModel.from_pretrained(path).to(device)
        # Serving is inference-only: eval() disables dropout/batch-norm training
        # behaviour so scores are deterministic.
        self.model.eval()
        self.processor = CLIPProcessor.from_pretrained(path)
        self.device = device

    def __call__(self, data):
        """Score one image against candidate labels.

        Args:
            data: dict with keys
                - "image": base64-encoded image bytes,
                - "candidate_labels": non-empty list of label strings.

        Returns:
            A list of ``{"label": str, "score": float}`` dicts, one per label,
            with scores softmax-normalized across the labels.

        Raises:
            KeyError: if "image" is missing from *data*.
            ValueError: if "candidate_labels" is missing or empty (the processor
                would otherwise fail later with an opaque error).
        """
        img_b64 = data["image"]
        labels = data.get("candidate_labels", [])
        # Validate before doing any decoding/tensor work so the caller gets a
        # clear error instead of a cryptic processor failure.
        if not labels:
            raise ValueError("candidate_labels must be a non-empty list of strings")

        image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")

        inputs = self.processor(
            text=labels, images=image, return_tensors="pt", padding=True
        ).to(self.device)

        # no_grad(): skip building the autograd graph — lower memory and faster
        # for a pure-inference request path.
        with torch.no_grad():
            logits = self.model(**inputs).logits_per_image
        probs = logits.softmax(dim=-1)[0].tolist()
        return [{"label": label, "score": float(score)} for label, score in zip(labels, probs)]