| |
| |
| |
| |
| |
| |
|
|
| import base64 |
| import io |
| from typing import Any, Dict |
|
|
| import torch |
| from PIL import Image, UnidentifiedImageError |
| from transformers import AutoImageProcessor, AutoModel |
|
|
# Default model identifier: used as the default `path` of EndpointHandler and
# forwarded unchanged to `from_pretrained` for both the processor and the model.
| MODEL_ID = "facebook/dinov2-large" |
|
|
|
|
class EndpointHandler:
    """Custom inference handler that maps one image to one embedding vector.

    The image processor and model are loaded once at construction time. Each
    call decodes the request payload into an RGB image, runs a single forward
    pass and returns the mean of the output tokens after the first one as a
    plain Python list.
    """

    def __init__(self, path: str = MODEL_ID):
        """Load the processor and model and move the model to the best device.

        Args:
            path: Location or identifier forwarded unchanged to
                ``from_pretrained``; defaults to ``MODEL_ID``.
        """
        self.processor = AutoImageProcessor.from_pretrained(path)
        # eval() switches off training-only behaviour for inference.
        self.model = AutoModel.from_pretrained(path).eval()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def _to_image(self, inp: Any) -> Image.Image:
        """Coerce a request payload into an RGB ``PIL.Image``.

        Accepted shapes: ``PIL.Image`` | raw bytes | base64 ``str`` |
        ``{"image": <any of those>}`` | ``{"inputs": <any of those>}``.

        The stated intent of the deployment is that anything unparseable
        raises, so the endpoint answers 4xx (NOT 503) and Buildly's format
        probe falls through to the next wire shape instead of treating the
        model as unavailable.

        Args:
            inp: The payload in one of the shapes listed above.

        Returns:
            The decoded image converted to RGB mode.

        Raises:
            ValueError: If the string is not valid base64, the bytes are not
                a decodable image, or the payload type is unsupported.
        """
        # Unwrap the dict envelope before any type check so that a PIL image
        # nested under "image"/"inputs" is accepted like a top-level one.
        if isinstance(inp, dict):
            inp = inp.get("image") or inp.get("inputs")

        if isinstance(inp, Image.Image):
            return inp.convert("RGB")

        if isinstance(inp, str):
            try:
                inp = base64.b64decode(inp)
            except ValueError as exc:
                # binascii.Error (bad padding/alphabet) subclasses ValueError,
                # and non-ASCII text raises ValueError directly.
                raise ValueError(
                    f"input string is not valid base64: {exc}"
                ) from exc

        if isinstance(inp, (bytes, bytearray)):
            try:
                # convert() returns an independent copy, so the lazily opened
                # source image can be closed as soon as the copy exists.
                with Image.open(io.BytesIO(inp)) as img:
                    return img.convert("RGB")
            except (UnidentifiedImageError, OSError) as exc:
                raise ValueError(
                    f"bytes are not a decodable image: {exc}"
                ) from exc

        raise ValueError(f"unsupported input type: {type(inp).__name__}")

    @torch.no_grad()
    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Embed the image carried by ``data``.

        Args:
            data: Request body. When it is a dict with an ``"inputs"`` key,
                that value is used as the payload; otherwise ``data`` itself
                is handed to ``_to_image``.

        Returns:
            ``{"embedding": <list of floats>, "dim": <embedding length>}``.

        Raises:
            ValueError: Propagated from ``_to_image`` for undecodable input.
        """
        payload = data.get("inputs", data) if isinstance(data, dict) else data
        image = self._to_image(payload)

        batch = self.processor(images=image, return_tensors="pt").to(self.device)
        tokens = self.model(**batch).last_hidden_state
        # Skip the token at index 0 (presumably the CLS token for this model
        # family -- confirm against the model docs) and average the remaining
        # tokens; [0] selects the only item in the batch.
        vec = tokens[:, 1:, :].mean(dim=1)[0]

        return {"embedding": vec.float().cpu().tolist(), "dim": int(vec.shape[-1])}