| import base64 |
| import io |
| from typing import Any, Dict, List |
|
|
| import torch |
| from PIL import Image |
| from transformers import AutoImageProcessor, AutoModelForObjectDetection |
|
|
|
|
class EndpointHandler:
    """Inference Endpoints handler serving an object-detection model.

    Loads an ``AutoModelForObjectDetection`` checkpoint plus its image
    processor from ``path`` and answers requests whose image arrives as a
    base64 string, raw bytes, or a ``PIL.Image.Image``.
    """

    def __init__(self, path: str = ""):
        # `path` is the checkpoint directory supplied by the endpoint runtime.
        self.processor = AutoImageProcessor.from_pretrained(path)
        self.model = AutoModelForObjectDetection.from_pretrained(path)
        self.model.eval()  # inference only: disable dropout / batch-norm updates

    @staticmethod
    def _decode_image(inputs: Any) -> Image.Image:
        """Convert a supported input form into an RGB PIL image.

        Accepts a base64-encoded string (plain or ``data:...;base64,`` URL),
        raw ``bytes``/``bytearray``, or an existing ``PIL.Image.Image``.

        Raises:
            ValueError: If ``inputs`` is none of the supported types.
        """
        if isinstance(inputs, str):
            # Tolerate data-URL payloads ("data:image/png;base64,...."):
            # rpartition leaves a plain base64 string untouched.
            payload = inputs.rpartition(",")[2]
            image = Image.open(io.BytesIO(base64.b64decode(payload)))
        elif isinstance(inputs, (bytes, bytearray)):
            image = Image.open(io.BytesIO(inputs))
        elif isinstance(inputs, Image.Image):
            image = inputs
        else:
            raise ValueError(
                "Unsupported input type. Provide a base64-encoded image string or raw bytes."
            )
        return image.convert("RGB")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run object detection on one image.

        Args:
            data: Request payload. ``data["inputs"]`` (or ``data`` itself when
                no ``"inputs"`` key is present, or when ``data`` is not a dict)
                is the image in any form accepted by ``_decode_image``. An
                optional ``data["parameters"]["threshold"]`` overrides the
                default confidence threshold of 0.5.

        Returns:
            One dict per detection with keys ``"score"`` (rounded to 4
            decimals), ``"label"`` (class name from the model config), and
            ``"box"`` (``xmin``/``ymin``/``xmax``/``ymax`` pixel coordinates,
            rounded to 2 decimals).

        Raises:
            ValueError: If the image input is of an unsupported type.
        """
        if isinstance(data, dict):
            inputs = data.get("inputs", data)
            parameters = data.get("parameters") or {}
        else:
            # Be tolerant of callers that send the image directly.
            inputs = data
            parameters = {}
        threshold = parameters.get("threshold", 0.5)

        image = self._decode_image(inputs)

        with torch.no_grad():
            encoded = self.processor(images=image, return_tensors="pt")
            outputs = self.model(**encoded)

        # post_process_object_detection expects (height, width);
        # PIL's `Image.size` is (width, height), hence the reversal.
        target_sizes = torch.tensor([image.size[::-1]])
        results = self.processor.post_process_object_detection(
            outputs, threshold=threshold, target_sizes=target_sizes
        )[0]

        id2label = self.model.config.id2label
        detections = []
        for score, label, box in zip(
            results["scores"], results["labels"], results["boxes"]
        ):
            xmin, ymin, xmax, ymax = box.tolist()
            detections.append(
                {
                    "score": round(score.item(), 4),
                    "label": id2label[label.item()],
                    "box": {
                        "xmin": round(xmin, 2),
                        "ymin": round(ymin, 2),
                        "xmax": round(xmax, 2),
                        "ymax": round(ymax, 2),
                    },
                }
            )

        return detections
|
|