# handler.py
"""Endpoint handler that runs Ultralytics YOLOv8n detection on a single image."""
import base64
import io
import json
import os
from typing import Any, Dict, List

import numpy as np
from PIL import Image

# Ultralytics YOLO
from ultralytics import YOLO

# Weights file looked up inside the repo directory, and the bare name used as
# the fallback when that file is absent.
_WEIGHTS_FILENAME = "yolov8n.pt"
# Value reported under the "model" key of every response.
_MODEL_NAME = "yolov8n"


# ---- Utilities ----
def _decode_image(raw: bytes) -> Image.Image:
    """Decode encoded image bytes (PNG, JPEG, ...) into an RGB PIL image."""
    # `convert` returns a new, fully loaded image, so the file-backed original
    # can be closed as soon as the conversion is done.
    with Image.open(io.BytesIO(raw)) as img:
        return img.convert("RGB")


def load_image_from_payload(payload: Dict[str, Any]) -> Image.Image:
    """
    Extract the request image from `payload` and return it as an RGB image.

    Supports:
      - base64 image: payload["inputs"] = "data:image/...;base64,...." or a
        raw base64 string
      - already-decoded image: payload["inputs"] is a PIL image or raw encoded
        bytes (presumably what a serving layer passes for binary image
        requests -- TODO confirm against the deployment)
      - image URL: payload["image_url"] = "https://..."

    An empty or whitespace-only "inputs" string is treated as absent, so
    "image_url" is still honoured in that case.

    Raises:
        ValueError: if no usable image is present, the data URL is malformed,
            the base64 text cannot be decoded, or "image_url" is not an
            http(s) URL.
    """
    inputs = payload.get("inputs")

    if isinstance(inputs, Image.Image):
        return inputs.convert("RGB")

    if isinstance(inputs, (bytes, bytearray)) and inputs:
        return _decode_image(bytes(inputs))

    if isinstance(inputs, str) and inputs.strip():
        encoded = inputs.strip()
        # handle optional data URL prefix
        if encoded.startswith("data:image"):
            _, sep, encoded = encoded.partition(",")
            if not sep:
                raise ValueError(
                    "Malformed data URL in 'inputs': missing ',' before the base64 data."
                )
        # binascii.Error raised on bad base64 is a ValueError subclass.
        return _decode_image(base64.b64decode(encoded))

    image_url = payload.get("image_url")
    if image_url:
        if not isinstance(image_url, str) or not image_url.lower().startswith(
            ("http://", "https://")
        ):
            raise ValueError("'image_url' must be an http(s) URL.")
        # Not recommended in restricted egress; but works if endpoint has outbound internet.
        # NOTE(security): the URL is caller-controlled, so this is a potential
        # SSRF vector (internal hosts are reachable from the endpoint). Consider
        # an allow-list before exposing this publicly.
        import requests  # local import: only needed for the URL path

        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        return _decode_image(response.content)

    raise ValueError("No image provided. Use 'inputs' (base64) or 'image_url'.")


def _category_name(names: Any, cls_id: int) -> str:
    """Look up the label for `cls_id`, falling back to the id as a string."""
    if isinstance(names, dict):
        return names.get(cls_id, str(cls_id))
    try:
        return names[cls_id]
    except (IndexError, KeyError, TypeError):
        return str(cls_id)


def yolo_to_coco(result) -> List[Dict[str, Any]]:
    """
    Convert Ultralytics result to COCO-ish detections:
    [{"bbox":[x,y,w,h], "score":float, "category_id":int, "category_name":str}]

    `bbox` is top-left x, top-left y, width, height, derived from the
    centre-based `xywh` boxes of the result. Returns an empty list when the
    result carries no boxes (`result.boxes` is None or empty).
    """
    boxes = getattr(result, "boxes", None)
    if boxes is None:
        return []

    names = getattr(result, "names", None) or {}  # id->name

    # One bulk conversion per field instead of three conversions per box.
    centers = boxes.xywh.tolist()  # rows of [x_center, y_center, w, h]
    scores = boxes.conf.tolist()
    classes = boxes.cls.tolist()

    detections = []
    for (xc, yc, w, h), score, cls in zip(centers, scores, classes):
        cls_id = int(cls)
        detections.append({
            # convert centre-based box to top-left x,y,w,h
            "bbox": [float(xc - w / 2), float(yc - h / 2), float(w), float(h)],
            "score": float(score),
            "category_id": cls_id,
            "category_name": _category_name(names, cls_id),
        })
    return detections


def _get_option(data: Dict[str, Any], key: str, default: Any) -> Any:
    """
    Read an optional request setting.

    Looks at the top level of `data` first, then inside a nested
    "parameters" dict if one is present; a missing or null value yields
    `default`.
    """
    value = data.get(key)
    if value is None:
        params = data.get("parameters")
        if isinstance(params, dict):
            value = params.get(key)
    return default if value is None else value


class EndpointHandler:
    """Callable handler: loads YOLOv8n once, then serves detection requests."""

    def __init__(self, path: str = ""):
        """
        `path` points to the repo files checked out on the endpoint.
        If yolov8n.pt is present in the repo, we load it directly.
        Otherwise we fall back to the bare name 'yolov8n.pt', which per the
        original author's note lets Ultralytics download/resolve the weights.
        """
        weights_path = os.path.join(path or "", _WEIGHTS_FILENAME)
        if not os.path.exists(weights_path):
            # Fallback: auto-download from Ultralytics/HF cache
            weights_path = _WEIGHTS_FILENAME

        self.model = YOLO(weights_path)

        # Warmup: run one small dummy inference on a blank image so the first
        # real request does not pay the one-time setup cost.
        dummy = Image.new("RGB", (320, 320), (128, 128, 128))
        self.model.predict(dummy, imgsz=320, conf=0.25, verbose=False)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run detection on the image described by `data`.

        Expected payload:
        {
          "inputs": "<base64 string or data URL>"   # OR
          "image_url": "https://..."
          "imgsz":  (optional int,   default 640)
          "conf":   (optional float, default 0.25)
          "iou":    (optional float, default 0.45)
          "max_det":(optional int,   default 300)
        }
        The optional settings may also be supplied inside a nested
        "parameters" dict; top-level values take precedence.

        Returns:
        {
          "detections": [ {bbox, score, category_id, category_name}, ... ],
          "image_shape": [h,w],
          "model": "yolov8n"
        }

        Raises:
            ValueError: if no image is provided or an option is out of range.
        """
        imgsz = int(_get_option(data, "imgsz", 640))
        conf = float(_get_option(data, "conf", 0.25))
        iou = float(_get_option(data, "iou", 0.45))
        max_det = int(_get_option(data, "max_det", 300))

        # Reject nonsensical settings before doing any decoding or network I/O.
        if imgsz <= 0:
            raise ValueError("'imgsz' must be a positive integer.")
        if not 0.0 <= conf <= 1.0:
            raise ValueError("'conf' must be between 0 and 1.")
        if not 0.0 <= iou <= 1.0:
            raise ValueError("'iou' must be between 0 and 1.")
        if max_det <= 0:
            raise ValueError("'max_det' must be a positive integer.")

        img = load_image_from_payload(data)
        w, h = img.size  # PIL reports (width, height)

        results = self.model.predict(
            img,
            imgsz=imgsz,
            conf=conf,
            iou=iou,
            max_det=max_det,
            verbose=False,
        )

        # A single input image is passed, so only the first result is used.
        detections = yolo_to_coco(results[0])
        return {
            "detections": detections,
            "image_shape": [h, w],
            "model": _MODEL_NAME,
        }