| | from typing import Dict, List, Any |
| | from ultralytics import YOLO |
| | import base64 |
| | from io import BytesIO |
| | from PIL import Image |
| |
|
class EndpointHandler:
    """Inference endpoint handler that detects form widgets with a YOLO model.

    The ``FFDNet-L.pt`` weights are loaded once at construction; the instance
    is then called with a request payload and returns the detections as
    plain, JSON-friendly dictionaries.
    """

    def __init__(self, path=""):
        """Load the detection model.

        Args:
            path: Directory containing ``FFDNet-L.pt``. An empty string means
                the current working directory.
        """
        # With an empty ``path`` a plain f"{path}/FFDNet-L.pt" would resolve to
        # "/FFDNet-L.pt" (filesystem root), so fall back to a relative name.
        weights = f"{path}/FFDNet-L.pt" if path else "FFDNet-L.pt"
        self.model = YOLO(weights)
        # Maps the model's integer class ids to the widget names reported back.
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    @staticmethod
    def _load_image(inputs):
        """Convert the raw ``inputs`` value into a source for ``model.predict``."""
        if not isinstance(inputs, str):
            # Non-string inputs are handed to the model unchanged.
            return inputs
        if inputs.startswith("http"):
            # URLs are forwarded as-is; the model is responsible for fetching.
            return inputs
        if inputs.startswith("data:") and "," in inputs:
            # Strip a data-URI header ("data:image/png;base64,"). The base64
            # alphabet contains neither ':' nor ',', so this cannot damage a
            # bare base64 payload.
            inputs = inputs.split(",", 1)[1]
        return Image.open(BytesIO(base64.b64decode(inputs)))

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run widget detection for one request.

        Args:
            data: A dictionary containing:
                - "inputs": base64 encoded image (optionally as a data URI)
                  or an image URL
                - "parameters": optional dict with "conf" (alias
                  "confidence"), "iou", "imgsz" and "augment"

        Returns:
            One dictionary per detected box with the keys "widget_type",
            "confidence" and "bounding_box". The bounding box holds "cx",
            "cy", "w" and "h", taken from the box's ``xywhn`` values.
        """
        # Work on a shallow copy so the caller's request dict is not mutated.
        payload = dict(data)
        inputs = payload.pop("inputs", payload)
        # Treat an explicit ``"parameters": None`` like a missing entry.
        parameters = payload.pop("parameters", None) or {}

        image = self._load_image(inputs)

        # "conf" is the key the model uses; "confidence" is accepted as an
        # alias because that is the name the request contract advertised.
        confidence = parameters.get("conf", parameters.get("confidence", 0.3))
        iou = parameters.get("iou", 0.1)
        imgsz = parameters.get("imgsz", 1600)
        augment = parameters.get("augment", True)

        results = self.model.predict(
            image,
            conf=confidence,
            iou=iou,
            imgsz=imgsz,
            augment=augment,
        )

        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes.cpu().numpy():
                x, y, w, h = box.xywhn[0]
                cls_id = int(box.cls.item())
                predictions.append({
                    "widget_type": self.id_to_cls[cls_id],
                    "confidence": float(box.conf[0]),
                    "bounding_box": {
                        "cx": float(x),
                        "cy": float(y),
                        "w": float(w),
                        "h": float(h),
                    },
                })

        return predictions