FFDNET-L / handler.py
ogamaniuk's picture
Create handler.py
6322921 verified
raw
history blame
2.4 kB
from typing import Dict, List, Any
from ultralytics import YOLO
import base64
from io import BytesIO
from PIL import Image
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for the FFDNet-L YOLO model.

    Detects form widgets (text boxes, choice buttons, signatures) in an
    image and returns normalized center-format bounding boxes.
    """

    def __init__(self, path: str = ""):
        # Load the YOLO checkpoint shipped alongside this handler.
        self.model = YOLO(f"{path}/FFDNet-L.pt")
        # Mapping from model class ids to human-readable widget names.
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Run widget detection on one image.

        Args:
            data: A dictionary containing:
                - "inputs": base64-encoded image (a bare payload or a
                  "data:image/...;base64," data URI), an image URL, or an
                  already-decoded image object (e.g. PIL.Image)
                - "parameters": optional dict with keys "conf", "iou",
                  "imgsz", "augment"

        Returns:
            List of predictions, one per detected box, each containing the
            widget type, confidence, and a normalized xywh bounding box.
        """
        # Extract image from request; if "inputs" is absent the whole
        # payload is treated as the image (legacy behavior, preserved).
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # Handle image input (base64 string, data URI, or URL).
        if isinstance(inputs, str):
            if inputs.startswith("http"):
                # ultralytics accepts URLs directly; let it do the fetch.
                image = inputs
            else:
                # Strip an optional "data:image/...;base64," prefix —
                # clients commonly send data URIs, which would otherwise
                # make b64decode raise binascii.Error.
                payload = inputs.split(",", 1)[-1] if inputs.startswith("data:") else inputs
                image = Image.open(BytesIO(base64.b64decode(payload)))
        else:
            # Assume an already-decoded image object.
            image = inputs

        # Inference parameters with handler defaults.
        confidence = parameters.get("conf", 0.3)
        iou = parameters.get("iou", 0.1)
        imgsz = parameters.get("imgsz", 1600)
        augment = parameters.get("augment", True)

        # Run inference.
        results = self.model.predict(
            image,
            conf=confidence,
            iou=iou,
            imgsz=imgsz,
            augment=augment,
        )

        # Flatten results into a JSON-serializable list of predictions.
        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes.cpu().numpy():
                # Normalized center-x, center-y, width, height.
                cx, cy, w, h = box.xywhn[0]
                cls_id = int(box.cls.item())
                predictions.append({
                    # Fall back to the raw id for class ids outside the
                    # known mapping instead of raising KeyError.
                    "widget_type": self.id_to_cls.get(cls_id, f"class_{cls_id}"),
                    "confidence": float(box.conf[0]),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(w),
                        "h": float(h),
                    },
                })
        return predictions