FFDNET-L / handler.py
ogamaniuk's picture
Create handler.py
6322921 verified
raw
history blame
2.4 kB
from typing import Dict, List, Any
from ultralytics import YOLO
import base64
from io import BytesIO
from PIL import Image
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for the FFDNet-L YOLO model.

    Detects form widgets (text boxes, choice buttons, signatures) in an
    image and returns normalized center-format bounding boxes.
    """

    def __init__(self, path: str = ""):
        # Load the YOLO checkpoint shipped alongside this handler.
        self.model = YOLO(f"{path}/FFDNet-L.pt")
        # Mapping from model class ids to human-readable widget names.
        self.id_to_cls = {0: "TextBox", 1: "ChoiceButton", 2: "Signature"}

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Run widget detection on one image.

        Args:
            data: A dictionary containing:
                - "inputs": base64-encoded image (a bare payload or a
                  "data:image/...;base64," data URI), an image URL, or an
                  already-decoded image object (e.g. PIL.Image)
                - "parameters": optional dict with keys "conf", "iou",
                  "imgsz", "augment"

        Returns:
            List of predictions, one per detected box, each containing the
            widget type, confidence, and a normalized xywh bounding box.
        """
        # Extract image from request; if "inputs" is absent the whole
        # payload is treated as the image (legacy behavior, preserved).
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # Handle image input (base64 string, data URI, or URL).
        if isinstance(inputs, str):
            if inputs.startswith("http"):
                # ultralytics accepts URLs directly; let it do the fetch.
                image = inputs
            else:
                # Strip an optional "data:image/...;base64," prefix —
                # clients commonly send data URIs, which would otherwise
                # make b64decode raise binascii.Error.
                payload = inputs.split(",", 1)[-1] if inputs.startswith("data:") else inputs
                image = Image.open(BytesIO(base64.b64decode(payload)))
        else:
            # Assume an already-decoded image object.
            image = inputs

        # Inference parameters with handler defaults.
        confidence = parameters.get("conf", 0.3)
        iou = parameters.get("iou", 0.1)
        imgsz = parameters.get("imgsz", 1600)
        augment = parameters.get("augment", True)

        # Run inference.
        results = self.model.predict(
            image,
            conf=confidence,
            iou=iou,
            imgsz=imgsz,
            augment=augment,
        )

        # Flatten results into a JSON-serializable list of predictions.
        predictions = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes.cpu().numpy():
                # Normalized center-x, center-y, width, height.
                cx, cy, w, h = box.xywhn[0]
                cls_id = int(box.cls.item())
                predictions.append({
                    # Fall back to the raw id for class ids outside the
                    # known mapping instead of raising KeyError.
                    "widget_type": self.id_to_cls.get(cls_id, f"class_{cls_id}"),
                    "confidence": float(box.conf[0]),
                    "bounding_box": {
                        "cx": float(cx),
                        "cy": float(cy),
                        "w": float(w),
                        "h": float(h),
                    },
                })
        return predictions