| from __future__ import annotations |
|
|
| from pathlib import Path |
|
|
| import cv2 |
| from PIL import Image |
| from torchvision.transforms.functional import to_pil_image |
| from ultralytics import YOLO |
|
|
| from adetailer import PredictOutput |
| from adetailer.common import create_mask_from_bbox |
|
|
|
|
| def ultralytics_predict( |
| model_path: str | Path, |
| image: Image.Image, |
| confidence: float = 0.3, |
| device: str = "", |
| ) -> PredictOutput: |
| model = YOLO(model_path) |
| pred = model(image, conf=confidence, device=device) |
|
|
| bboxes = pred[0].boxes.xyxy.cpu().numpy() |
| if bboxes.size == 0: |
| return PredictOutput() |
| bboxes = bboxes.tolist() |
|
|
| if pred[0].masks is None: |
| masks = create_mask_from_bbox(bboxes, image.size) |
| else: |
| masks = mask_to_pil(pred[0].masks.data, image.size) |
| preview = pred[0].plot() |
| preview = cv2.cvtColor(preview, cv2.COLOR_BGR2RGB) |
| preview = Image.fromarray(preview) |
|
|
| return PredictOutput(bboxes=bboxes, masks=masks, preview=preview) |
|
|
|
|
| def mask_to_pil(masks, shape: tuple[int, int]) -> list[Image.Image]: |
| """ |
| Parameters |
| ---------- |
| masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W). |
| The device can be CUDA, but `to_pil_image` takes care of that. |
| |
| shape: tuple[int, int] |
| (width, height) of the original image |
| """ |
| n = masks.shape[0] |
| return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)] |
|
|