# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from __future__ import annotations

import argparse

import cv2
import numpy as np
import onnxruntime as ort
import torch

from ultralytics.engine.results import Results
from ultralytics.utils import ASSETS, YAML, nms, ops
from ultralytics.utils.checks import check_yaml


class YOLOv8Seg:
    """YOLOv8 segmentation model for performing instance segmentation using ONNX Runtime.

    This class implements a YOLOv8 instance segmentation model using ONNX Runtime for inference. It handles
    preprocessing of input images, running inference with the ONNX model, and postprocessing the results to generate
    bounding boxes and segmentation masks.

    Attributes:
        session (ort.InferenceSession): ONNX Runtime inference session for model execution.
        imgsz (tuple[int, int]): Input image size as (height, width) for the model.
        classes (dict): Dictionary mapping class indices to class names from the dataset.
        conf (float): Confidence threshold for filtering detections.
        iou (float): IoU threshold used by non-maximum suppression.

    Methods:
        letterbox: Resize and pad image while maintaining aspect ratio.
        preprocess: Preprocess the input image before feeding it into the model.
        postprocess: Post-process model predictions to extract meaningful results.
        process_mask: Process prototype masks with predicted mask coefficients to generate instance segmentation masks.

    Examples:
        >>> model = YOLOv8Seg("yolov8n-seg.onnx", conf=0.25, iou=0.7)
        >>> img = cv2.imread("image.jpg")
        >>> results = model(img)
        >>> cv2.imshow("Segmentation", results[0].plot())
    """

    def __init__(self, onnx_model: str, conf: float = 0.25, iou: float = 0.7, imgsz: int | tuple[int, int] = 640):
        """Initialize the instance segmentation model using an ONNX model.

        Args:
            onnx_model (str): Path to the ONNX model file.
            conf (float, optional): Confidence threshold for filtering detections.
            iou (float, optional): IoU threshold for non-maximum suppression.
            imgsz (int | tuple[int, int], optional): Input image size of the model. Can be an integer for square input
                or a tuple for rectangular input.
        """
        available = ort.get_available_providers()
        providers = [p for p in ("CUDAExecutionProvider", "CPUExecutionProvider") if p in available]
        self.session = ort.InferenceSession(onnx_model, providers=providers or available)

        self.imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz
        self.classes = YAML.load(check_yaml("coco8.yaml"))["names"]
        self.conf = conf
        self.iou = iou

    def __call__(self, img: np.ndarray) -> list[Results]:
        """Run inference on the input image using the ONNX model.

        Args:
            img (np.ndarray): The original input image in BGR format.

        Returns:
            (list[Results]): Processed detection results after post-processing, containing bounding boxes and
                segmentation masks.
        """
        prep_img = self.preprocess(img, self.imgsz)
        outs = self.session.run(None, {self.session.get_inputs()[0].name: prep_img})
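        # outs[0] holds the raw predictions and outs[1] the mask prototypes; for a standard YOLOv8-seg export at
        # 640x640 these are roughly (1, 4 + num_classes + 32, 8400) and (1, 32, 160, 160).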
        return self.postprocess(img, prep_img, outs)

    def letterbox(self, img: np.ndarray, new_shape: tuple[int, int] = (640, 640)) -> np.ndarray:
        """Resize and pad image while maintaining aspect ratio.

        Args:
            img (np.ndarray): Input image in BGR format.
            new_shape (tuple[int, int], optional): Target shape as (height, width).

        Returns:
            (np.ndarray): Resized and padded image.
        """
        shape = img.shape[:2]  # current shape [height, width]

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

        # Compute padding
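        # new_unpad is ordered (width, height) because cv2.resize expects its size argument as (w, h)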
        new_unpad = round(shape[1] * r), round(shape[0] * r)
        dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = round(dh - 0.1), round(dh + 0.1)
        left, right = round(dw - 0.1), round(dw + 0.1)
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))

        return img

    def preprocess(self, img: np.ndarray, new_shape: tuple[int, int]) -> np.ndarray:
        """Preprocess the input image before feeding it into the model.

        Args:
            img (np.ndarray): The input image in BGR format.
            new_shape (tuple[int, int]): The target shape for resizing as (height, width).

        Returns:
            (np.ndarray): Preprocessed image ready for model inference, with shape (1, 3, height, width) and normalized
                to [0, 1].
        """
        img = self.letterbox(img, new_shape)
        img = img[..., ::-1].transpose([2, 0, 1])[None]  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)
        img = img.astype(np.float32) / 255  # Normalize to [0, 1]
        return img

    def postprocess(self, img: np.ndarray, prep_img: np.ndarray, outs: list) -> list[Results]:
        """Post-process model predictions to extract meaningful results.

        Args:
            img (np.ndarray): The original input image.
            prep_img (np.ndarray): The preprocessed image used for inference.
            outs (list): Model outputs containing predictions and prototype masks.

        Returns:
            (list[Results]): Processed detection results containing bounding boxes and segmentation masks.
        """
        preds, protos = (torch.from_numpy(p) for p in outs)
        preds = nms.non_max_suppression(preds, self.conf, self.iou, nc=len(self.classes))
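        # Each retained row is laid out as (x1, y1, x2, y2, confidence, class, mask coefficients...): the first six
        # columns describe the detection box and the remaining columns are consumed by process_mask below.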
        results = []
        for i, pred in enumerate(preds):
            pred[:, :4] = ops.scale_boxes(prep_img.shape[2:], pred[:, :4], img.shape)
            masks = self.process_mask(protos[i], pred[:, 6:], pred[:, :4], img.shape[:2])
            results.append(Results(img, path="", names=self.classes, boxes=pred[:, :6], masks=masks))
        return results

    def process_mask(
        self, protos: torch.Tensor, masks_in: torch.Tensor, bboxes: torch.Tensor, shape: tuple[int, int]
    ) -> torch.Tensor:
        """Process prototype masks with predicted mask coefficients to generate instance segmentation masks.

        Args:
            protos (torch.Tensor): Prototype masks with shape (mask_dim, mask_h, mask_w).
            masks_in (torch.Tensor): Predicted mask coefficients with shape (N, mask_dim), where N is number of
                detections.
            bboxes (torch.Tensor): Bounding boxes with shape (N, 4), where N is number of detections.
            shape (tuple[int, int]): The size of the input image as (height, width).

        Returns:
            (torch.Tensor): Binary segmentation masks with shape (N, height, width).
        """
        c, mh, mw = protos.shape  # CHW
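        # Each detection's mask_dim coefficients form a linear combination of the prototype maps, yielding one
        # low-resolution mask per detection before upsampling and cropping.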
        masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # Matrix multiplication
        masks = ops.scale_masks(masks[None], shape)[0]  # Scale masks to original image size
        masks = ops.crop_mask(masks, bboxes)  # Crop masks to bounding boxes
        return masks.gt_(0.0)  # Convert to binary masks


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True, help="Path to ONNX model")
    parser.add_argument("--source", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
    parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
    parser.add_argument("--iou", type=float, default=0.7, help="NMS IoU threshold")
    args = parser.parse_args()

    model = YOLOv8Seg(args.model, args.conf, args.iou)
    img = cv2.imread(args.source)
    results = model(img)

    cv2.imshow("Segmented Image", results[0].plot())
    cv2.waitKey(0)
    cv2.destroyAllWindows()