""" Object detection using YOLOv8n. Wraps the ultralytics YOLO interface and returns detections in the format expected by build_depth_context: (boxes, classes, confidences). """ import numpy as np import torch from ultralytics import YOLO from ..config import CONF_THRESHOLD, YOLO_MODEL class ObjectDetector: """YOLOv8n object detector. Downloads ``yolov8n.pt`` on first use (cached by ultralytics in ``~/.cache/ultralytics/``). Subsequent loads use the cached weights. """ def __init__(self) -> None: """Load YOLOv8n onto the available device.""" print("Loading YOLOv8n...") self.model = YOLO(YOLO_MODEL) # Move weights to GPU when available. YOLO's constructor always # loads to CPU; .to() moves the underlying PyTorch model in-place. self.device = "cuda" if torch.cuda.is_available() else "cpu" self.model.to(self.device) if torch.cuda.is_available(): print( f" GPU memory allocated: " f"{torch.cuda.memory_allocated() / 1024**2:.0f} MB" ) def detect( self, image: np.ndarray ) -> tuple[np.ndarray, list[str], list[float]]: """Run detection on an RGB image. Args: image: uint8 RGB numpy array of shape (H, W, 3). Returns: boxes: float32 array of shape (N, 4) as [x1, y1, x2, y2] in pixel coordinates. classes: List of N class-name strings. confidences: List of N confidence floats in [0, 1]. """ # ultralytics assumes BGR numpy input and does its own BGR→RGB flip # internally. Convert so colours are correct for a model trained on # standard BGR/OpenCV images. bgr = image[..., ::-1] with torch.inference_mode(): results = self.model( bgr, conf=CONF_THRESHOLD, verbose=False, device=self.device, ) result = results[0] det = result.boxes if det is None or len(det) == 0: empty = np.empty((0, 4), dtype=np.float32) return empty, [], [] boxes = det.xyxy.cpu().numpy().astype(np.float32) # (N, 4) confidences = det.conf.cpu().numpy().tolist() # (N,) class_ids = det.cls.cpu().numpy().astype(int).tolist() # (N,) classes = [result.names[cid] for cid in class_ids] return boxes, classes, confidences