"""
Object detection using YOLOv8n.
Wraps the ultralytics YOLO interface and returns detections in the format
expected by build_depth_context: (boxes, classes, confidences).
"""
import numpy as np
import torch
from ultralytics import YOLO
from ..config import CONF_THRESHOLD, YOLO_MODEL
class ObjectDetector:
    """YOLO object detector backed by the ultralytics ``YOLO`` interface.

    The weights named by ``YOLO_MODEL`` are loaded through ultralytics, which
    is expected to download them on first use and reuse its cached copy on
    later loads (the cache location is managed by ultralytics).

    Attributes are set in ``__init__``:
        model: The loaded ultralytics ``YOLO`` object.
        device: ``"cuda"`` when torch reports a usable GPU, else ``"cpu"``.
    """

    def __init__(self) -> None:
        """Load the model and move it onto the best available device."""
        print("Loading YOLOv8n...")
        self.model = YOLO(YOLO_MODEL)

        # Prefer the GPU when torch can see one. The explicit .to() call
        # moves the underlying PyTorch weights after construction.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

        # self.device is "cuda" exactly when CUDA was reported available,
        # so there is no need to query torch a second time.
        if self.device == "cuda":
            print(
                f" GPU memory allocated: "
                f"{torch.cuda.memory_allocated() / 1024**2:.0f} MB"
            )

    def detect(
        self, image: np.ndarray
    ) -> tuple[np.ndarray, list[str], list[float]]:
        """Run detection on a single RGB image.

        Args:
            image: RGB array of shape (H, W, 3); uint8 is the expected dtype.

        Returns:
            boxes: float32 array of shape (N, 4) as [x1, y1, x2, y2]
                in pixel coordinates.
            classes: List of N class-name strings.
            confidences: List of N confidence floats in [0, 1].

            When nothing is detected, ``boxes`` has shape (0, 4) and both
            lists are empty.

        Raises:
            ValueError: If ``image`` is not a 3-D array whose last axis has
                exactly three channels.
        """
        image = np.asarray(image)

        # Guard the channel flip below: on a 2-D (grayscale) array the last
        # axis is width, so reversing it would mirror the picture and yield
        # mirrored boxes; on a 4-channel array it would scramble the channels.
        if image.ndim != 3 or image.shape[-1] != 3:
            raise ValueError(
                f"image must have shape (H, W, 3); got {image.shape}"
            )

        # ultralytics treats numpy input as BGR (OpenCV convention) and flips
        # it to RGB itself, so hand it BGR to end up with correct colours.
        # The reversed slice is a negative-stride view; copy it into a
        # contiguous buffer so downstream code never sees negative strides.
        bgr = np.ascontiguousarray(image[..., ::-1])

        with torch.inference_mode():
            results = self.model(
                bgr,
                conf=CONF_THRESHOLD,
                verbose=False,
                device=self.device,
            )

        # A single image was passed in, so only the first result is relevant.
        result = results[0]
        det = result.boxes
        if det is None or len(det) == 0:
            return np.empty((0, 4), dtype=np.float32), [], []

        boxes = det.xyxy.cpu().numpy().astype(np.float32)  # (N, 4)
        confidences = det.conf.cpu().numpy().tolist()  # (N,)
        class_ids = det.cls.cpu().numpy().astype(int).tolist()  # (N,)
        # Map numeric class ids to the model's human-readable labels.
        classes = [result.names[cid] for cid in class_ids]
        return boxes, classes, confidences
|