"""
tiled_yolo_softnms.py

Tiled inference + class-wise Soft-NMS for YOLO (Ultralytics).

- Runs YOLO on overlapping tiles to boost recall on small symbols.
- Maps all tile detections back to full-image coords.
- Fuses duplicates with Soft-NMS per class (or class-agnostic on request).

Usage
-----
from ultralytics import YOLO
import cv2

model = YOLO("best.pt")          # your YOLO v12/v11/v8 checkpoint
img = cv2.imread("example.jpg")  # BGR; Ultralytics documents numpy input as HWC BGR

out = detect_tiled_softnms(
    model, img,
    tile_size=1024, overlap=0.25,
    per_tile_conf=0.2, per_tile_iou=0.7,
    softnms_iou=0.55, softnms_method="linear", softnms_sigma=0.5,
    final_conf=0.25, device=None, imgsz=None
)

# Access results
xyxy = out["xyxy"]
conf = out["conf"]
cls = out["cls"]

annot = draw_detections(img.copy(), xyxy, conf, cls, out["names"])
cv2.imwrite("annotated.jpg", annot)
"""

from typing import Any, Dict, List, Optional, Tuple

import numpy as np

try:
    import cv2
except ImportError:  # OpenCV is only required by draw_detections
    cv2 = None


# ---------------------------
# Utilities
# ---------------------------

def make_overlapping_tiles(H: int, W: int, tile: int, overlap: float) -> List[Tuple[int, int, int, int]]:
    """Return a list of (x0, y0, x1, y1) tile boxes covering an H x W image.

    Tiles are laid out on a regular grid whose stride is
    ``int(tile * (1 - overlap))`` (at least 1). If the grid does not reach the
    right/bottom border, one extra tile flush with that border is appended, so
    the whole image is always covered. Tiles are clipped to the image, so an
    image smaller than ``tile`` yields a single tile of the image size.

    Raises:
        ValueError: if ``tile`` is not positive or ``overlap`` is outside [0, 1).
    """
    # Explicit raises instead of assert: asserts vanish under ``python -O``.
    if tile <= 0:
        raise ValueError("tile must be a positive integer")
    if not 0.0 <= overlap < 1.0:
        raise ValueError("overlap must satisfy 0.0 <= overlap < 1.0")

    stride = max(1, int(tile * (1.0 - overlap)))
    xs = list(range(0, max(W - tile, 0) + 1, stride))
    ys = list(range(0, max(H - tile, 0) + 1, stride))
    # Add a border-aligned tile when the regular grid stops short of the edge.
    if xs[-1] + tile < W:
        xs.append(W - tile)
    if ys[-1] + tile < H:
        ys.append(H - tile)

    tiles = []
    for y in ys:
        for x in xs:
            x0, y0 = max(0, x), max(0, y)
            x1, y1 = min(W, x0 + tile), min(H, y0 + tile)
            tiles.append((x0, y0, x1, y1))
    return tiles


def iou_xyxy(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """IoU between one box ``a`` (4,) and many boxes ``b`` (N, 4), xyxy format.

    Returns an array of shape (N,). The union is floored at 1e-9 so that
    zero-area pairs give IoU 0 instead of dividing by zero.
    """
    xx1 = np.maximum(a[0], b[:, 0])
    yy1 = np.maximum(a[1], b[:, 1])
    xx2 = np.minimum(a[2], b[:, 2])
    yy2 = np.minimum(a[3], b[:, 3])
    inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    union = np.maximum(1e-9, area_a + area_b - inter)
    return inter / union


def _soft_nms_indices(
    boxes: np.ndarray,
    scores: np.ndarray,
    groups: np.ndarray,
    iou_thr: float,
    method: str,
    sigma: float,
    score_thresh: float,
    max_det: Optional[int],
) -> Tuple[np.ndarray, np.ndarray]:
    """Run Soft-NMS independently inside each group and return what survives.

    ``groups`` assigns every box to a suppression group (e.g. its class id);
    boxes only suppress other boxes of the same group.

    Returns ``(keep, keep_scores)``: indices into the input arrays and the
    corresponding (possibly decayed) scores, ordered by descending score and
    truncated to ``max_det`` entries when ``max_det`` is not None.
    """
    # Validate up front so a bad method is reported even for empty/1-box input.
    if method not in ("linear", "gaussian", "hard"):
        raise ValueError("method must be 'linear', 'gaussian', or 'hard'")
    if method == "gaussian" and sigma <= 0:
        raise ValueError("sigma must be > 0 for gaussian Soft-NMS")

    boxes = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
    work = np.array(scores, dtype=np.float32).reshape(-1)  # private copy, decayed in place
    groups = np.asarray(groups).reshape(-1)
    if not (boxes.shape[0] == work.shape[0] == groups.shape[0]):
        raise ValueError("boxes, scores and classes must have the same length")

    kept: List[int] = []
    for g in np.unique(groups):
        idxs = np.flatnonzero(groups == g)
        while idxs.size:
            # Pick the current best remaining box of this group.
            pos = int(np.argmax(work[idxs]))
            best = int(idxs[pos])
            kept.append(best)
            idxs = np.delete(idxs, pos)
            if idxs.size == 0:
                break

            ious = iou_xyxy(boxes[best], boxes[idxs])
            if method == "linear":
                # Only boxes overlapping more than iou_thr are decayed.
                work[idxs] *= np.where(ious > iou_thr, 1.0 - ious, 1.0)
            elif method == "gaussian":
                work[idxs] *= np.exp(-(ious ** 2) / sigma)
            else:  # "hard": standard NMS behaviour
                idxs = idxs[ious <= iou_thr]

            # Prune candidates whose score has decayed below the floor.
            idxs = idxs[work[idxs] >= score_thresh]

    keep = np.asarray(kept, dtype=np.intp)
    keep_scores = work[keep]
    order = np.argsort(-keep_scores, kind="stable")
    if max_det is not None:
        order = order[:max_det]
    return keep[order], keep_scores[order]


def soft_nms_classwise(
    boxes: np.ndarray,
    scores: np.ndarray,
    classes: np.ndarray,
    iou_thr: float = 0.55,
    method: str = "linear",
    sigma: float = 0.5,
    score_thresh: float = 1e-3,
    max_det: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Soft-NMS applied separately to each class.

    Args:
        boxes: (N, 4) boxes in xyxy format.
        scores: (N,) confidences.
        classes: (N,) class ids; boxes of different classes never interact.
        iou_thr: overlap threshold used by the "linear" and "hard" methods.
        method: "linear", "gaussian" or "hard" (plain NMS).
        sigma: width of the gaussian decay (only used by "gaussian").
        score_thresh: candidates whose decayed score drops below this are dropped.
        max_det: optional cap on the number of returned detections.

    Returns:
        ``(boxes, scores, classes)`` of the surviving detections, sorted by
        descending (decayed) score. Boxes and scores are float32; classes keep
        their input dtype.

    Raises:
        ValueError: on an unknown ``method``, a non-positive ``sigma`` with
            "gaussian", or inputs of mismatched length.
    """
    classes = np.asarray(classes).reshape(-1)
    keep, kept_scores = _soft_nms_indices(
        boxes, scores, classes,
        iou_thr=iou_thr, method=method, sigma=sigma,
        score_thresh=score_thresh, max_det=max_det,
    )
    boxes32 = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
    return boxes32[keep], kept_scores, classes[keep]


def draw_detections(img: np.ndarray, boxes: np.ndarray, scores: np.ndarray,
                    classes: np.ndarray, names: Dict[int, str]) -> np.ndarray:
    """Simple visualizer: draw boxes and "<name> <score>" labels onto ``img``.

    Drawing happens in place and the same array is returned; pass a copy to
    keep the original untouched. The channel order of ``img`` is left as is.

    Raises:
        ImportError: if OpenCV (cv2) is not installed.
    """
    if cv2 is None:
        raise ImportError("draw_detections requires OpenCV (cv2)")

    for box, sc, cl in zip(boxes.astype(int), scores, classes.astype(int)):
        # Plain Python ints: OpenCV point parsing can reject numpy scalars.
        x1, y1, x2, y2 = (int(v) for v in box)
        cl = int(cl)
        label = f"{names.get(cl, str(cl))} {sc:.2f}"
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 180, 255), 2)
        (tw, th), bl = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        # Filled label background just above the box's top-left corner.
        cv2.rectangle(img, (x1, y1 - th - 6), (x1 + tw + 4, y1), (0, 180, 255), -1)
        cv2.putText(img, label, (x1 + 2, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (0, 0, 0), 2, cv2.LINE_AA)
    return img


# ---------------------------
# Main tiled inference
# ---------------------------

def detect_tiled_softnms(
    model,
    image: np.ndarray,
    tile_size: int = 1024,
    overlap: float = 0.25,
    per_tile_conf: float = 0.25,
    per_tile_iou: float = 0.7,
    softnms_iou: float = 0.55,
    softnms_method: str = "linear",
    softnms_sigma: float = 0.5,
    final_conf: float = 0.25,
    max_det: int = 3000,
    device: Optional[str] = None,
    imgsz: Optional[int] = None,
    class_agnostic_nms: bool = False
) -> Dict[str, Any]:
    """Run YOLO on overlapping tiles, then fuse globally with Soft-NMS.

    Args:
        model: object exposing ``predict(source=..., **kwargs)`` (and optionally
            ``names``) in the style of an Ultralytics YOLO model.
        image: HxWxC image array.
        tile_size, overlap: tiling geometry, see ``make_overlapping_tiles``.
        per_tile_conf, per_tile_iou: ``conf`` / ``iou`` forwarded to ``predict``.
        softnms_iou, softnms_method, softnms_sigma: global Soft-NMS settings.
        final_conf: detections whose fused score is below this are dropped.
        max_det: cap on detections kept by the global Soft-NMS.
        device, imgsz: forwarded to ``predict`` only when not None, so that the
            model's own defaults apply otherwise.
        class_agnostic_nms: if True, boxes suppress each other regardless of
            class; the returned class labels are still the predicted ones.

    Returns:
        dict with "xyxy" (N, 4) float32, "conf" (N,) float32, "cls" (N,) int32
        and "names" (the model's class-name mapping).

    Raises:
        ValueError: if ``image`` is not 3-dimensional or the tiling / Soft-NMS
            parameters are invalid.
    """
    if image.ndim != 3:
        raise ValueError("image must be HxWx3")
    H, W = image.shape[:2]

    names = getattr(model, "names", None)
    if names is None:
        names = {i: str(i) for i in range(1000)}

    # Leave imgsz/device out entirely when unset instead of passing None,
    # which would override the model's configured defaults with None.
    predict_kwargs: Dict[str, Any] = {
        "conf": per_tile_conf,
        "iou": per_tile_iou,
        "verbose": False,
    }
    if imgsz is not None:
        predict_kwargs["imgsz"] = imgsz
    if device is not None:
        predict_kwargs["device"] = device

    tiles = make_overlapping_tiles(H, W, tile=tile_size, overlap=overlap)

    all_boxes, all_scores, all_classes = [], [], []
    for (x0, y0, x1, y1) in tiles:
        tile = np.ascontiguousarray(image[y0:y1, x0:x1])
        results = model.predict(source=tile, **predict_kwargs)
        if not results:
            continue
        r = results[0]
        if r.boxes is None or r.boxes.shape[0] == 0:
            continue

        # np.array copies, so the offsets below never touch the result tensor.
        b = np.array(r.boxes.xyxy.cpu().numpy(), dtype=np.float32).reshape(-1, 4)
        s = r.boxes.conf.cpu().numpy()
        c = r.boxes.cls.cpu().numpy().astype(int)

        # Map tile coordinates to full-image coordinates.
        b[:, [0, 2]] += x0
        b[:, [1, 3]] += y0

        # Clip to the valid pixel index range.
        b[:, [0, 2]] = np.clip(b[:, [0, 2]], 0, W - 1)
        b[:, [1, 3]] = np.clip(b[:, [1, 3]], 0, H - 1)

        # Filter degenerate boxes.
        valid = (b[:, 2] > b[:, 0]) & (b[:, 3] > b[:, 1])
        if not np.any(valid):
            continue

        all_boxes.append(b[valid])
        all_scores.append(s[valid])
        all_classes.append(c[valid])

    if not all_boxes:
        return {"xyxy": np.zeros((0, 4), dtype=np.float32),
                "conf": np.zeros((0,), dtype=np.float32),
                "cls": np.zeros((0,), dtype=np.int32),
                "names": names}

    boxes = np.concatenate(all_boxes, axis=0).astype(np.float32)
    scores = np.concatenate(all_scores, axis=0).astype(np.float32)
    classes = np.concatenate(all_classes, axis=0).astype(np.int32)

    # Global fusion. For class-agnostic suppression every box is put into one
    # group, but the real labels are kept and re-indexed afterwards.
    groups = np.zeros_like(classes) if class_agnostic_nms else classes
    keep, scores = _soft_nms_indices(
        boxes, scores, groups,
        iou_thr=softnms_iou, method=softnms_method, sigma=softnms_sigma,
        score_thresh=1e-3, max_det=max_det,
    )
    boxes, classes = boxes[keep], classes[keep]

    # Final confidence gate on the fused (possibly decayed) scores.
    gate = scores >= final_conf
    return {"xyxy": boxes[gate], "conf": scores[gate], "cls": classes[gate], "names": names}