| """ |
| tiled_yolo_softnms.py |
| Tiled inference + class-wise Soft-NMS for YOLO (Ultralytics). |
| - Runs YOLO on overlapping tiles to boost recall on small symbols. |
| - Maps all tile detections back to full-image coords. |
| - Fuses duplicates with Soft-NMS per class. |
| |
| Usage |
| ----- |
| from ultralytics import YOLO |
| import cv2 |
| |
| model = YOLO("best.pt") # your YOLO v12/v11/v8 checkpoint |
| img = cv2.imread("example.jpg")[:, :, ::-1] # BGR->RGB so draw_detections receives RGB; NOTE: Ultralytics documents numpy input as BGR, so confirm the channel order your model expects |
| |
| out = detect_tiled_softnms( |
| model, img, |
| tile_size=1024, overlap=0.25, |
| per_tile_conf=0.2, per_tile_iou=0.7, |
| softnms_iou=0.55, softnms_method="linear", softnms_sigma=0.5, |
| final_conf=0.25, device=None, imgsz=None |
| ) |
| |
| # Access results |
| xyxy = out["xyxy"] |
| conf = out["conf"] |
| cls = out["cls"] |
| annot = draw_detections(img.copy(), xyxy, conf, cls, out["names"]) |
| cv2.imwrite("annotated.jpg", annot[:, :, ::-1]) # RGB->BGR for writing |
| """ |
|
|
| from typing import List, Tuple, Dict, Optional |
| import numpy as np |
| import cv2 |
|
|
| |
| |
| |
|
|
def make_overlapping_tiles(H: int, W: int, tile: int, overlap: float) -> List[Tuple[int, int, int, int]]:
    """Return (x0, y0, x1, y1) tile boxes that cover an H x W image.

    Tiles are listed row by row (top to bottom, left to right inside a row).
    Neighbouring tiles start ``int(tile * (1 - overlap))`` pixels apart (at
    least 1). When the regular grid stops short of the right or bottom border,
    one extra tile flush with that border is added. Tiles never extend past
    the image, so an image smaller than ``tile`` yields a single tile equal to
    the whole image.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        tile: Side length of a (square) tile in pixels; must be positive.
        overlap: Fraction of a tile shared with its neighbour, in [0, 1).

    Returns:
        List of ``(x0, y0, x1, y1)`` tuples with exclusive ``x1``/``y1``.
        Empty when the image has no pixels.

    Raises:
        ValueError: If ``tile`` is not positive or ``overlap`` is outside [0, 1).
    """
    # Explicit checks instead of `assert`, which disappears under `python -O`.
    if tile <= 0:
        raise ValueError("tile must be a positive integer")
    if not 0.0 <= overlap < 1.0:
        raise ValueError("overlap must satisfy 0.0 <= overlap < 1.0")
    if H <= 0 or W <= 0:
        # Nothing to cover; avoids emitting zero-area tiles.
        return []

    stride = max(1, int(tile * (1.0 - overlap)))

    def axis_starts(length: int) -> List[int]:
        """Start offsets along one axis, always ending flush with the border."""
        last = max(length - tile, 0)
        starts = list(range(0, last + 1, stride))
        if starts[-1] != last:
            starts.append(last)
        return starts

    xs = axis_starts(W)
    ys = axis_starts(H)
    return [(x, y, min(W, x + tile), min(H, y + tile)) for y in ys for x in xs]
|
|
def iou_xyxy(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Intersection-over-union of one box against many boxes.

    Args:
        a: A single box as (x1, y1, x2, y2), shape (4,).
        b: Boxes to compare against, shape (N, 4), same coordinate layout.

    Returns:
        Array of shape (N,) holding the IoU of ``a`` with each row of ``b``.
    """
    # Corners of the overlap rectangle between `a` and every row of `b`.
    top_left = np.maximum(a[:2], b[:, :2])
    bottom_right = np.minimum(a[2:4], b[:, 2:4])

    # A negative extent means no overlap along that axis; clamp it to zero.
    overlap_wh = np.maximum(0, bottom_right - top_left)
    intersection = overlap_wh[:, 0] * overlap_wh[:, 1]

    box_area = (a[2] - a[0]) * (a[3] - a[1])
    other_areas = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # The floor keeps the division finite when both boxes have zero area.
    denominator = np.maximum(1e-9, box_area + other_areas - intersection)
    return intersection / denominator
|
|
def soft_nms_classwise(
    boxes: np.ndarray, scores: np.ndarray, classes: np.ndarray,
    iou_thr: float = 0.55, method: str = "linear", sigma: float = 0.5,
    score_thresh: float = 1e-3, max_det: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Soft-NMS applied independently to each class.

    Within a class, the highest-scoring remaining box is kept and the scores
    of the other remaining boxes are reduced according to their IoU with it:

    * ``"linear"``: multiply by ``1 - IoU`` when ``IoU > iou_thr``.
    * ``"gaussian"``: multiply by ``exp(-IoU**2 / sigma)`` (``iou_thr`` unused).
    * ``"hard"``: discard boxes with ``IoU > iou_thr`` (classic NMS).

    Boxes whose score drops below ``score_thresh`` are discarded. The inputs
    are not modified.

    Args:
        boxes: (N, 4) boxes as (x1, y1, x2, y2).
        scores: (N,) confidences.
        classes: (N,) class ids; boxes only compete with boxes of the same id.
        iou_thr: IoU threshold used by the "linear" and "hard" methods.
        method: One of "linear", "gaussian", "hard".
        sigma: Width of the Gaussian decay; must be > 0 for "gaussian".
        score_thresh: Boxes decayed below this score are dropped.
        max_det: If given, keep at most this many boxes overall (best first).

    Returns:
        ``(boxes, scores, classes)`` of the surviving detections, sorted by
        descending (decayed) score. Boxes and scores are float32; classes keep
        the dtype of the input.

    Raises:
        ValueError: If ``method`` is unknown, ``sigma`` is not positive for the
            Gaussian method, or the inputs have different lengths.
    """
    # Validate up front: inside the loop a bad method would only be noticed
    # once some class has at least two candidate boxes.
    if method not in ("linear", "gaussian", "hard"):
        raise ValueError("method must be 'linear', 'gaussian', or 'hard'")
    if method == "gaussian" and sigma <= 0:
        raise ValueError("sigma must be > 0 for the 'gaussian' method")

    boxes = np.asarray(boxes, dtype=np.float32)
    scores = np.asarray(scores, dtype=np.float32)
    classes = np.asarray(classes)
    if not (len(boxes) == len(scores) == len(classes)):
        raise ValueError("boxes, scores and classes must have the same length")

    keep_boxes, keep_scores, keep_classes = [], [], []
    for c in np.unique(classes):
        mask = classes == c
        # Boolean indexing copies, so decaying `s` leaves the caller's array intact.
        b = boxes[mask]
        s = scores[mask]
        idxs = np.arange(b.shape[0])

        kept = []
        while idxs.size:
            best = idxs[np.argmax(s[idxs])]
            kept.append(best)
            idxs = idxs[idxs != best]
            if idxs.size == 0:
                break

            ious = iou_xyxy(b[best], b[idxs])
            if method == "linear":
                s[idxs] *= np.where(ious > iou_thr, 1.0 - ious, 1.0)
            elif method == "gaussian":
                s[idxs] *= np.exp(-(ious ** 2) / sigma)
            else:  # "hard"
                idxs = idxs[ious <= iou_thr]

            # Drop candidates whose score has decayed away.
            idxs = idxs[s[idxs] >= score_thresh]

        if kept:
            keep_boxes.append(b[kept])
            keep_scores.append(s[kept])
            keep_classes.append(np.full(len(kept), c, dtype=classes.dtype))

    if not keep_boxes:
        return (np.zeros((0, 4), dtype=np.float32),
                np.zeros((0,), dtype=np.float32),
                np.zeros((0,), dtype=classes.dtype))

    B = np.concatenate(keep_boxes, axis=0)
    S = np.concatenate(keep_scores, axis=0)
    C = np.concatenate(keep_classes, axis=0)

    # One global sort is enough; "stable" makes the order of tied scores
    # deterministic (class order, then selection order).
    order = np.argsort(-S, kind="stable")
    if max_det is not None:
        order = order[:max_det]
    return B[order], S[order], C[order]
|
|
def draw_detections(img: np.ndarray, boxes: np.ndarray, scores: np.ndarray, classes: np.ndarray, names: Dict[int, str]) -> np.ndarray:
    """Draw boxes and "<name> <score>" labels onto ``img`` and return it.

    The drawing happens in place on ``img``; the same array is returned. The
    colour tuple (0, 180, 255) is written as-is, so the channel order of the
    image is left untouched.

    Args:
        img: Image to draw on (modified in place).
        boxes: (N, 4) boxes as (x1, y1, x2, y2); truncated to integers.
        scores: (N,) confidences shown with two decimals.
        classes: (N,) class ids, looked up in ``names``.
        names: Mapping from class id to display name; unknown ids are shown
            as the id itself.

    Returns:
        ``img`` with the detections drawn on it.
    """
    # Plain Python ints/floats: OpenCV point arguments and the label format
    # then do not depend on the NumPy scalar type of the inputs.
    corners = np.asarray(boxes).astype(int).tolist()
    confidences = np.asarray(scores).tolist()
    class_ids = np.asarray(classes).astype(int).tolist()
    if not corners:
        return img

    font, font_scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
    color = (0, 180, 255)

    for (x1, y1, x2, y2), sc, cl in zip(corners, confidences, class_ids):
        label = f"{names.get(cl, str(cl))} {sc:.2f}"
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

        (tw, th), _ = cv2.getTextSize(label, font, font_scale, thickness)
        # Label banner sits above the box; when the box touches the top of
        # the image there is no room there, so put the banner inside the box.
        banner_top = y1 - th - 6
        if banner_top < 0:
            banner_top = y1
        banner_bottom = banner_top + th + 6

        cv2.rectangle(img, (x1, banner_top), (x1 + tw + 4, banner_bottom), color, -1)
        cv2.putText(img, label, (x1 + 2, banner_bottom - 4), font, font_scale, (0, 0, 0), thickness, cv2.LINE_AA)
    return img
|
|
| |
| |
| |
|
|
def _lookup_original_classes(
    kept_boxes: np.ndarray, boxes: np.ndarray, scores: np.ndarray, classes: np.ndarray
) -> np.ndarray:
    """Return the original class id of every box that survived class-agnostic NMS."""
    boxes = np.asarray(boxes, dtype=np.float32)
    kept_boxes = np.asarray(kept_boxes, dtype=np.float32)

    # NMS returns unmodified copies of input rows, so a row's raw bytes identify
    # it. Exact duplicates are queued best-score-first, which is the order in
    # which NMS selects them.
    pending = {}
    for idx in np.argsort(-scores, kind="stable"):
        pending.setdefault(boxes[idx].tobytes(), []).append(classes[idx])

    restored = np.empty(len(kept_boxes), dtype=classes.dtype)
    for k, row in enumerate(kept_boxes):
        restored[k] = pending[row.tobytes()].pop(0)
    return restored


def detect_tiled_softnms(
    model, image: np.ndarray,
    tile_size: int = 1024, overlap: float = 0.25,
    per_tile_conf: float = 0.25, per_tile_iou: float = 0.7,
    softnms_iou: float = 0.55, softnms_method: str = "linear", softnms_sigma: float = 0.5,
    final_conf: float = 0.25, max_det: int = 3000,
    device: Optional[str] = None, imgsz: Optional[int] = None,
    class_agnostic_nms: bool = False
) -> Dict[str, np.ndarray]:
    """Run the model on overlapping tiles and fuse the detections with Soft-NMS.

    Each tile is passed to ``model.predict``; its boxes are shifted back into
    full-image coordinates, pooled with the boxes of all other tiles, and
    de-duplicated by ``soft_nms_classwise``. Detections whose (decayed) score
    is below ``final_conf`` are dropped at the end.

    Args:
        model: Object with a ``predict(source=..., conf=..., iou=..., ...)``
            method whose first result exposes ``boxes.xyxy``, ``boxes.conf``
            and ``boxes.cls`` tensors, and optionally a ``names`` attribute.
        image: H x W x C image array.
        tile_size: Side length of the square tiles in pixels.
        overlap: Fraction of a tile shared with its neighbour, in [0, 1).
        per_tile_conf: ``conf`` value forwarded to ``model.predict``.
        per_tile_iou: ``iou`` value forwarded to ``model.predict``.
        softnms_iou: IoU threshold for the global Soft-NMS.
        softnms_method: "linear", "gaussian" or "hard".
        softnms_sigma: Sigma of the Gaussian Soft-NMS decay.
        final_conf: Minimum score a fused detection needs to be returned.
        max_det: Maximum number of detections kept by the global Soft-NMS.
        device: Forwarded to ``model.predict`` only when not None.
        imgsz: Forwarded to ``model.predict`` only when not None.
        class_agnostic_nms: If True, boxes of different classes suppress each
            other; every returned detection still carries its own class id.

    Returns:
        Dict with ``"xyxy"`` (M, 4) float32, ``"conf"`` (M,) float32,
        ``"cls"`` (M,) int32 and ``"names"`` (class id -> name mapping),
        sorted by descending score.

    Raises:
        ValueError: If ``image`` is not 3-dimensional.
    """
    if image.ndim != 3:
        raise ValueError("image must be HxWx3")
    H, W = image.shape[:2]

    names = getattr(model, "names", None)
    if names is None:
        names = {i: str(i) for i in range(1000)}
    elif isinstance(names, (list, tuple)):
        # Normalise to a mapping so callers can always use names.get(...).
        names = dict(enumerate(names))

    # Only forward optional settings that were actually given, so that a None
    # here does not override whatever the model is already configured with.
    predict_kwargs = {"conf": per_tile_conf, "iou": per_tile_iou, "verbose": False}
    if imgsz is not None:
        predict_kwargs["imgsz"] = imgsz
    if device is not None:
        predict_kwargs["device"] = device

    # An image without pixels has nothing to tile.
    tiles = [] if H == 0 or W == 0 else make_overlapping_tiles(H, W, tile=tile_size, overlap=overlap)

    all_boxes, all_scores, all_classes = [], [], []
    for (x0, y0, x1, y1) in tiles:
        # Hand the model a contiguous copy rather than a strided view of `image`.
        tile = np.ascontiguousarray(image[y0:y1, x0:x1])
        results = model.predict(source=tile, **predict_kwargs)
        if not results:
            continue

        det = results[0].boxes
        if det is None or det.shape[0] == 0:
            continue

        # astype() copies: the offsets below are added in float32 and the
        # array backing the model's result is left untouched.
        b = det.xyxy.cpu().numpy().astype(np.float32)
        s = det.conf.cpu().numpy()
        c = det.cls.cpu().numpy().astype(int)

        # Tile coordinates -> full-image coordinates.
        b[:, [0, 2]] += x0
        b[:, [1, 3]] += y0

        # Clamp to the image extent [0, W] x [0, H]. Clamping to W-1 / H-1
        # would shrink (or invalidate) boxes that touch the right/bottom border.
        b[:, [0, 2]] = np.clip(b[:, [0, 2]], 0, W)
        b[:, [1, 3]] = np.clip(b[:, [1, 3]], 0, H)

        # Discard boxes that collapsed to zero width or height.
        valid = (b[:, 2] > b[:, 0]) & (b[:, 3] > b[:, 1])
        if not np.any(valid):
            continue
        all_boxes.append(b[valid])
        all_scores.append(s[valid])
        all_classes.append(c[valid])

    if not all_boxes:
        return {"xyxy": np.zeros((0, 4), dtype=np.float32),
                "conf": np.zeros((0,), dtype=np.float32),
                "cls": np.zeros((0,), dtype=np.int32),
                "names": names}

    boxes = np.concatenate(all_boxes, axis=0).astype(np.float32)
    scores = np.concatenate(all_scores, axis=0).astype(np.float32)
    classes = np.concatenate(all_classes, axis=0).astype(np.int32)

    # For class-agnostic fusion all boxes compete in one group; the real labels
    # are kept aside and re-attached afterwards instead of being overwritten.
    nms_groups = np.zeros_like(classes) if class_agnostic_nms else classes

    kept_boxes, kept_scores, kept_classes = soft_nms_classwise(
        boxes, scores, nms_groups,
        iou_thr=softnms_iou,
        method=softnms_method,
        sigma=softnms_sigma,
        score_thresh=1e-3,
        max_det=max_det
    )
    if class_agnostic_nms:
        kept_classes = _lookup_original_classes(kept_boxes, boxes, scores, classes)

    # Final confidence cut on the decayed scores.
    keep = kept_scores >= final_conf
    return {"xyxy": kept_boxes[keep], "conf": kept_scores[keep], "cls": kept_classes[keep], "names": names}
|
|