# ais-api / weld_tiling.py
# csmith715's picture
# renamed tiling.py
# 311720b
"""
tiled_yolo_softnms.py
Tiled inference + class-wise Soft-NMS for YOLO (Ultralytics).
- Runs YOLO on overlapping tiles to boost recall on small symbols.
- Maps all tile detections back to full-image coords.
- Fuses duplicates with Soft-NMS per class.
Usage
-----
from ultralytics import YOLO
import cv2
model = YOLO("best.pt") # your YOLO v12/v11/v8 checkpoint
img = cv2.imread("example.jpg")[:, :, ::-1] # BGR->RGB (optional; YOLO accepts BGR too)
out = detect_tiled_softnms(
model, img,
tile_size=1024, overlap=0.25,
per_tile_conf=0.2, per_tile_iou=0.7,
softnms_iou=0.55, softnms_method="linear", softnms_sigma=0.5,
final_conf=0.25, device=None, imgsz=None
)
# Access results
xyxy = out["xyxy"]
conf = out["conf"]
cls = out["cls"]
annot = draw_detections(img.copy(), xyxy, conf, cls, out["names"])
cv2.imwrite("annotated.jpg", annot[:, :, ::-1]) # RGB->BGR for writing
"""
from typing import List, Tuple, Dict, Optional
import numpy as np
import cv2
# ---------------------------
# Utilities
# ---------------------------
def make_overlapping_tiles(H: int, W: int, tile: int, overlap: float) -> List[Tuple[int, int, int, int]]:
    """
    Lay out a grid of overlapping square tiles over an H x W image.

    Tiles are returned row by row as (x0, y0, x1, y1) with exclusive right/bottom
    edges. Consecutive tiles start ``int(tile * (1 - overlap))`` pixels apart
    (at least 1). When the regular grid stops short of the right or bottom edge,
    one extra tile flush with that edge is added. Tiles are cropped to the image,
    so an image smaller than ``tile`` produces a single smaller tile.
    """
    assert 0.0 <= overlap < 1.0
    step = max(1, int(tile * (1.0 - overlap)))

    def axis_starts(extent: int) -> List[int]:
        # Regular grid positions along one axis; position 0 is always present.
        starts = list(range(0, max(extent - tile, 0) + 1, step))
        # Add a final tile aligned to the far edge if the grid leaves a gap there.
        if starts[-1] + tile < extent:
            starts.append(extent - tile)
        return starts

    col_starts = axis_starts(W)
    row_starts = axis_starts(H)
    return [
        (left, top, min(W, left + tile), min(H, top + tile))
        for top in row_starts
        for left in col_starts
    ]
def iou_xyxy(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Intersection-over-union of one box against a batch of boxes.

    ``a`` is a single box whose first four entries are (x1, y1, x2, y2);
    ``b`` is a 2-D array whose first four columns hold the same layout.
    Returns one IoU value per row of ``b``.
    """
    ax1, ay1, ax2, ay2 = a[0], a[1], a[2], a[3]
    bx1, by1, bx2, by2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3]

    # Overlap extent along each axis, floored at zero for disjoint boxes.
    overlap_w = np.maximum(0, np.minimum(ax2, bx2) - np.maximum(ax1, bx1))
    overlap_h = np.maximum(0, np.minimum(ay2, by2) - np.maximum(ay1, by1))
    intersection = overlap_w * overlap_h

    single_area = (ax2 - ax1) * (ay2 - ay1)
    batch_areas = (bx2 - bx1) * (by2 - by1)
    # The floor keeps the division defined when both boxes have zero area.
    denom = np.maximum(1e-9, single_area + batch_areas - intersection)
    return intersection / denom
def soft_nms_classwise(
    boxes: np.ndarray, scores: np.ndarray, classes: np.ndarray,
    iou_thr: float = 0.55, method: str = "linear", sigma: float = 0.5,
    score_thresh: float = 1e-3, max_det: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Soft-NMS applied independently to each class.

    Args:
        boxes: (N, 4) array of (x1, y1, x2, y2) boxes.
        scores: (N,) confidence per box.
        classes: (N,) class id per box; boxes only compete within the same id.
        iou_thr: overlap threshold used by the "linear" and "hard" methods.
        method: "linear" (scale score by 1 - IoU above ``iou_thr``),
            "gaussian" (scale score by exp(-IoU**2 / sigma)), or
            "hard" (drop boxes whose IoU exceeds ``iou_thr``).
        sigma: width of the gaussian decay.
        score_thresh: candidates whose decayed score falls below this are dropped.
        max_det: optional cap on the number of returned detections.

    Returns:
        (boxes, scores, classes) for the surviving detections, sorted by
        descending score. Boxes and scores are float32; the scores are the
        decayed values, not the inputs.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    # Validate up front: inside the loop this check would only fire once some
    # class has two or more boxes, letting a typo pass silently on sparse input.
    if method not in ("linear", "gaussian", "hard"):
        raise ValueError("method must be 'linear', 'gaussian', or 'hard'")

    keep_boxes, keep_scores, keep_classes = [], [], []
    for c in np.unique(classes):
        m = classes == c
        # astype() already returns fresh arrays, so the caller's data is untouched.
        b = boxes[m].astype(np.float32)
        s = scores[m].astype(np.float32)
        idxs = np.arange(b.shape[0])
        kept = []
        while len(idxs):
            # Take the best remaining candidate, then decay or drop its neighbours.
            i = idxs[np.argmax(s[idxs])]
            kept.append(i)
            idxs = idxs[idxs != i]
            if len(idxs) == 0:
                break
            ious = iou_xyxy(b[i], b[idxs])
            if method == "linear":
                s[idxs] *= np.where(ious > iou_thr, 1.0 - ious, 1.0)
            elif method == "gaussian":
                s[idxs] *= np.exp(-(ious ** 2) / sigma)
            else:
                # "hard": standard NMS behaviour
                idxs = idxs[ious <= iou_thr]
            # prune very low scores
            idxs = idxs[s[idxs] >= score_thresh]
        if kept:
            kb, ks = b[kept], s[kept]
            order = np.argsort(-ks)
            keep_boxes.append(kb[order])
            keep_scores.append(ks[order])
            keep_classes.append(np.full(len(ks), c, dtype=classes.dtype))

    if not keep_boxes:
        return (np.zeros((0, 4), dtype=np.float32),
                np.zeros((0,), dtype=np.float32),
                np.zeros((0,), dtype=classes.dtype))

    B = np.concatenate(keep_boxes, axis=0)
    S = np.concatenate(keep_scores, axis=0)
    C = np.concatenate(keep_classes, axis=0)
    # Global ordering across classes, optionally truncated to the top max_det.
    order = np.argsort(-S)
    if max_det is not None:
        order = order[:max_det]
    return B[order], S[order], C[order]
def draw_detections(img: np.ndarray, boxes: np.ndarray, scores: np.ndarray, classes: np.ndarray, names: Dict[int, str]) -> np.ndarray:
    """
    Draw each detection on ``img`` as an outlined box with a filled caption tag.

    The caption is the class name (or the numeric id when ``names`` has no
    entry for it) followed by the score with two decimals. ``img`` is drawn on
    in place and also returned.
    """
    box_color = (0, 180, 255)
    text_color = (0, 0, 0)
    for corners, score, class_id in zip(boxes.astype(int), scores, classes.astype(int)):
        left, top, right, bottom = corners
        caption = f"{names.get(class_id, str(class_id))} {score:.2f}"
        cv2.rectangle(img, (left, top), (right, bottom), box_color, 2)
        # Size the filled tag to the rendered caption; it sits just above the box.
        (text_w, text_h), _baseline = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(img, (left, top - text_h - 6), (left + text_w + 4, top), box_color, -1)
        cv2.putText(img, caption, (left + 2, top - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2, cv2.LINE_AA)
    return img
# ---------------------------
# Main tiled inference
# ---------------------------
def detect_tiled_softnms(
    model, image: np.ndarray,
    tile_size: int = 1024, overlap: float = 0.25,
    per_tile_conf: float = 0.25, per_tile_iou: float = 0.7,
    softnms_iou: float = 0.55, softnms_method: str = "linear", softnms_sigma: float = 0.5,
    final_conf: float = 0.25, max_det: int = 3000,
    device: Optional[str] = None, imgsz: Optional[int] = None,
    class_agnostic_nms: bool = False
) -> Dict[str, np.ndarray]:
    """
    Run YOLO on overlapping tiles, then fuse all detections with Soft-NMS.

    Args:
        model: object exposing ``predict(source=..., conf=..., iou=..., ...)`` and,
            optionally, a ``names`` mapping (e.g. an Ultralytics YOLO model).
        image: H x W x C image array.
        tile_size, overlap: tile edge length in pixels and fractional overlap
            between neighbouring tiles.
        per_tile_conf, per_tile_iou: confidence / IoU thresholds forwarded to
            ``model.predict`` for every tile.
        softnms_iou, softnms_method, softnms_sigma: settings for the global
            Soft-NMS fusion step.
        final_conf: minimum (post-fusion) score a detection needs to be returned.
        max_det: cap on detections kept by the fusion step.
        device, imgsz: forwarded to ``model.predict`` only when not None.
        class_agnostic_nms: when True, boxes of different classes suppress each
            other; each surviving box still reports its own class.

    Returns:
        dict with "xyxy" (K, 4) float32 boxes in full-image coordinates,
        "conf" (K,) float32 scores, "cls" (K,) int32 class ids, and "names".
    """
    assert image.ndim == 3, "image must be HxWx3"
    H, W = image.shape[:2]

    # Build the placeholder name table only when the model does not supply one.
    names = getattr(model, "names", None)
    if names is None:
        names = {i: str(i) for i in range(1000)}

    # Forward imgsz/device only when the caller set them: an explicit None would
    # override the model's own setting instead of falling back to it.
    predict_kwargs = {"conf": per_tile_conf, "iou": per_tile_iou, "verbose": False}
    if imgsz is not None:
        predict_kwargs["imgsz"] = imgsz
    if device is not None:
        predict_kwargs["device"] = device

    all_boxes, all_scores, all_classes = [], [], []
    for (x0, y0, x1, y1) in make_overlapping_tiles(H, W, tile=tile_size, overlap=overlap):
        # Ultralytics returns boxes in original tile coords (pre-letterbox)
        results = model.predict(source=image[y0:y1, x0:x1], **predict_kwargs)
        if not results:
            continue
        r = results[0]
        if r.boxes is None or r.boxes.shape[0] == 0:
            continue
        s = r.boxes.conf.cpu().numpy()
        c = r.boxes.cls.cpu().numpy().astype(int)
        # Map to full-image coordinates. Adding (rather than +=) yields a new
        # array, so the buffer backing the model's result is never modified.
        b = r.boxes.xyxy.cpu().numpy() + np.array([x0, y0, x0, y0], dtype=np.float32)
        # Clip to the image
        b[:, [0, 2]] = np.clip(b[:, [0, 2]], 0, W - 1)
        b[:, [1, 3]] = np.clip(b[:, [1, 3]], 0, H - 1)
        # Filter degenerate boxes
        valid = (b[:, 2] > b[:, 0]) & (b[:, 3] > b[:, 1])
        if not np.any(valid):
            continue
        all_boxes.append(b[valid])
        all_scores.append(s[valid])
        all_classes.append(c[valid])

    if not all_boxes:
        return {"xyxy": np.zeros((0, 4), dtype=np.float32),
                "conf": np.zeros((0,), dtype=np.float32),
                "cls": np.zeros((0,), dtype=np.int32),
                "names": names}

    boxes = np.concatenate(all_boxes, axis=0).astype(np.float32)
    scores = np.concatenate(all_scores, axis=0).astype(np.float32)
    classes = np.concatenate(all_classes, axis=0).astype(np.int32)

    nms_kwargs = dict(
        iou_thr=softnms_iou,
        method=softnms_method,
        sigma=softnms_sigma,
        score_thresh=1e-3,
        max_det=max_det,
    )
    if class_agnostic_nms:
        # Fuse across classes by giving every box the same dummy class, while
        # keeping the true label. soft_nms_classwise reads only the first four
        # box columns for IoU and returns the kept rows as-is, so the label can
        # travel as a fifth column (small integer ids are exact in float32).
        labelled = np.concatenate([boxes, classes[:, None].astype(np.float32)], axis=1)
        labelled, scores, _ = soft_nms_classwise(
            labelled, scores, np.zeros_like(classes), **nms_kwargs
        )
        boxes = np.ascontiguousarray(labelled[:, :4])
        classes = labelled[:, 4].astype(np.int32)
    else:
        boxes, scores, classes = soft_nms_classwise(boxes, scores, classes, **nms_kwargs)

    # Final confidence gate (applied to the decayed, post-fusion scores)
    keep = scores >= final_conf
    boxes, scores, classes = boxes[keep], scores[keep], classes[keep]
    return {"xyxy": boxes, "conf": scores, "cls": classes, "names": names}