deploy push for beverage (deploy)
Browse files- miner.py +117 -38
- weights.onnx +2 -2
miner.py
CHANGED
|
@@ -1,5 +1,30 @@
|
|
| 1 |
-
# build-marker:
|
| 2 |
-
"""SN44 beverage detection miner —
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
Changes from v4:
|
| 5 |
- Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
|
|
@@ -15,8 +40,9 @@ Changes from v4:
|
|
| 15 |
- ort.preload_dlls() instead of manual nvidia-* ctypes preload.
|
| 16 |
- Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
| 20 |
"""
|
| 21 |
import math
|
| 22 |
from pathlib import Path
|
|
@@ -53,9 +79,9 @@ class Miner:
|
|
| 53 |
|
| 54 |
# Validator's positional class order from the SN44 element manifest.
|
| 55 |
self.class_names = ["cup", "bottle", "can"]
|
| 56 |
-
#
|
| 57 |
-
#
|
| 58 |
-
self.cls_remap = np.
|
| 59 |
|
| 60 |
try:
|
| 61 |
ort.preload_dlls()
|
|
@@ -85,17 +111,22 @@ class Miner:
|
|
| 85 |
self.input_h = 1280
|
| 86 |
self.input_w = 1280
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
self.conf_threshold = 0.55
|
| 90 |
-
self.iou_thresh = 0.
|
| 91 |
-
self.cross_iou_thresh = 0.7 # cross-class dedup
|
| 92 |
self.max_det = 150
|
| 93 |
self.use_tta = True
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
# Geometry filters (
|
| 96 |
-
self.min_box_area =
|
| 97 |
-
self.
|
| 98 |
-
self.
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# GPU warmup.
|
| 101 |
warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
|
|
@@ -107,8 +138,9 @@ class Miner:
|
|
| 107 |
|
| 108 |
def __repr__(self) -> str:
|
| 109 |
return (
|
| 110 |
-
f"BeverageMiner
|
| 111 |
-
f"
|
|
|
|
| 112 |
f"providers={self.session.get_providers()}"
|
| 113 |
)
|
| 114 |
|
|
@@ -270,9 +302,13 @@ class Miner:
|
|
| 270 |
return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
|
| 271 |
|
| 272 |
def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 273 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
boxes_orig = self._infer_single(image_bgr)
|
| 275 |
-
|
| 276 |
h, w = image_bgr.shape[:2]
|
| 277 |
flipped = cv2.flip(image_bgr, 1)
|
| 278 |
boxes_flip_raw = self._infer_single(flipped)
|
|
@@ -281,30 +317,70 @@ class Miner:
|
|
| 281 |
cls_id=b.cls_id, conf=b.conf)
|
| 282 |
for b in boxes_flip_raw
|
| 283 |
]
|
|
|
|
|
|
|
| 284 |
|
| 285 |
-
|
| 286 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
return []
|
| 288 |
|
| 289 |
-
|
| 290 |
-
scores = np.array(
|
| 291 |
-
cls_ids = np.array(
|
| 292 |
|
| 293 |
-
|
| 294 |
-
if len(
|
| 295 |
return []
|
| 296 |
-
|
| 297 |
-
boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
)
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
|
| 309 |
def _to_boundingboxes(
|
| 310 |
self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
|
|
@@ -322,11 +398,14 @@ class Miner:
|
|
| 322 |
bw, bh = ix2 - ix1, iy2 - iy1
|
| 323 |
if bw * bh < self.min_box_area:
|
| 324 |
continue
|
| 325 |
-
if
|
| 326 |
continue
|
| 327 |
ar = max(bw / max(bh, 1), bh / max(bw, 1))
|
| 328 |
if ar > self.max_aspect_ratio:
|
| 329 |
continue
|
|
|
|
|
|
|
|
|
|
| 330 |
out.append(BoundingBox(
|
| 331 |
x1=ix1, y1=iy1, x2=ix2, y2=iy2,
|
| 332 |
cls_id=int(cls_ids[i]),
|
|
|
|
| 1 |
+
# build-marker: v8-yolo26s-fp16-1280-hermestech-pipeline
|
| 2 |
+
"""SN44 beverage detection miner — v8 (yolo26s FP16 at 1280, hermestech-style pipeline).
|
| 3 |
+
|
| 4 |
+
v8 (2026-05-04 ~22:30Z): two simultaneous changes from v7 (emu):
|
| 5 |
+
1. WEIGHTS: yolo26s @ 1280, FP16 ONNX (~18.7 MB). Trained natively in
|
| 6 |
+
validator class order [cup, bottle, can] on merged_v8 (~38k images),
|
| 7 |
+
epoch 40 best (mAP50=0.840 / mAP50-95=0.694). Replaces v7's yolo26n
|
| 8 |
+
(~10.3 MB FP32). FP16 quantization: <0.001 mAP loss vs FP32 export.
|
| 9 |
+
2. INFERENCE PIPELINE: ported from hermestech00/person-detect-0 (top-1
|
| 10 |
+
beverage miner). Aggressive precision-over-recall:
|
| 11 |
+
- conf_threshold 0.55 → 0.75
|
| 12 |
+
- iou_thresh 0.5 → 0.07 (very aggressive NMS)
|
| 13 |
+
- max_aspect_ratio 8.0 → 5.0 (tightened, not a new parameter)
|
| 14 |
+
- new max_box_area_ratio 0.85 (rejects frame-covering FPs)
|
| 15 |
+
- new min_w/min_h 6/6 (replaces min_side=8)
|
| 16 |
+
- TTA-consensus: all orig-view boxes accepted directly (conf_high=0.0);
|
| 17 |
+
the flip-view score BOOST branch only runs for orig scores < conf_high, so at 0.0 it is never reached;
|
| 18 |
+
flip boxes are added unless they match an orig box at IoU >= 0.99 (tta_match_iou), then per-class NMS dedups.
|
| 19 |
+
Offline mAP DROPS (~13% on this val set) but the manak0 manifest scores
|
| 20 |
+
0.6×mAP50 + 0.4×false_positive — the precision boost is expected to
|
| 21 |
+
dominate the FP component. Empirical: hermestech with this exact pipeline
|
| 22 |
+
is rank-1 (0.67 mean) vs our emu's 0.46 mean (rank 5).
|
| 23 |
+
|
| 24 |
+
OLD v7 (kept for context, see miner.py.v7_backup_*):
|
| 25 |
+
- alfred-aligned: conf=0.55, iou=0.5, TTA=union-then-NMS-then-boost
|
| 26 |
+
- yolo26n FP32 (~10.3 MB)
|
| 27 |
+
Earlier release notes (v4-v7), kept for history as part of this same docstring:
|
| 28 |
|
| 29 |
Changes from v4:
|
| 30 |
- Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
|
|
|
|
| 40 |
- ort.preload_dlls() instead of manual nvidia-* ctypes preload.
|
| 41 |
- Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
|
| 42 |
|
| 43 |
+
v7 model is trained natively in validator class order [cup, bottle, can] on
|
| 44 |
+
merged_v7_aug (38k v1+OI images + 10k CCTV-degraded augmentations, 30% ratio),
|
| 45 |
+
so cls_remap is identity. Compare to v5 emu which used [1,2,0] remap.
|
| 46 |
"""
|
| 47 |
import math
|
| 48 |
from pathlib import Path
|
|
|
|
| 79 |
|
| 80 |
# Validator's positional class order from the SN44 element manifest.
|
| 81 |
self.class_names = ["cup", "bottle", "can"]
|
| 82 |
+
# v7: model trained natively in validator class order [cup, bottle, can]
|
| 83 |
+
# so cls_remap is identity (no remap needed).
|
| 84 |
+
self.cls_remap = np.arange(3, dtype=np.int32)
|
| 85 |
|
| 86 |
try:
|
| 87 |
ort.preload_dlls()
|
|
|
|
| 111 |
self.input_h = 1280
|
| 112 |
self.input_w = 1280
|
| 113 |
|
| 114 |
+
# hermestech-inspired aggressive filtering (top-1 beverage miner pattern).
|
| 115 |
+
self.conf_threshold = 0.75 # was 0.55 — drop borderline detections
|
| 116 |
+
self.iou_thresh = 0.07 # was 0.5 — very aggressive NMS
|
| 117 |
+
self.cross_iou_thresh = 0.7 # cross-class dedup (kept; hermestech omits)
|
| 118 |
self.max_det = 150
|
| 119 |
self.use_tta = True
|
| 120 |
+
# TTA-consensus thresholds (port of hermestech _merge_tta_consensus):
|
| 121 |
+
self.conf_high = 0.0 # ALL orig-view boxes accepted directly
|
| 122 |
+
self.tta_match_iou = 0.99 # near-perfect IoU required to fuse orig+flip scores
|
| 123 |
|
| 124 |
+
# Geometry filters (hermestech-tuned for beverage).
|
| 125 |
+
self.min_box_area = 144 # was 100 (12x12 vs 10x10)
|
| 126 |
+
self.min_w = 6 # NEW
|
| 127 |
+
self.min_h = 6 # NEW
|
| 128 |
+
self.max_aspect_ratio = 5.0 # was 8.0
|
| 129 |
+
self.max_box_area_ratio = 0.85 # NEW — reject frame-covering false positives
|
| 130 |
|
| 131 |
# GPU warmup.
|
| 132 |
warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
|
|
|
|
| 138 |
|
| 139 |
def __repr__(self) -> str:
|
| 140 |
return (
|
| 141 |
+
f"BeverageMiner v8-hermestech input={self.input_h}x{self.input_w} "
|
| 142 |
+
f"conf>={self.conf_threshold} iou={self.iou_thresh} "
|
| 143 |
+
f"tta_match_iou={self.tta_match_iou} use_tta={self.use_tta} "
|
| 144 |
f"providers={self.session.get_providers()}"
|
| 145 |
)
|
| 146 |
|
|
|
|
| 302 |
return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
|
| 303 |
|
| 304 |
def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 305 |
+
"""Hermestech-style TTA consensus (port from hermestech00/person-detect-0):
|
| 306 |
+
- all orig-view boxes accepted directly (conf_high=0.0)
|
| 307 |
+
- flip-view ONLY used to boost orig scores at near-perfect IoU match
|
| 308 |
+
- flip-only boxes added if no original-view overlap at tta_match_iou
|
| 309 |
+
- final per-class NMS at iou_thresh (0.07) + geometry filters
|
| 310 |
+
"""
|
| 311 |
boxes_orig = self._infer_single(image_bgr)
|
|
|
|
| 312 |
h, w = image_bgr.shape[:2]
|
| 313 |
flipped = cv2.flip(image_bgr, 1)
|
| 314 |
boxes_flip_raw = self._infer_single(flipped)
|
|
|
|
| 317 |
cls_id=b.cls_id, conf=b.conf)
|
| 318 |
for b in boxes_flip_raw
|
| 319 |
]
|
| 320 |
+
if not boxes_orig and not boxes_flip:
|
| 321 |
+
return []
|
| 322 |
|
| 323 |
+
coords_o = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0, 4), dtype=np.float32)
|
| 324 |
+
scores_o = np.array([b.conf for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0,), dtype=np.float32)
|
| 325 |
+
cls_o = np.array([b.cls_id for b in boxes_orig], dtype=np.int32) if boxes_orig else np.empty((0,), dtype=np.int32)
|
| 326 |
+
coords_f = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0, 4), dtype=np.float32)
|
| 327 |
+
scores_f = np.array([b.conf for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0,), dtype=np.float32)
|
| 328 |
+
cls_f = np.array([b.cls_id for b in boxes_flip], dtype=np.int32) if boxes_flip else np.empty((0,), dtype=np.int32)
|
| 329 |
+
|
| 330 |
+
acc_b: list[ndarray] = []
|
| 331 |
+
acc_s: list[float] = []
|
| 332 |
+
acc_c: list[int] = []
|
| 333 |
+
|
| 334 |
+
# Original-view loop: accept all >= conf_high directly; below, require flip match
|
| 335 |
+
for i in range(len(coords_o)):
|
| 336 |
+
sc = float(scores_o[i])
|
| 337 |
+
if sc >= self.conf_high:
|
| 338 |
+
acc_b.append(coords_o[i]); acc_s.append(sc); acc_c.append(int(cls_o[i]))
|
| 339 |
+
elif len(coords_f) > 0:
|
| 340 |
+
ious = self._box_iou_one_to_many(coords_o[i], coords_f)
|
| 341 |
+
j = int(np.argmax(ious))
|
| 342 |
+
if ious[j] >= self.tta_match_iou:
|
| 343 |
+
acc_b.append(coords_o[i])
|
| 344 |
+
acc_s.append(max(sc, float(scores_f[j])))
|
| 345 |
+
acc_c.append(int(cls_o[i]))
|
| 346 |
+
|
| 347 |
+
# Flipped-view loop: only add high-conf boxes that have NO match in original
|
| 348 |
+
for i in range(len(coords_f)):
|
| 349 |
+
sc = float(scores_f[i])
|
| 350 |
+
if sc < self.conf_high:
|
| 351 |
+
continue
|
| 352 |
+
if len(coords_o) == 0:
|
| 353 |
+
acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i])); continue
|
| 354 |
+
ious = self._box_iou_one_to_many(coords_f[i], coords_o)
|
| 355 |
+
if np.max(ious) < self.tta_match_iou:
|
| 356 |
+
acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i]))
|
| 357 |
+
|
| 358 |
+
if not acc_b:
|
| 359 |
return []
|
| 360 |
|
| 361 |
+
boxes = np.array(acc_b, dtype=np.float32)
|
| 362 |
+
scores = np.array(acc_s, dtype=np.float32)
|
| 363 |
+
cls_ids = np.array(acc_c, dtype=np.int32)
|
| 364 |
|
| 365 |
+
keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thresh)
|
| 366 |
+
if len(keep) == 0:
|
| 367 |
return []
|
| 368 |
+
keep = keep[: self.max_det]
|
|
|
|
| 369 |
|
| 370 |
+
# Apply geometry filters (min_w/h, aspect, area-ratio) via _to_boundingboxes
|
| 371 |
+
return self._to_boundingboxes(boxes[keep], scores[keep], cls_ids[keep], w, h)
|
| 372 |
+
|
| 373 |
+
@staticmethod
|
| 374 |
+
def _box_iou_one_to_many(box: ndarray, others: ndarray) -> ndarray:
|
| 375 |
+
"""IoU of one box [x1,y1,x2,y2] vs Nx4 array of others. Returns 1-D scores."""
|
| 376 |
+
if len(others) == 0:
|
| 377 |
+
return np.array([], dtype=np.float32)
|
| 378 |
+
x1 = np.maximum(box[0], others[:, 0]); y1 = np.maximum(box[1], others[:, 1])
|
| 379 |
+
x2 = np.minimum(box[2], others[:, 2]); y2 = np.minimum(box[3], others[:, 3])
|
| 380 |
+
inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
|
| 381 |
+
a = (box[2] - box[0]) * (box[3] - box[1])
|
| 382 |
+
b = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
|
| 383 |
+
return inter / (a + b - inter + 1e-7)
|
| 384 |
|
| 385 |
def _to_boundingboxes(
|
| 386 |
self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
|
|
|
|
| 398 |
bw, bh = ix2 - ix1, iy2 - iy1
|
| 399 |
if bw * bh < self.min_box_area:
|
| 400 |
continue
|
| 401 |
+
if bw < self.min_w or bh < self.min_h:
|
| 402 |
continue
|
| 403 |
ar = max(bw / max(bh, 1), bh / max(bw, 1))
|
| 404 |
if ar > self.max_aspect_ratio:
|
| 405 |
continue
|
| 406 |
+
# NEW: reject boxes covering > max_box_area_ratio of frame (FP guard)
|
| 407 |
+
if (bw * bh) / max(1, orig_w * orig_h) > self.max_box_area_ratio:
|
| 408 |
+
continue
|
| 409 |
out.append(BoundingBox(
|
| 410 |
x1=ix1, y1=iy1, x2=ix2, y2=iy2,
|
| 411 |
cls_id=int(cls_ids[i]),
|
weights.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e4210b31ad35eb77d865c7cf76891fb4c8e4cb8f24c3f340b51421bfe26fe6e
|
| 3 |
+
size 19637792
|