meaculpitt committed on
Commit
880e81a
·
verified ·
1 Parent(s): 6a13407

deploy push for beverage (deploy)

Browse files
Files changed (2) hide show
  1. miner.py +117 -38
  2. weights.onnx +2 -2
miner.py CHANGED
@@ -1,5 +1,30 @@
1
- # build-marker: v5-yolo26n-1280-tta
2
- """SN44 beverage detection miner — v5 (yolo26n at 1280, alfred-aligned pipeline).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  Changes from v4:
5
  - Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
@@ -15,8 +40,9 @@ Changes from v4:
15
  - ort.preload_dlls() instead of manual nvidia-* ctypes preload.
16
  - Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
17
 
18
- Class remap [1,2,0]: model trained as [bottle, can, cup] -> validator [cup, bottle, can].
19
- TODO v6: retrain in validator class order to drop remap.
 
20
  """
21
  import math
22
  from pathlib import Path
@@ -53,9 +79,9 @@ class Miner:
53
 
54
  # Validator's positional class order from the SN44 element manifest.
55
  self.class_names = ["cup", "bottle", "can"]
56
- # Our model trained with data.yaml class order [bottle, can, cup].
57
- # Remap: model 0 (bottle) -> 1, model 1 (can) -> 2, model 2 (cup) -> 0.
58
- self.cls_remap = np.array([1, 2, 0], dtype=np.int32)
59
 
60
  try:
61
  ort.preload_dlls()
@@ -85,17 +111,22 @@ class Miner:
85
  self.input_h = 1280
86
  self.input_w = 1280
87
 
88
- # alfred-aligned thresholds.
89
- self.conf_threshold = 0.55
90
- self.iou_thresh = 0.5 # per-class hard NMS
91
- self.cross_iou_thresh = 0.7 # cross-class dedup
92
  self.max_det = 150
93
  self.use_tta = True
 
 
 
94
 
95
- # Geometry filters (kept from v4).
96
- self.min_box_area = 100 # 10x10 px²
97
- self.min_side = 8
98
- self.max_aspect_ratio = 8.0
 
 
99
 
100
  # GPU warmup.
101
  warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
@@ -107,8 +138,9 @@ class Miner:
107
 
108
  def __repr__(self) -> str:
109
  return (
110
- f"BeverageMiner v5 input={self.input_h}x{self.input_w} "
111
- f"classes={len(self.class_names)} use_tta={self.use_tta} "
 
112
  f"providers={self.session.get_providers()}"
113
  )
114
 
@@ -270,9 +302,13 @@ class Miner:
270
  return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
271
 
272
  def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
273
- """H-flip TTA: union(orig, flipped) -> per-class NMS -> conf-boost."""
 
 
 
 
 
274
  boxes_orig = self._infer_single(image_bgr)
275
-
276
  h, w = image_bgr.shape[:2]
277
  flipped = cv2.flip(image_bgr, 1)
278
  boxes_flip_raw = self._infer_single(flipped)
@@ -281,30 +317,70 @@ class Miner:
281
  cls_id=b.cls_id, conf=b.conf)
282
  for b in boxes_flip_raw
283
  ]
 
 
284
 
285
- all_boxes = boxes_orig + boxes_flip
286
- if not all_boxes:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  return []
288
 
289
- coords = np.array([[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32)
290
- scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
291
- cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)
292
 
293
- keep_idx = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thresh)
294
- if len(keep_idx) == 0:
295
  return []
296
- keep_idx = keep_idx[: self.max_det]
297
- boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)
298
 
299
- out_boxes: list[BoundingBox] = []
300
- for j, idx in enumerate(keep_idx):
301
- b = all_boxes[idx]
302
- out_boxes.append(BoundingBox(
303
- x1=b.x1, y1=b.y1, x2=b.x2, y2=b.y2,
304
- cls_id=b.cls_id,
305
- conf=max(0.0, min(1.0, float(boosted[j]))),
306
- ))
307
- return out_boxes
 
 
 
 
 
308
 
309
  def _to_boundingboxes(
310
  self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
@@ -322,11 +398,14 @@ class Miner:
322
  bw, bh = ix2 - ix1, iy2 - iy1
323
  if bw * bh < self.min_box_area:
324
  continue
325
- if min(bw, bh) < self.min_side:
326
  continue
327
  ar = max(bw / max(bh, 1), bh / max(bw, 1))
328
  if ar > self.max_aspect_ratio:
329
  continue
 
 
 
330
  out.append(BoundingBox(
331
  x1=ix1, y1=iy1, x2=ix2, y2=iy2,
332
  cls_id=int(cls_ids[i]),
 
1
+ # build-marker: v8-yolo26s-fp16-1280-hermestech-pipeline
2
+ """SN44 beverage detection miner — v8 (yolo26s FP16 at 1280, hermestech-style pipeline).
3
+
4
+ v8 (2026-05-04 ~22:30Z): two simultaneous changes from v7 (emu):
5
+ 1. WEIGHTS: yolo26s @ 1280, FP16 ONNX (~18.7 MB). Trained natively in
6
+ validator class order [cup, bottle, can] on merged_v8 (~38k images),
7
+ epoch 40 best (mAP50=0.840 / mAP50-95=0.694). Replaces v7's yolo26n
8
+ (~10.3 MB FP32). FP16 quantization: <0.001 mAP loss vs FP32 export.
9
+ 2. INFERENCE PIPELINE: ported from hermestech00/person-detect-0 (top-1
10
+ beverage miner). Aggressive precision-over-recall:
11
+ - conf_threshold 0.55 → 0.75
12
+ - iou_thresh 0.5 → 0.07 (very aggressive NMS)
13
+ - new max_aspect_ratio 5.0 (was 8.0)
14
+ - new max_box_area_ratio 0.85 (rejects frame-covering FPs)
15
+ - new min_w/min_h 6/6 (replaces min_side=8)
16
+ - TTA-consensus: all orig-view boxes accepted directly (conf_high=0.0);
17
+ flip view can BOOST only orig scores below conf_high (none while conf_high=0.0) at near-perfect IoU match
18
+ (tta_match_iou=0.99); flip-only boxes added if no orig overlap.
19
+ Offline mAP DROPS (~13% on this val set) but the manak0 manifest scores
20
+ 0.6×mAP50 + 0.4×false_positive — the precision boost is expected to
21
+ dominate the FP component. Empirical: hermestech with this exact pipeline
22
+ is rank-1 (0.67 mean) vs our emu's 0.46 mean (rank 5).
23
+
24
+ OLD v7 (kept for context, see miner.py.v7_backup_*):
25
+ - alfred-aligned: conf=0.55, iou=0.5, TTA=union-then-NMS-then-boost
26
+ - yolo26n FP32 (~10.3 MB)
27
+
28
 
29
  Changes from v4:
30
  - Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
 
40
  - ort.preload_dlls() instead of manual nvidia-* ctypes preload.
41
  - Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
42
 
43
+ v7 model is trained natively in validator class order [cup, bottle, can] on
44
+ merged_v7_aug (38k v1+OI images + 10k CCTV-degraded augmentations, 30% ratio),
45
+ so cls_remap is identity. Compare to v5 emu which used [1,2,0] remap.
46
  """
47
  import math
48
  from pathlib import Path
 
79
 
80
  # Validator's positional class order from the SN44 element manifest.
81
  self.class_names = ["cup", "bottle", "can"]
82
+ # v7: model trained natively in validator class order [cup, bottle, can]
83
+ # so cls_remap is identity (no remap needed).
84
+ self.cls_remap = np.arange(3, dtype=np.int32)
85
 
86
  try:
87
  ort.preload_dlls()
 
111
  self.input_h = 1280
112
  self.input_w = 1280
113
 
114
+ # hermestech-inspired aggressive filtering (top-1 beverage miner pattern).
115
+ self.conf_threshold = 0.75 # was 0.55 — drop borderline detections
116
+ self.iou_thresh = 0.07 # was 0.5 — very aggressive NMS
117
+ self.cross_iou_thresh = 0.7 # cross-class dedup (kept; hermestech omits)
118
  self.max_det = 150
119
  self.use_tta = True
120
+ # TTA-consensus thresholds (port of hermestech _merge_tta_consensus):
121
+ self.conf_high = 0.0 # ALL orig-view boxes accepted directly
122
+ self.tta_match_iou = 0.99 # near-perfect IoU required to fuse orig+flip scores
123
 
124
+ # Geometry filters (hermestech-tuned for beverage).
125
+ self.min_box_area = 144 # was 100 (12x12 vs 10x10)
126
+ self.min_w = 6 # NEW
127
+ self.min_h = 6 # NEW
128
+ self.max_aspect_ratio = 5.0 # was 8.0
129
+ self.max_box_area_ratio = 0.85 # NEW — reject frame-covering false positives
130
 
131
  # GPU warmup.
132
  warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
 
138
 
139
  def __repr__(self) -> str:
140
  return (
141
+ f"BeverageMiner v8-hermestech input={self.input_h}x{self.input_w} "
142
+ f"conf>={self.conf_threshold} iou={self.iou_thresh} "
143
+ f"tta_match_iou={self.tta_match_iou} use_tta={self.use_tta} "
144
  f"providers={self.session.get_providers()}"
145
  )
146
 
 
302
  return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
303
 
304
  def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
305
+ """Hermestech-style TTA consensus (port from hermestech00/person-detect-0):
306
+ - all orig-view boxes accepted directly (conf_high=0.0)
307
+ - flip-view can boost an orig score only when that box is below conf_high and matches at near-perfect IoU (never happens while conf_high=0.0)
308
+ - flip-only boxes added if no original-view overlap at tta_match_iou
309
+ - final per-class NMS at iou_thresh (0.07) + geometry filters
310
+ """
311
  boxes_orig = self._infer_single(image_bgr)
 
312
  h, w = image_bgr.shape[:2]
313
  flipped = cv2.flip(image_bgr, 1)
314
  boxes_flip_raw = self._infer_single(flipped)
 
317
  cls_id=b.cls_id, conf=b.conf)
318
  for b in boxes_flip_raw
319
  ]
320
+ if not boxes_orig and not boxes_flip:
321
+ return []
322
 
323
+ coords_o = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0, 4), dtype=np.float32)
324
+ scores_o = np.array([b.conf for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0,), dtype=np.float32)
325
+ cls_o = np.array([b.cls_id for b in boxes_orig], dtype=np.int32) if boxes_orig else np.empty((0,), dtype=np.int32)
326
+ coords_f = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0, 4), dtype=np.float32)
327
+ scores_f = np.array([b.conf for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0,), dtype=np.float32)
328
+ cls_f = np.array([b.cls_id for b in boxes_flip], dtype=np.int32) if boxes_flip else np.empty((0,), dtype=np.int32)
329
+
330
+ acc_b: list[ndarray] = []
331
+ acc_s: list[float] = []
332
+ acc_c: list[int] = []
333
+
334
+ # Original-view loop: accept all >= conf_high directly; below, require flip match
335
+ for i in range(len(coords_o)):
336
+ sc = float(scores_o[i])
337
+ if sc >= self.conf_high:
338
+ acc_b.append(coords_o[i]); acc_s.append(sc); acc_c.append(int(cls_o[i]))
339
+ elif len(coords_f) > 0:
340
+ ious = self._box_iou_one_to_many(coords_o[i], coords_f)
341
+ j = int(np.argmax(ious))
342
+ if ious[j] >= self.tta_match_iou:
343
+ acc_b.append(coords_o[i])
344
+ acc_s.append(max(sc, float(scores_f[j])))
345
+ acc_c.append(int(cls_o[i]))
346
+
347
+ # Flipped-view loop: only add high-conf boxes that have NO match in original
348
+ for i in range(len(coords_f)):
349
+ sc = float(scores_f[i])
350
+ if sc < self.conf_high:
351
+ continue
352
+ if len(coords_o) == 0:
353
+ acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i])); continue
354
+ ious = self._box_iou_one_to_many(coords_f[i], coords_o)
355
+ if np.max(ious) < self.tta_match_iou:
356
+ acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i]))
357
+
358
+ if not acc_b:
359
  return []
360
 
361
+ boxes = np.array(acc_b, dtype=np.float32)
362
+ scores = np.array(acc_s, dtype=np.float32)
363
+ cls_ids = np.array(acc_c, dtype=np.int32)
364
 
365
+ keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thresh)
366
+ if len(keep) == 0:
367
  return []
368
+ keep = keep[: self.max_det]
 
369
 
370
+ # Apply geometry filters (min_w/h, aspect, area-ratio) via _to_boundingboxes
371
+ return self._to_boundingboxes(boxes[keep], scores[keep], cls_ids[keep], w, h)
372
+
373
+ @staticmethod
374
+ def _box_iou_one_to_many(box: ndarray, others: ndarray) -> ndarray:
375
+ """IoU of one box [x1,y1,x2,y2] vs Nx4 array of others. Returns 1-D scores."""
376
+ if len(others) == 0:
377
+ return np.array([], dtype=np.float32)
378
+ x1 = np.maximum(box[0], others[:, 0]); y1 = np.maximum(box[1], others[:, 1])
379
+ x2 = np.minimum(box[2], others[:, 2]); y2 = np.minimum(box[3], others[:, 3])
380
+ inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
381
+ a = (box[2] - box[0]) * (box[3] - box[1])
382
+ b = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
383
+ return inter / (a + b - inter + 1e-7)
384
 
385
  def _to_boundingboxes(
386
  self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
 
398
  bw, bh = ix2 - ix1, iy2 - iy1
399
  if bw * bh < self.min_box_area:
400
  continue
401
+ if bw < self.min_w or bh < self.min_h:
402
  continue
403
  ar = max(bw / max(bh, 1), bh / max(bw, 1))
404
  if ar > self.max_aspect_ratio:
405
  continue
406
+ # NEW: reject boxes covering > max_box_area_ratio of frame (FP guard)
407
+ if (bw * bh) / max(1, orig_w * orig_h) > self.max_box_area_ratio:
408
+ continue
409
  out.append(BoundingBox(
410
  x1=ix1, y1=iy1, x2=ix2, y2=iy2,
411
  cls_id=int(cls_ids[i]),
weights.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:418586a93c7b7664722bc4e9980b4426d68b2f3917db7e1e73037bde914cb3ee
3
- size 10311067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e4210b31ad35eb77d865c7cf76891fb4c8e4cb8f24c3f340b51421bfe26fe6e
3
+ size 19637792