meaculpitt
/

ScoreVision-Beverage

@@ -1,5 +1,30 @@
-# build-marker: v5-yolo26n-1280-tta
-"""SN44 beverage detection miner — v5 (yolo26n at 1280, alfred-aligned pipeline).
 Changes from v4:
   - Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
@@ -15,8 +40,9 @@ Changes from v4:
   - ort.preload_dlls() instead of manual nvidia-* ctypes preload.
   - Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
-Class remap [1,2,0]: model trained as [bottle, can, cup] -> validator [cup, bottle, can].
-TODO v6: retrain in validator class order to drop remap.
 """
 import math
 from pathlib import Path
@@ -53,9 +79,9 @@ class Miner:
         # Validator's positional class order from the SN44 element manifest.
         self.class_names = ["cup", "bottle", "can"]
-        # Our model trained with data.yaml class order [bottle, can, cup].
-        # Remap: model 0 (bottle) -> 1, model 1 (can) -> 2, model 2 (cup) -> 0.
-        self.cls_remap = np.array([1, 2, 0], dtype=np.int32)
         try:
             ort.preload_dlls()
@@ -85,17 +111,22 @@ class Miner:
         self.input_h = 1280
         self.input_w = 1280
-        # alfred-aligned thresholds.
-        self.conf_threshold = 0.55
-        self.iou_thresh = 0.5            # per-class hard NMS
-        self.cross_iou_thresh = 0.7      # cross-class dedup
         self.max_det = 150
         self.use_tta = True
-        # Geometry filters (kept from v4).
-        self.min_box_area = 100      # 10x10 px²
-        self.min_side = 8
-        self.max_aspect_ratio = 8.0
         # GPU warmup.
         warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
@@ -107,8 +138,9 @@ class Miner:
     def __repr__(self) -> str:
         return (
-            f"BeverageMiner v5 input={self.input_h}x{self.input_w} "
-            f"classes={len(self.class_names)} use_tta={self.use_tta} "
             f"providers={self.session.get_providers()}"
         )
@@ -270,9 +302,13 @@ class Miner:
         return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
     def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
-        """H-flip TTA: union(orig, flipped) -> per-class NMS -> conf-boost."""
         boxes_orig = self._infer_single(image_bgr)
         h, w = image_bgr.shape[:2]
         flipped = cv2.flip(image_bgr, 1)
         boxes_flip_raw = self._infer_single(flipped)
@@ -281,30 +317,70 @@ class Miner:
                         cls_id=b.cls_id, conf=b.conf)
             for b in boxes_flip_raw
         ]
-        all_boxes = boxes_orig + boxes_flip
-        if not all_boxes:
             return []
-        coords = np.array([[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32)
-        scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
-        cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)
-        keep_idx = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thresh)
-        if len(keep_idx) == 0:
             return []
-        keep_idx = keep_idx[: self.max_det]
-        boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)
-        out_boxes: list[BoundingBox] = []
-        for j, idx in enumerate(keep_idx):
-            b = all_boxes[idx]
-            out_boxes.append(BoundingBox(
-                x1=b.x1, y1=b.y1, x2=b.x2, y2=b.y2,
-                cls_id=b.cls_id,
-                conf=max(0.0, min(1.0, float(boosted[j]))),
-            ))
-        return out_boxes
     def _to_boundingboxes(
         self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
@@ -322,11 +398,14 @@ class Miner:
             bw, bh = ix2 - ix1, iy2 - iy1
             if bw * bh < self.min_box_area:
                 continue
-            if min(bw, bh) < self.min_side:
                 continue
             ar = max(bw / max(bh, 1), bh / max(bw, 1))
             if ar > self.max_aspect_ratio:
                 continue
             out.append(BoundingBox(
                 x1=ix1, y1=iy1, x2=ix2, y2=iy2,
                 cls_id=int(cls_ids[i]),

+# build-marker: v8-yolo26s-fp16-1280-hermestech-pipeline
+"""SN44 beverage detection miner — v8 (yolo26s FP16 at 1280, hermestech-style pipeline).
+v8 (2026-05-04 ~22:30Z): two simultaneous changes from v7 (emu):
+  1. WEIGHTS: yolo26s @ 1280, FP16 ONNX (~18.7 MB). Trained natively in
+     validator class order [cup, bottle, can] on merged_v8 (~38k images),
+     epoch 40 best (mAP50=0.840 / mAP50-95=0.694). Replaces v7's yolo26n
+     (~10.3 MB FP32). FP16 quantization: <0.001 mAP loss vs FP32 export.
+  2. INFERENCE PIPELINE: ported from hermestech00/person-detect-0 (top-1
+     beverage miner). Aggressive precision-over-recall:
+        - conf_threshold 0.55 → 0.75
+        - iou_thresh 0.5 → 0.07 (very aggressive NMS)
+        - new max_aspect_ratio 5.0 (was 8.0)
+        - new max_box_area_ratio 0.85 (rejects frame-covering FPs)
+        - new min_w/min_h 6/6 (replaces min_side=8)
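+        - TTA-consensus: all orig-view boxes accepted directly (conf_high=0.0);
+          the flip view can only raise an orig score for boxes BELOW conf_high
+          (match at IoU >= tta_match_iou=0.99), so with conf_high=0.0 no score
+          is ever boosted; flip boxes with no orig match at IoU >= 0.99 are
+          added as extra candidates and left to the final NMS.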
+     Offline mAP DROPS (~13% on this val set), but the manak0 manifest scores
+     0.6×mAP50 + 0.4×false_positive, so the gain on the false-positive
+     component is expected to outweigh the mAP50 loss. Empirically, hermestech
+     with this exact pipeline is rank 1 (0.67 mean) vs. our emu at rank 5
+     (0.46 mean).
+OLD v7 (kept for context, see miner.py.v7_backup_*):
+  - alfred-aligned: conf=0.55, iou=0.5, TTA=union-then-NMS-then-boost
+  - yolo26n FP32 (~10.3 MB)
+
 Changes from v4:
   - Input resolution 640 -> 1280. Stadium-CCTV frames have 30-70 px objects
   - ort.preload_dlls() instead of manual nvidia-* ctypes preload.
   - Single postproc handler (no raw fallback) since our export is e2e [1,300,6].
+v7 model is trained natively in validator class order [cup, bottle, can] on
+merged_v7_aug (38k v1+OI images + 10k CCTV-degraded augmentations, 30% ratio),
+so cls_remap is identity. Compare to v5 emu which used [1,2,0] remap.
 """
 import math
 from pathlib import Path
         # Validator's positional class order from the SN44 element manifest.
         self.class_names = ["cup", "bottle", "can"]
+        # v7: model trained natively in validator class order [cup, bottle, can]
+        # so cls_remap is identity (no remap needed).
+        self.cls_remap = np.arange(3, dtype=np.int32)
         try:
             ort.preload_dlls()
         self.input_h = 1280
         self.input_w = 1280
+        # hermestech-inspired aggressive filtering (top-1 beverage miner pattern).
+        self.conf_threshold = 0.75       # was 0.55 — drop borderline detections
+        self.iou_thresh = 0.07           # was 0.5 — very aggressive NMS
+        self.cross_iou_thresh = 0.7      # cross-class dedup (kept; hermestech omits)
         self.max_det = 150
         self.use_tta = True
+        # TTA-consensus thresholds (port of hermestech _merge_tta_consensus):
+        self.conf_high = 0.0             # ALL orig-view boxes accepted directly
+        self.tta_match_iou = 0.99        # near-perfect IoU required to fuse orig+flip scores
+        # Geometry filters (hermestech-tuned for beverage).
+        self.min_box_area = 144          # was 100 (12x12 vs 10x10)
+        self.min_w = 6                   # NEW
+        self.min_h = 6                   # NEW
+        self.max_aspect_ratio = 5.0      # was 8.0
+        self.max_box_area_ratio = 0.85   # NEW — reject frame-covering false positives
         # GPU warmup.
         warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
     def __repr__(self) -> str:
         return (
+            f"BeverageMiner v8-hermestech input={self.input_h}x{self.input_w} "
+            f"conf>={self.conf_threshold} iou={self.iou_thresh} "
+            f"tta_match_iou={self.tta_match_iou} use_tta={self.use_tta} "
             f"providers={self.session.get_providers()}"
         )
         return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
     def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
+        """Hermestech-style TTA consensus (port from hermestech00/person-detect-0):
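+        - orig-view boxes with conf >= conf_high are accepted directly; with
+          conf_high=0.0 that is every orig-view box
+        - an orig-view box below conf_high is kept only when a flip-view box
+          matches it at IoU >= tta_match_iou, scored as max(orig, flip); this
+          branch is not taken for any box while conf_high=0.0
+        - flip-view boxes with conf >= conf_high are added when no orig-view
+          box overlaps them at IoU >= tta_match_iou
+        - final per-class NMS at iou_thresh (0.07), cap at max_det, then
+          geometry filters via _to_boundingboxes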
+        """
         boxes_orig = self._infer_single(image_bgr)
         h, w = image_bgr.shape[:2]
         flipped = cv2.flip(image_bgr, 1)
         boxes_flip_raw = self._infer_single(flipped)
                         cls_id=b.cls_id, conf=b.conf)
             for b in boxes_flip_raw
         ]
+        if not boxes_orig and not boxes_flip:
+            return []
+        coords_o = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0, 4), dtype=np.float32)
+        scores_o = np.array([b.conf   for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0,), dtype=np.float32)
+        cls_o    = np.array([b.cls_id for b in boxes_orig], dtype=np.int32)   if boxes_orig else np.empty((0,), dtype=np.int32)
+        coords_f = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0, 4), dtype=np.float32)
+        scores_f = np.array([b.conf   for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0,), dtype=np.float32)
+        cls_f    = np.array([b.cls_id for b in boxes_flip], dtype=np.int32)   if boxes_flip else np.empty((0,), dtype=np.int32)
+        acc_b: list[ndarray] = []
+        acc_s: list[float] = []
+        acc_c: list[int] = []
+        # Original-view loop: accept all >= conf_high directly; below, require flip match
+        for i in range(len(coords_o)):
+            sc = float(scores_o[i])
+            if sc >= self.conf_high:
+                acc_b.append(coords_o[i]); acc_s.append(sc); acc_c.append(int(cls_o[i]))
+            elif len(coords_f) > 0:
+                ious = self._box_iou_one_to_many(coords_o[i], coords_f)
+                j = int(np.argmax(ious))
+                if ious[j] >= self.tta_match_iou:
+                    acc_b.append(coords_o[i])
+                    acc_s.append(max(sc, float(scores_f[j])))
+                    acc_c.append(int(cls_o[i]))
+        # Flipped-view loop: only add high-conf boxes that have NO match in original
+        for i in range(len(coords_f)):
+            sc = float(scores_f[i])
+            if sc < self.conf_high:
+                continue
+            if len(coords_o) == 0:
+                acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i])); continue
+            ious = self._box_iou_one_to_many(coords_f[i], coords_o)
+            if np.max(ious) < self.tta_match_iou:
+                acc_b.append(coords_f[i]); acc_s.append(sc); acc_c.append(int(cls_f[i]))
+        if not acc_b:
             return []
+        boxes = np.array(acc_b, dtype=np.float32)
+        scores = np.array(acc_s, dtype=np.float32)
+        cls_ids = np.array(acc_c, dtype=np.int32)
+        keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thresh)
+        if len(keep) == 0:
             return []
+        keep = keep[: self.max_det]
+        # Apply geometry filters (min_w/h, aspect, area-ratio) via _to_boundingboxes
+        return self._to_boundingboxes(boxes[keep], scores[keep], cls_ids[keep], w, h)
+    @staticmethod
+    def _box_iou_one_to_many(box: ndarray, others: ndarray) -> ndarray:
+        """IoU of one box [x1,y1,x2,y2] vs Nx4 array of others. Returns 1-D scores."""
+        if len(others) == 0:
+            return np.array([], dtype=np.float32)
+        x1 = np.maximum(box[0], others[:, 0]); y1 = np.maximum(box[1], others[:, 1])
+        x2 = np.minimum(box[2], others[:, 2]); y2 = np.minimum(box[3], others[:, 3])
+        inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
+        a = (box[2] - box[0]) * (box[3] - box[1])
+        b = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
+        return inter / (a + b - inter + 1e-7)
     def _to_boundingboxes(
         self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
             bw, bh = ix2 - ix1, iy2 - iy1
             if bw * bh < self.min_box_area:
                 continue
+            if bw < self.min_w or bh < self.min_h:
                 continue
             ar = max(bw / max(bh, 1), bh / max(bw, 1))
             if ar > self.max_aspect_ratio:
                 continue
+            # NEW: reject boxes covering > max_box_area_ratio of frame (FP guard)
+            if (bw * bh) / max(1, orig_w * orig_h) > self.max_box_area_ratio:
+                continue
             out.append(BoundingBox(
                 x1=ix1, y1=iy1, x2=ix2, y2=iy2,
                 cls_id=int(cls_ids[i]),

weights.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:418586a93c7b7664722bc4e9980b4426d68b2f3917db7e1e73037bde914cb3ee
-size 10311067

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e4210b31ad35eb77d865c7cf76891fb4c8e4cb8f24c3f340b51421bfe26fe6e
+size 19637792