baxtos
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

baxtos committed 5 days ago

Commit

652de85

verified ·

1 Parent(s): 56286d1

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +246 -62

miner.py CHANGED Viewed

@@ -1,13 +1,16 @@
-"""Open-source Detect-beverage miner (manak0/Detect-beverage-detect).
-ONNX + onnxruntime (no torch/ultralytics at inference -> light repo,
-deterministic; spot-check re-runs this same code+weights). Trained
-yolo11n with class order [cup, bottle, can] == manifest `objects`, so
-cls_id maps directly (0=cup,1=bottle,2=can). Letterbox 1280 (manifest
-preproc resize_long), flip-TTA, per-class conf, global NMS.
-Contract (turbovision example_miner): class `Miner` at HF repo root;
-`predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]`.
 """
 from __future__ import annotations
@@ -39,12 +42,24 @@ class TVFrameResult(BaseModel):
 class Miner:
     weights_file = "best.onnx"
     input_size = 1280
-    num_classes = 3                       # cup, bottle, can
-    # per-class confidence (tuned on held-out; cup scarcer -> lower gate)
-    conf_thres = np.array([0.25, 0.35, 0.35], dtype=np.float32)
-    iou_thres = 0.55
     max_det = 100
-    min_box_area = 36.0
     use_flip_tta = True
     def __init__(self, path_hf_repo: Path) -> None:
@@ -59,34 +74,31 @@ class Miner:
             sess_options=so,
         )
         self.inp = self.sess.get_inputs()[0].name
-        # ONNX может быть экспортирован в fp16 (для лимита репо ≤30MB) —
-        # кастим вход в тот же dtype, иначе INVALID_ARGUMENT на sess.run.
-        _ort_type = self.sess.get_inputs()[0].type  # e.g. "tensor(float16)"
         self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
         active = self.sess.get_providers()[0]
-        print(f"✅ ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
-        # Eager CUDA EP allocation: ORT lazily binds CUDA on first sess.run,
-        # so without this the validator's first /predict eats the cold-bind
-        # cost (30-300s in TEE-VM) and the scheduler reaps the instance
-        # before activation. Run a no-op inference here so on_startup only
-        # returns once GPU kernels/buffers are hot.
         try:
-            _dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
-            _ = self._infer(_dummy)
-            print(f"✅ ONNX warmup pass completed (provider={active})")
         except Exception as e:
-            print(f"⚠️ ONNX warmup pass failed (not fatal): {e}")
     def __repr__(self) -> str:
-        return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
-    # ---- preprocessing ---------------------------------------------------
-    def _letterbox(self, im: ndarray):
         h0, w0 = im.shape[:2]
         s = min(self.input_size / h0, self.input_size / w0)
         nh, nw = int(round(h0 * s)), int(round(w0 * s))
-        r = cv2.resize(im, (nw, nh))
         out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
         out[:nh, :nw] = r
         return out, s
@@ -95,54 +107,226 @@ class Miner:
         lb, s = self._letterbox(im_bgr)
         x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
              ).astype(self.np_dtype)
-        out = self.sess.run(None, {self.inp: x})[0][0]      # (4+nc, N)
-        # ONNX fp16 → numpy float16 в out; для последующего NMS на CPU
-        # удобнее float32, кастим обратно
         out = np.asarray(out, dtype=np.float32)
-        p = out.T if out.shape[0] < out.shape[1] else out    # (N, 4+nc)
         boxes = p[:, :4].copy()
         scores = p[:, 4:4 + self.num_classes]
-        # xywh(center) -> xyxy in original image coords
         xy = boxes[:, :2]
         wh = boxes[:, 2:4]
         x1y1 = (xy - wh / 2) / s
         x2y2 = (xy + wh / 2) / s
-        return np.concatenate([x1y1, x2y2, scores], axis=1)   # (N,4+nc)
     def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
         det = self._infer(im_bgr)
         if self.use_flip_tta:
             fl = self._infer(im_bgr[:, ::-1])
             W = im_bgr.shape[1]
-            x1 = W - fl[:, 2]
-            x2 = W - fl[:, 0]
-            fl[:, 0], fl[:, 2] = x1, x2
             det = np.concatenate([det, fl], axis=0)
-        cls = det[:, 4:].argmax(1)
-        conf = det[:, 4:].max(1)
-        keep = conf >= self.conf_thres[cls]
-        det, cls, conf = det[keep], cls[keep], conf[keep]
-        out: list[BoundingBox] = []
-        for c in range(self.num_classes):
             m = cls == c
-            if not m.any():
                 continue
-            b = det[m, :4]
-            sc = conf[m]
-            idx = cv2.dnn.NMSBoxes(
-                bboxes=[[float(x1), float(y1), float(x2 - x1),
-                         float(y2 - y1)] for x1, y1, x2, y2 in b],
-                scores=sc.tolist(), score_threshold=0.0,
-                nms_threshold=self.iou_thres,
-            )
-            for i in np.array(idx).flatten()[: self.max_det]:
-                x1, y1, x2, y2 = b[i]
-                if (x2 - x1) * (y2 - y1) < self.min_box_area:
-                    continue
-                out.append(BoundingBox(
-                    x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2),
-                    cls_id=int(c), conf=float(sc[i])))
         return out
     def predict_batch(
@@ -156,7 +340,7 @@ class Miner:
             try:
                 boxes = self._detect(np.ascontiguousarray(img))
             except Exception as e:                # never crash the chute
-                print(f"⚠️ frame {offset + i} detect error: {e}")
                 boxes = []
             results.append(TVFrameResult(
                 frame_id=offset + i, boxes=boxes,

+"""Open-source Detect-beverage miner v9 (post-proc upgrade, weights unchanged).
+Same ONNX weights as v8 (yolo11s fp16, mAP50 0.835 on holdout). Post-proc
+synthesised from the three strongest current peers:
+- per-class conf + can-rescue bonus  (navierstocks/drink @98280af6)
+- sane-box geometric filter           (drink + yevheniiapopova)
+- containment dedup same-class        (yevheniiapopova @f3becc13)
+- cross-class dedup high-IoU          (drink)
+- INTER_CUBIC on upsample letterbox   (drink + tensorminer)
+- TTA flip + cluster-boost conf       (drink)
+Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
 """
 from __future__ import annotations
 class Miner:
     weights_file = "best.onnx"
     input_size = 1280
+    num_classes = 3                          # class order: cup, bottle, can (index = cls_id)
+    # Per-class confidence gate, indexed by class id. Author's note: swept on a
+    # 73-image validator-pseudo holdout against v10 weights (peak UI 79.28%);
+    # cup/bottle use a moderate gate, can uses a softer gate plus the rescue below.
+    # NOTE(review): the module docstring and log strings say "v9" while this
+    # comment and the containment_thres comment refer to "v10" — confirm which
+    # release these numbers belong to.
+    conf_thres = np.array([0.55, 0.55, 0.45], dtype=np.float32)
+    # Per-class rescue bonus: when no box of class c passes conf_thres[c], the
+    # single highest-scoring candidate of that class is admitted if
+    # conf >= conf_thres[c] - rescue_bonus[c]. A bonus of 0.0 disables rescue
+    # for that class, so only `can` is rescued (author's note: "was 7/12 of our
+    # misses on common challenges with lead").
+    rescue_bonus = np.array([0.0, 0.0, 0.20], dtype=np.float32)
+    iou_thres = 0.40                         # IoU for per-class NMS and for cluster-boost grouping (was 0.55)
+    cross_iou_thres = 0.70                   # IoU above which the dedup pass drops the lower-ranked box
+    # Author's note: "OFF for v10 (better recall without)".
+    # NOTE(review): _containment_dedup tests `contain >= containment_thres`, so
+    # at 1.00 a box lying entirely inside a larger same-class box is still
+    # dropped; a value above 1.0 would be needed to disable the stage fully.
+    containment_thres = 1.00                 # min fraction of a box's area covered by a dominating box
+    min_box_area = 100.0                     # px² floor; was 36 (5 of 20 our FPs <400px²)
+    min_side = 8.0                           # minimum box width and height, in original-image px
+    max_aspect_ratio = 10.0                  # maximum long-side / short-side ratio
      max_det = 100
      use_flip_tta = True
     def __init__(self, path_hf_repo: Path) -> None:
             sess_options=so,
         )
         self.inp = self.sess.get_inputs()[0].name
+        _ort_type = self.sess.get_inputs()[0].type   # "tensor(float16)" or fp32
         self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
         active = self.sess.get_providers()[0]
+        print(f"✅ v9 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
+        # Eager CUDA EP allocation — same trick as v8: ORT lazily binds CUDA on
+        # first sess.run, TEE cold-bind eats 30-300s otherwise.
         try:
+            dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
+            _ = self._infer(dummy)
+            print(f"✅ v9 ONNX warmup pass completed (provider={active})")
         except Exception as e:
+            print(f"⚠️ v9 ONNX warmup pass failed (not fatal): {e}")
     def __repr__(self) -> str:
+        # Short identifier exposing the configured input size and class count.
+        return f"BeverageONNXv9(in={self.input_size}, cls={self.num_classes})"
+    # ---- preprocessing --------------------------------------------------
+    def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
+        # Resize `im` so that its longer side matches input_size (aspect ratio
+        # preserved) and paste it into the top-left corner of a square uint8
+        # canvas filled with 114. Returns (canvas, scale). The image is not
+        # centred, so dividing model-space coordinates by `scale` is enough to
+        # map them back to the original image.
         h0, w0 = im.shape[:2]
         s = min(self.input_size / h0, self.input_size / w0)
         nh, nw = int(round(h0 * s)), int(round(w0 * s))
+        # NOTE(review): for extreme aspect ratios the shorter side can round to
+        # 0 here and there is no guard before cv2.resize — confirm whether such
+        # frames can occur.
+        # INTER_CUBIC if upsampling, INTER_LINEAR if downsampling (peer trick)
+        interp = cv2.INTER_CUBIC if s > 1.0 else cv2.INTER_LINEAR
+        r = cv2.resize(im, (nw, nh), interpolation=interp)
         out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
         out[:nh, :nw] = r
         return out, s
         lb, s = self._letterbox(im_bgr)
         x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
              ).astype(self.np_dtype)
+        out = self.sess.run(None, {self.inp: x})[0][0]   # (4+nc, N) or (N, 4+nc)
         out = np.asarray(out, dtype=np.float32)
+        p = out.T if out.shape[0] < out.shape[1] else out  # → (N, 4+nc)
         boxes = p[:, :4].copy()
         scores = p[:, 4:4 + self.num_classes]
+        # xywh(center) → xyxy in original image coords
         xy = boxes[:, :2]
         wh = boxes[:, 2:4]
         x1y1 = (xy - wh / 2) / s
         x2y2 = (xy + wh / 2) / s
+        return np.concatenate([x1y1, x2y2, scores], axis=1)   # (N, 4+nc)
+    # ---- post-processing primitives -------------------------------------
+    @staticmethod
+    def _hard_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
+        # Greedy hard NMS over xyxy `boxes`: repeatedly keep the highest-scoring
+        # remaining box and discard every other remaining box whose IoU with it
+        # exceeds `iou_thresh`. Returns the kept indices into `boxes` (dtype
+        # intp), ordered by descending score; an empty array for empty input.
+        if len(boxes) == 0:
+            return np.array([], dtype=np.intp)
+        order = np.argsort(-scores)  # candidate indices, best score first
+        keep: list[int] = []
+        while len(order):
+            i = int(order[0])
+            keep.append(i)
+            if len(order) == 1:
+                break
+            rest = order[1:]
+            # Intersection rectangle between box i and every remaining box.
+            xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
+            yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
+            xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
+            yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
+            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
+            ai = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
+            ar = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
+            iou = inter / (ai + ar - inter + 1e-7)  # epsilon avoids 0/0 on degenerate boxes
+            # Survivors are boxes at or below the threshold; a box exactly at
+            # iou_thresh is kept.
+            order = rest[iou <= iou_thresh]
+        return np.array(keep, dtype=np.intp)
+    def _sane_filter(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
+                     orig_h: int, orig_w: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        # Geometric plausibility filter on xyxy boxes. A box is kept only if
+        # both sides are >= min_side, its area is within
+        # [min_box_area, 95% of the orig_h x orig_w image area], and its
+        # long/short side ratio is <= max_aspect_ratio. `scores` and `cls` are
+        # filtered with the same mask and returned alongside the boxes.
+        if len(boxes) == 0:
+            return boxes, scores, cls
+        # Width/height clamped at 0 so inverted boxes count as zero-sized.
+        bw = np.maximum(0.0, boxes[:, 2] - boxes[:, 0])
+        bh = np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
+        area = bw * bh
+        # Aspect ratio as max(w/h, h/w); zero-sized boxes get inf so the
+        # max_aspect_ratio test below always rejects them.
+        ar = np.where(
+            (bw > 0) & (bh > 0),
+            np.maximum(bw / np.maximum(bh, 1e-6), bh / np.maximum(bw, 1e-6)),
+            np.inf,
+        )
+        keep = (
+            (bw >= self.min_side) & (bh >= self.min_side)
+            & (area >= self.min_box_area)
+            & (area <= 0.95 * orig_h * orig_w)
+            & (ar <= self.max_aspect_ratio)
+        )
+        return boxes[keep], scores[keep], cls[keep]
+    def _conf_filter_with_rescue(self, scores: np.ndarray, cls: np.ndarray) -> np.ndarray:
+        # Build a boolean keep-mask aligned with `scores`: a candidate passes
+        # when its score reaches conf_thres for its class. `cls` must hold
+        # integer class ids because it is used to index conf_thres and
+        # rescue_bonus.
+        # NOTE(review): the rescued candidate is chosen by score alone, before
+        # the geometric filter runs in _detect, so it can still be discarded
+        # there.
+        if len(scores) == 0:
+            return np.zeros(0, dtype=bool)
+        keep = scores >= self.conf_thres[cls]
+        # per-class rescue: if class c has zero passes, admit top-1 candidate
+        # whose conf >= conf_thres[c] - rescue_bonus[c]
+        for c in np.unique(cls):
+            b = float(self.rescue_bonus[c])
+            if b <= 0.0:
+                continue  # rescue disabled for this class
+            cm = cls == c
+            if keep[cm].any():
+                continue  # class already has at least one passing box
+            idx = np.where(cm)[0]
+            top = int(idx[int(np.argmax(scores[idx]))])  # global index of the class's best candidate
+            if scores[top] >= self.conf_thres[c] - b:
+                keep[top] = True
+        return keep
+    def _cross_class_dedup(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
+                           ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """Drop dup boxes between classes (one object getting two cls labels).
+        Lexsort by larger margin-over-threshold first, then larger area."""
+        # Returns the surviving (boxes, scores, cls) in ranking order, not in
+        # input order.
+        # NOTE(review): the IoU test below never looks at `cls`; any pair above
+        # cross_iou_thres is deduplicated. In _detect this runs after per-class
+        # NMS at iou_thres, so with the default thresholds (0.40 < 0.70) only
+        # cross-class pairs can still overlap this much.
+        n = len(boxes)
+        if n <= 1:
+            return boxes, scores, cls
+        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+        # Margin over the class's own threshold makes scores comparable across
+        # classes that use different gates.
+        margins = scores - self.conf_thres[cls]
+        # np.lexsort uses the LAST key as primary: margin descending, ties
+        # broken by area descending.
+        order = np.lexsort((-areas, -margins))
+        suppressed = np.zeros(n, dtype=bool)
+        keep: list[int] = []
+        for i in order:
+            if suppressed[i]:
+                continue
+            keep.append(int(i))
+            bi = boxes[i]
+            # IoU of box i against every box (including itself).
+            xx1 = np.maximum(bi[0], boxes[:, 0])
+            yy1 = np.maximum(bi[1], boxes[:, 1])
+            xx2 = np.minimum(bi[2], boxes[:, 2])
+            yy2 = np.minimum(bi[3], boxes[:, 3])
+            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
+            ai = max(1e-7, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
+            iou = inter / (ai + areas - inter + 1e-7)
+            dup = iou > self.cross_iou_thres
+            dup[i] = False  # a box never suppresses itself
+            suppressed |= dup
+        idx = np.array(keep, dtype=np.intp)
+        return boxes[idx], scores[idx], cls[idx]
+    def _containment_dedup(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
+                           ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """Drop a box if ≥ containment_thres of its area is inside a same-class
+        box that is larger (or equal-size with higher conf). Catches the
+        bottle-inside-bottle / cup-inside-cup pattern YOLO often produces."""
+        # Returns the surviving (boxes, scores, cls) in their input order.
+        # NOTE(review): the comparison is `>=`, so with the class default of
+        # containment_thres = 1.00 (commented as "OFF") a box that lies
+        # entirely inside a larger same-class box is still removed. Use a
+        # threshold above 1.0 if the stage is meant to be a no-op.
+        n = len(boxes)
+        if n <= 1:
+            return boxes, scores, cls
+        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+        # Pairwise (n, n) intersection via broadcasting: row i is the candidate
+        # box, column j is the potential container.
+        iw = np.maximum(0.0, np.minimum(boxes[:, 2:3], boxes[None, :, 2])
+                        - np.maximum(boxes[:, 0:1], boxes[None, :, 0]))
+        ih = np.maximum(0.0, np.minimum(boxes[:, 3:4], boxes[None, :, 3])
+                        - np.maximum(boxes[:, 1:2], boxes[None, :, 1]))
+        inter = iw * ih
+        contain = inter / np.maximum(area[:, None], 1e-9)   # frac of i contained in j
+        same_class = cls[:, None] == cls[None, :]
+        bigger = area[None, :] > area[:, None]  # j strictly larger than i
+        tiebreak = (area[None, :] == area[:, None]) & (scores[None, :] > scores[:, None])
+        # j may suppress i only if it has the same class and is larger, or is
+        # equal in area with a strictly higher score.
+        dominator = same_class & (bigger | tiebreak)
+        np.fill_diagonal(dominator, False)
+        suppressed = ((contain >= self.containment_thres) & dominator).any(axis=1)
+        keep = np.where(~suppressed)[0]
+        return boxes[keep], scores[keep], cls[keep]
+    def _cluster_boost(self, kept_boxes: np.ndarray, kept_cls: np.ndarray,
+                       all_boxes: np.ndarray, all_scores: np.ndarray, all_cls: np.ndarray,
+                       ) -> np.ndarray:
+        """For each kept box, return max conf among same-class boxes overlapping
+        with IoU≥iou_thres (incl. itself). TTA confidence aggregation."""
+        # Returns a float32 array with one boosted confidence per kept box.
+        # `all_*` is the candidate pool searched for cluster members; in
+        # _detect it is the set that passed the confidence and geometric
+        # filters, taken before NMS, so each kept box is also a member of it.
+        n = len(kept_boxes)
+        if n == 0:
+            return np.empty(0, dtype=np.float32)
+        all_areas = (np.maximum(0.0, all_boxes[:, 2] - all_boxes[:, 0])
+                     * np.maximum(0.0, all_boxes[:, 3] - all_boxes[:, 1]))
+        out = np.empty(n, dtype=np.float32)
+        for i in range(n):
+            bi = kept_boxes[i]
+            # IoU of kept box i against the whole candidate pool.
+            xx1 = np.maximum(bi[0], all_boxes[:, 0])
+            yy1 = np.maximum(bi[1], all_boxes[:, 1])
+            xx2 = np.minimum(bi[2], all_boxes[:, 2])
+            yy2 = np.minimum(bi[3], all_boxes[:, 3])
+            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
+            ai = max(0.0, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
+            iou = inter / (ai + all_areas - inter + 1e-7)
+            cluster = (iou >= self.iou_thres) & (all_cls == kept_cls[i])
+            # 0.0 is only reached when the pool holds no same-class overlap at
+            # all, i.e. the kept box itself is absent from `all_boxes`.
+            out[i] = float(np.max(all_scores[cluster])) if np.any(cluster) else 0.0
+        return out
+    # ---- top-level detect with TTA --------------------------------------
     def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
+        # Full single-frame pipeline: inference (plus optional horizontal-flip
+        # TTA) -> per-class confidence gate with rescue -> geometric filter ->
+        # per-class NMS -> containment dedup -> cross-class dedup ->
+        # cluster-boosted confidence -> max_det cap. Returns BoundingBox
+        # objects in original-image pixel coordinates; an empty list when
+        # nothing survives.
+        orig_h, orig_w = im_bgr.shape[:2]
+        # 1. Inference + optional flip TTA
         det = self._infer(im_bgr)
         if self.use_flip_tta:
             fl = self._infer(im_bgr[:, ::-1])
             W = im_bgr.shape[1]
+            # Mirror the flipped-pass boxes back: new x1 = W - old x2, new
+            # x2 = W - old x1. Both are computed into fresh arrays before the
+            # in-place assignment so neither column is read after being
+            # overwritten.
+            x1n = W - fl[:, 2]
+            x2n = W - fl[:, 0]
+            fl[:, 0], fl[:, 2] = x1n, x2n
             det = np.concatenate([det, fl], axis=0)
+        # 2. Pick class + per-class conf filter + rescue
+        boxes = det[:, :4]
+        cls_all = det[:, 4:].argmax(1).astype(np.int32)
+        conf_all = det[:, 4:].max(1)
+        keep = self._conf_filter_with_rescue(conf_all, cls_all)
+        boxes, scores, cls = boxes[keep], conf_all[keep], cls_all[keep]
+        if len(boxes) == 0:
+            return []
+        # 3. Sane filter (geometric)
+        boxes, scores, cls = self._sane_filter(boxes, scores, cls, orig_h, orig_w)
+        if len(boxes) == 0:
+            return []
+        # Keep raw cluster for boost (before any dedup)
+        raw_boxes, raw_scores, raw_cls = boxes.copy(), scores.copy(), cls.copy()
+        # 4. Per-class hard NMS
+        keep_idx: list[int] = []
+        for c in np.unique(cls):
             m = cls == c
+            mi = np.where(m)[0]  # global indices of class-c boxes
+            k = self._hard_nms(boxes[m], scores[m], self.iou_thres)
+            keep_idx.extend(mi[k].tolist())  # map class-local NMS indices back to global
+        keep_idx.sort()
+        ki = np.array(keep_idx, dtype=np.intp)
+        boxes, scores, cls = boxes[ki], scores[ki], cls[ki]
+        # 5. Containment dedup (drop a box mostly inside same-class bigger box)
+        boxes, scores, cls = self._containment_dedup(boxes, scores, cls)
+        # 6. Cross-class dedup (one object → one class only)
+        boxes, scores, cls = self._cross_class_dedup(boxes, scores, cls)
+        # 7. Cluster-boost confidence (TTA aggregation)
+        if len(boxes):
+            boosted = self._cluster_boost(boxes, cls, raw_boxes, raw_scores, raw_cls)
+        else:
+            boosted = scores
+        # 8. Cap at max_det
+        if len(boxes) > self.max_det:
+            top = np.argsort(-boosted)[: self.max_det]
+            boxes, cls, boosted = boxes[top], cls[top], boosted[top]
+        out: list[BoundingBox] = []
+        # NOTE(review): coordinates are truncated with int() and are not
+        # clipped to [0, orig_w] x [0, orig_h]; confirm the consumer accepts
+        # boxes that extend past the image border.
+        for (x1, y1, x2, y2), c, s in zip(boxes, cls, boosted):
+            if x2 <= x1 or y2 <= y1:
                 continue
+            out.append(BoundingBox(
+                x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2),
+                cls_id=int(c), conf=float(min(1.0, max(0.0, s))),
+            ))
         return out
     def predict_batch(
             try:
                 boxes = self._detect(np.ascontiguousarray(img))
             except Exception as e:                # never crash the chute
+                print(f"⚠️ v9 frame {offset + i} detect error: {e}")
                 boxes = []
             results.append(TVFrameResult(
                 frame_id=offset + i, boxes=boxes,