baxtos
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

baxtos committed 4 days ago

Commit

cd7f036

verified ·

1 Parent(s): 909bc91

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +52 -31

miner.py CHANGED Viewed

@@ -1,14 +1,17 @@
-"""Open-source Detect-beverage miner v9 (post-proc upgrade, weights unchanged).
-Same ONNX weights as v8 (yolo11s fp16, mAP50 0.835 on holdout). Post-proc
-synthesised from the three strongest current peers:
-- per-class conf + can-rescue bonus  (navierstocks/drink @98280af6)
-- sane-box geometric filter           (drink + yevheniiapopova)
-- containment dedup same-class        (yevheniiapopova @f3becc13)
-- cross-class dedup high-IoU          (drink)
-- INTER_CUBIC on upsample letterbox   (drink + tensorminer)
-- TTA flip + cluster-boost conf       (drink)
 Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
 """
@@ -44,23 +47,20 @@ class Miner:
     input_size = 1280
     num_classes = 3                          # cup, bottle, can
-    # per-class conf (swept on validator-pseudo holdout 73 imgs against v10 weights,
-    # peak UI 79.28%): cup/bottle moderate (model is more accurate now), can softer + rescue.
-    conf_thres = np.array([0.55, 0.55, 0.45], dtype=np.float32)
-    # per-class rescue bonus: if no boxes of class c pass conf, admit its top-1
-    # candidate when conf >= conf_thres[c] - bonus[c]. Only `can` (was 7/12 of
-    # our misses on common challenges with lead).
-    rescue_bonus = np.array([0.0, 0.0, 0.20], dtype=np.float32)
-    iou_thres = 0.40                         # per-class NMS (was 0.55)
     cross_iou_thres = 0.70                   # cross-class dedup
-    containment_thres = 1.00                 # OFF for v10 (better recall without)
-    min_box_area = 100.0                     # was 36 (5 of 20 our FPs <400px²)
     min_side = 8.0
     max_aspect_ratio = 10.0
-    max_det = 100
-    use_flip_tta = True
     def __init__(self, path_hf_repo: Path) -> None:
         so = ort.SessionOptions()
@@ -77,19 +77,23 @@ class Miner:
         _ort_type = self.sess.get_inputs()[0].type   # "tensor(float16)" or fp32
         self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
         active = self.sess.get_providers()[0]
-        print(f"✅ v9 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
-        # Eager CUDA EP allocation — same trick as v8: ORT lazily binds CUDA on
-        # first sess.run, TEE cold-bind eats 30-300s otherwise.
         try:
             dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
             _ = self._infer(dummy)
-            print(f"✅ v9 ONNX warmup pass completed (provider={active})")
         except Exception as e:
-            print(f"⚠️ v9 ONNX warmup pass failed (not fatal): {e}")
     def __repr__(self) -> str:
-        return f"BeverageONNXv9(in={self.input_size}, cls={self.num_classes})"
     # ---- preprocessing --------------------------------------------------
     def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
@@ -107,17 +111,34 @@ class Miner:
         lb, s = self._letterbox(im_bgr)
         x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
              ).astype(self.np_dtype)
-        out = self.sess.run(None, {self.inp: x})[0][0]   # (4+nc, N) or (N, 4+nc)
-        out = np.asarray(out, dtype=np.float32)
         p = out.T if out.shape[0] < out.shape[1] else out  # → (N, 4+nc)
         boxes = p[:, :4].copy()
         scores = p[:, 4:4 + self.num_classes]
-        # xywh(center) → xyxy in original image coords
         xy = boxes[:, :2]
         wh = boxes[:, 2:4]
         x1y1 = (xy - wh / 2) / s
         x2y2 = (xy + wh / 2) / s
-        return np.concatenate([x1y1, x2y2, scores], axis=1)   # (N, 4+nc)
     # ---- post-processing primitives -------------------------------------
     @staticmethod

+"""Open-source Detect-beverage miner v12 (new yolo11s weights + NMS-baked ONNX).
+New v12 ONNX weights: trained on combined dataset (375 fresh cross-consensus
+pseudo-GT from top 3 Beverage miners + 279 prior validator-pseudo). NMS-baked
+export — output shape `[1, 300, 6]` (xyxy, conf, cls). On holdout 73 imgs:
+ultralytics-val mAP50=0.879 (v8: 0.835).
+Post-proc:
+- detect NMS-baked output and unpack to (N, 4+num_classes) one-hot scores
+- per-class conf filter `[0.60, 0.40, 0.60]` (best from sweep on v12 ONNX)
+- sane-box geometric filter (min_box_area=100, max_aspect_ratio=10)
+- per-class hard NMS @ iou=0.4 (redundant after baked NMS but safe)
+- cross-class dedup @ iou=0.7
+- TTA off (sweep showed flip-TTA + cluster-boost hurt UI on NMS-baked output)
 Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
 """
     input_size = 1280
     num_classes = 3                          # cup, bottle, can
+    # per-class conf thresholds, in class order (cup, bottle, can) — best v12 sweep without TTA (UI 80.32%, FP 0.832):
+    conf_thres = np.array([0.60, 0.40, 0.60], dtype=np.float32)
+    # rescue bonus disabled — v12 model strong enough not to need it
+    rescue_bonus = np.array([0.0, 0.0, 0.0], dtype=np.float32)
+    iou_thres = 0.40                         # per-class NMS (redundant after baked-NMS but safe)
     cross_iou_thres = 0.70                   # cross-class dedup
+    containment_thres = 1.00                 # OFF
+    min_box_area = 100.0
     min_side = 8.0
     max_aspect_ratio = 10.0
+    max_det = 300                            # match NMS-baked graph max_det
+    use_flip_tta = False                     # flip-TTA hurt UI on NMS-baked v12 (sweep -0.8 pp)
     def __init__(self, path_hf_repo: Path) -> None:
         so = ort.SessionOptions()
         _ort_type = self.sess.get_inputs()[0].type   # "tensor(float16)" or fp32
         self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
         active = self.sess.get_providers()[0]
+        print(f"✅ v12 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
+        # Log the first output's name and shape once for diagnostics (nothing is stored here)
+        out0 = self.sess.get_outputs()[0]
+        print(f"ONNX output: name={out0.name} shape={out0.shape}")
+        # Eager CUDA EP allocation via one dummy inference: ORT binds CUDA lazily on
+        # the first sess.run, and in a TEE that cold bind eats 30-300s otherwise.
         try:
             dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
             _ = self._infer(dummy)
+            print(f"✅ v12 ONNX warmup pass completed (provider={active})")
         except Exception as e:
+            print(f"⚠️ v12 ONNX warmup pass failed (not fatal): {e}")
     def __repr__(self) -> str:
+        return f"BeverageONNXv12(in={self.input_size}, cls={self.num_classes})"
     # ---- preprocessing --------------------------------------------------
     def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
         lb, s = self._letterbox(im_bgr)
         x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
              ).astype(self.np_dtype)
+        raw = self.sess.run(None, {self.inp: x})[0]
+        raw = np.asarray(raw, dtype=np.float32)
+        # NMS-baked output: [1, N, 6] = (x1, y1, x2, y2, conf, cls)
+        if raw.ndim == 3 and raw.shape[-1] == 6:
+            arr = raw[0]
+            keep = arr[:, 4] > 0           # drop zero-padding rows
+            arr = arr[keep]
+            if len(arr) == 0:
+                return np.zeros((0, 4 + self.num_classes), dtype=np.float32)
+            boxes = arr[:, :4].copy() / s   # letterbox → orig coords
+            confs = arr[:, 4]
+            cls_ids = arr[:, 5].astype(np.int32)
+            cls_ids = np.clip(cls_ids, 0, self.num_classes - 1)
+            scores = np.zeros((len(arr), self.num_classes), dtype=np.float32)
+            scores[np.arange(len(arr)), cls_ids] = confs
+            return np.concatenate([boxes, scores], axis=1)
+        # Legacy raw YOLO output: [1, 4+nc, N] or [1, N, 4+nc] (xywh-center)
+        out = raw[0]
         p = out.T if out.shape[0] < out.shape[1] else out  # → (N, 4+nc)
         boxes = p[:, :4].copy()
         scores = p[:, 4:4 + self.num_classes]
         xy = boxes[:, :2]
         wh = boxes[:, 2:4]
         x1y1 = (xy - wh / 2) / s
         x2y2 = (xy + wh / 2) / s
+        return np.concatenate([x1y1, x2y2, scores], axis=1)
     # ---- post-processing primitives -------------------------------------
     @staticmethod