baxtos
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

baxtos committed 12 days ago

Commit

a427865

verified ·

1 Parent(s): 8b7748c

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +140 -0

miner.py ADDED Viewed

	@@ -0,0 +1,140 @@

+"""Open-source Detect-beverage miner (manak0/Detect-beverage-detect).
+ONNX + onnxruntime (no torch/ultralytics at inference -> light repo,
+deterministic; spot-check re-runs this same code+weights). Trained
+yolo11n with class order [cup, bottle, can] == manifest `objects`, so
+cls_id maps directly (0=cup,1=bottle,2=can). Letterbox 1280 (manifest
+preproc resize_long), flip-TTA, per-class conf, per-class NMS.
+Contract (turbovision example_miner): class `Miner` at HF repo root;
+`predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]`.
+"""
+from __future__ import annotations
+from pathlib import Path
+import cv2
+import numpy as np
+import onnxruntime as ort
+from numpy import ndarray
+from pydantic import BaseModel
class BoundingBox(BaseModel):
    """A single detection: an axis-aligned box with its class index and score.

    The box is stored as two corner points, ``(x1, y1)`` and ``(x2, y2)``,
    in integer pixel coordinates.
    """

    # first corner of the box
    x1: int
    y1: int
    # opposite corner of the box
    x2: int
    y2: int
    # class index; the module docstring gives the order as 0=cup, 1=bottle, 2=can
    cls_id: int
    # confidence score attached to this detection
    conf: float
class TVFrameResult(BaseModel):
    """Per-frame prediction record returned by ``Miner.predict_batch``."""

    # index of the frame this record describes
    frame_id: int
    # detections found in the frame (may be empty)
    boxes: list[BoundingBox]
    # (x, y) integer pairs, one entry per requested keypoint
    keypoints: list[tuple[int, int]]
class Miner:
    """Beverage detector (cup / bottle / can) running an ONNX model on CPU.

    Pipeline per frame: letterbox to ``input_size`` -> ONNX forward pass
    (optionally repeated on a horizontally flipped copy) -> per-class
    confidence gate -> clip boxes to the frame -> per-class NMS -> drop
    boxes smaller than ``min_box_area``.
    """

    weights_file = "best.onnx"            # file name looked up inside the repo dir
    input_size = 1280                     # side of the square network input
    num_classes = 3                       # cup, bottle, can
    # per-class confidence (tuned on held-out; cup scarcer -> lower gate)
    conf_thres = np.array([0.25, 0.35, 0.35], dtype=np.float32)
    iou_thres = 0.55                      # NMS overlap threshold
    max_det = 100                         # cap on NMS survivors, applied per class
    min_box_area = 36.0                   # boxes with a smaller area are dropped
    use_flip_tta = True                   # add a horizontally flipped pass

    def __init__(self, path_hf_repo: Path) -> None:
        """Load ``weights_file`` from ``path_hf_repo`` into a CPU ORT session."""
        so = ort.SessionOptions()
        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        self.sess = ort.InferenceSession(
            str(Path(path_hf_repo) / self.weights_file),
            providers=["CPUExecutionProvider"],
            sess_options=so,
        )
        # name of the model's first input, used as the feed key in `_infer`
        self.inp = self.sess.get_inputs()[0].name
        print("✅ ONNX beverage model loaded")

    def __repr__(self) -> str:
        return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"

    # ---- preprocessing ---------------------------------------------------
    def _letterbox(self, im: ndarray):
        """Resize ``im`` to fit ``input_size`` and pad to a square canvas.

        The resized image sits in the top-left corner; the remainder is
        filled with 114. Returns ``(canvas, scale)`` where ``scale`` is the
        resize factor that was applied to the original image.
        """
        h0, w0 = im.shape[:2]
        s = min(self.input_size / h0, self.input_size / w0)
        # Extreme aspect ratios can round the short side down to 0, which
        # cv2.resize rejects; keep at least one pixel.
        nh = max(1, int(round(h0 * s)))
        nw = max(1, int(round(w0 * s)))
        r = cv2.resize(im, (nw, nh))
        out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
        out[:nh, :nw] = r
        return out, s

    # ---- inference -------------------------------------------------------
    def _decode(self, raw: ndarray, scale: float) -> ndarray:
        """Turn one raw model output into ``(N, 4 + num_classes)`` rows.

        ``raw`` holds centre-format boxes (cx, cy, w, h) followed by class
        scores, laid out either as ``(4+nc, N)`` or ``(N, 4+nc)``; the
        shorter axis is taken to be the channel axis. Boxes are converted to
        corner format (x1, y1, x2, y2) and divided by ``scale`` to map them
        back to original-image coordinates.
        """
        p = raw.T if raw.shape[0] < raw.shape[1] else raw    # (N, 4+nc)
        centre = p[:, :2]
        half = p[:, 2:4] / 2
        scores = p[:, 4:4 + self.num_classes]
        return np.concatenate(
            [(centre - half) / scale, (centre + half) / scale, scores], axis=1)

    def _infer(self, im_bgr: ndarray) -> ndarray:
        """Run the model on one BGR image; return decoded rows (see `_decode`)."""
        lb, s = self._letterbox(im_bgr)
        # BGR -> RGB, HWC -> NCHW, scale to [0, 1]; made contiguous because the
        # channel flip and transpose produce a strided view.
        x = np.ascontiguousarray(
            lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0)
        raw = self.sess.run(None, {self.inp: x})[0][0]
        return self._decode(raw, s)

    # ---- postprocessing --------------------------------------------------
    @staticmethod
    def _clip_boxes(boxes: ndarray, width: int, height: int) -> ndarray:
        """Return a copy of xyxy ``boxes`` clamped to ``[0, width] x [0, height]``."""
        clipped = boxes.copy()
        clipped[:, [0, 2]] = np.clip(clipped[:, [0, 2]], 0, width)
        clipped[:, [1, 3]] = np.clip(clipped[:, [1, 3]], 0, height)
        return clipped

    def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
        """Detect beverages in one BGR frame.

        Returns boxes in original-image pixel coordinates, clipped to the
        frame, after the per-class confidence gate, per-class NMS (at most
        ``max_det`` survivors per class) and the ``min_box_area`` filter.
        """
        height, width = im_bgr.shape[:2]
        det = self._infer(im_bgr)
        if self.use_flip_tta:
            fl = self._infer(np.ascontiguousarray(im_bgr[:, ::-1]))
            # Mirror the flipped-pass boxes back; x1/x2 swap roles under a flip.
            x1 = width - fl[:, 2]
            x2 = width - fl[:, 0]
            fl[:, 0], fl[:, 2] = x1, x2
            det = np.concatenate([det, fl], axis=0)

        cls = det[:, 4:].argmax(1)
        conf = det[:, 4:].max(1)
        keep = conf >= self.conf_thres[cls]
        det, cls, conf = det[keep], cls[keep], conf[keep]

        out: list[BoundingBox] = []
        if det.shape[0] == 0:
            return out

        # Decoded boxes may spill past the frame; clamp them so the reported
        # coordinates are valid pixels and the area filter sees visible area.
        xyxy = self._clip_boxes(det[:, :4], width, height)

        for c in range(self.num_classes):
            m = cls == c
            if not m.any():
                continue
            b = xyxy[m]
            sc = conf[m]
            # cv2.dnn.NMSBoxes expects (x, y, w, h) rectangles.
            xywh = np.concatenate([b[:, :2], b[:, 2:] - b[:, :2]], axis=1)
            idx = cv2.dnn.NMSBoxes(
                bboxes=xywh.tolist(),
                scores=sc.tolist(), score_threshold=0.0,
                nms_threshold=self.iou_thres,
            )
            # The return shape varies across OpenCV versions (tuple, (K,) or
            # (K, 1)); flatten to a 1-D integer index array.
            for i in np.asarray(idx, dtype=np.int64).reshape(-1)[: self.max_det]:
                x1, y1, x2, y2 = b[i]
                if (x2 - x1) * (y2 - y1) < self.min_box_area:
                    continue
                out.append(BoundingBox(
                    x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2),
                    cls_id=int(c), conf=float(sc[i])))
        return out

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Run detection on every image in ``batch_images``.

        Frame ``i`` of the batch is reported with ``frame_id = offset + i``.
        A frame whose detection raises is logged and reported with no boxes.
        ``keypoints`` is filled with ``n_keypoints`` ``(0, 0)`` placeholders.
        """
        results: list[TVFrameResult] = []
        for i, img in enumerate(batch_images):
            try:
                boxes = self._detect(np.ascontiguousarray(img))
            except Exception as e:                # never crash the chute
                print(f"⚠️ frame {offset + i} detect error: {e}")
                boxes = []
            results.append(TVFrameResult(
                frame_id=offset + i, boxes=boxes,
                keypoints=[(0, 0) for _ in range(n_keypoints)]))
        return results