baxtos committed on
Commit
a427865
·
verified ·
1 Parent(s): 8b7748c

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +140 -0
miner.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Open-source Detect-beverage miner (manak0/Detect-beverage-detect).
2
+
3
+ ONNX + onnxruntime (no torch/ultralytics at inference -> light repo,
4
+ deterministic; spot-check re-runs this same code+weights). Trained
5
+ yolo11n with class order [cup, bottle, can] == manifest `objects`, so
6
+ cls_id maps directly (0=cup,1=bottle,2=can). Letterbox 1280 (manifest
7
+ preproc resize_long), flip-TTA, per-class conf, per-class NMS.
8
+
9
+ Contract (turbovision example_miner): class `Miner` at HF repo root;
10
+ `predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]`.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+
17
+ import cv2
18
+ import numpy as np
19
+ import onnxruntime as ort
20
+ from numpy import ndarray
21
+ from pydantic import BaseModel
22
+
23
+
24
class BoundingBox(BaseModel):
    """A single detection: axis-aligned box, class index and score.

    Coordinates are integer pixel positions in the original (un-letterboxed)
    frame, as produced by ``Miner._detect``.
    """

    # Top-left corner of the box.
    x1: int
    y1: int
    # Bottom-right corner of the box.
    x2: int
    y2: int
    # Class index; the module docstring maps 0=cup, 1=bottle, 2=can.
    cls_id: int
    # Score of the winning class for this box.
    conf: float
31
+
32
+
33
class TVFrameResult(BaseModel):
    """Per-frame output record returned by ``Miner.predict_batch``."""

    # Index of the frame; predict_batch sets it to ``offset + position``.
    frame_id: int
    # Detections found in the frame (empty when detection failed).
    boxes: list[BoundingBox]
    # (x, y) keypoint pairs; this miner only emits (0, 0) placeholders.
    keypoints: list[tuple[int, int]]
37
+
38
+
39
class Miner:
    """ONNX Runtime beverage detector exposing ``predict_batch``.

    Pipeline per frame: letterbox to ``input_size`` (padding bottom/right),
    run the ONNX model, decode centre-format boxes back to original-image
    pixels, optionally merge a horizontally flipped pass (flip-TTA), gate by
    per-class confidence, run NMS separately for every class, drop tiny
    boxes and finally cap the number of detections per frame.
    """

    weights_file = "best.onnx"
    input_size = 1280
    num_classes = 3  # cup, bottle, can
    # per-class confidence (tuned on held-out; cup scarcer -> lower gate)
    conf_thres = np.array([0.25, 0.35, 0.35], dtype=np.float32)
    # IoU threshold handed to cv2.dnn.NMSBoxes
    iou_thres = 0.55
    # maximum number of boxes returned for one frame (all classes together)
    max_det = 100
    # boxes smaller than this area (px^2, original image) are discarded
    min_box_area = 36.0
    use_flip_tta = True

    def __init__(self, path_hf_repo: Path) -> None:
        """Load ``weights_file`` from *path_hf_repo* into a CPU ORT session."""
        so = ort.SessionOptions()
        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        self.sess = ort.InferenceSession(
            str(Path(path_hf_repo) / self.weights_file),
            providers=["CPUExecutionProvider"],
            sess_options=so,
        )
        # Name of the model's (single) image input, needed for sess.run().
        self.inp = self.sess.get_inputs()[0].name
        print("✅ ONNX beverage model loaded")

    def __repr__(self) -> str:
        return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"

    # ---- preprocessing ---------------------------------------------------
    @staticmethod
    def _as_bgr(im: ndarray) -> ndarray:
        """Return *im* with exactly three channels.

        Grayscale input (H, W) or (H, W, 1) is replicated to three channels
        and a fourth (alpha) channel is dropped. Three-channel input is
        returned unchanged.
        """
        if im.ndim == 2:
            return np.repeat(im[:, :, None], 3, axis=2)
        if im.ndim == 3 and im.shape[2] == 1:
            return np.repeat(im, 3, axis=2)
        if im.ndim == 3 and im.shape[2] == 4:
            return im[:, :, :3]
        return im

    def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
        """Resize *im* to fit ``input_size`` and pad bottom/right with 114.

        Returns:
            ``(canvas, scale)`` where ``canvas`` is a square uint8 image of
            side ``input_size`` and ``scale`` is the resize factor applied
            to the original frame.

        Raises:
            ValueError: if the frame has zero height or width.
        """
        im = self._as_bgr(im)
        h0, w0 = im.shape[:2]
        if h0 == 0 or w0 == 0:
            raise ValueError(f"empty frame with shape {im.shape}")
        s = min(self.input_size / h0, self.input_size / w0)
        # Clamp so extremely thin frames never round down to a 0-pixel side.
        nh = min(self.input_size, max(1, int(round(h0 * s))))
        nw = min(self.input_size, max(1, int(round(w0 * s))))
        r = cv2.resize(im, (nw, nh))
        out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
        out[:nh, :nw] = r
        return out, s

    # ---- inference / decoding --------------------------------------------
    @staticmethod
    def _decode(raw: ndarray, scale: float, img_w: int, img_h: int,
                num_classes: int) -> ndarray:
        """Convert raw model output to clipped xyxy boxes plus class scores.

        Args:
            raw: 2-D model output, either (4+nc, N) or (N, 4+nc); the first
                four values of a prediction are centre-x, centre-y, width,
                height in letterboxed pixels.
            scale: letterbox resize factor used for the frame.
            img_w, img_h: size of the original frame, used for clipping.
            num_classes: number of score columns to keep.

        Returns:
            Array of shape (N, 4+nc): x1, y1, x2, y2 in original-image
            pixels (clipped to the frame) followed by the class scores.
        """
        preds = raw.T if raw.shape[0] < raw.shape[1] else raw  # (N, 4+nc)
        centers = preds[:, :2]
        half = preds[:, 2:4] / 2
        corners = np.concatenate(
            [(centers - half) / scale, (centers + half) / scale], axis=1)
        # Predictions near the border can extend past the frame; clip them
        # so callers never receive negative or out-of-image coordinates.
        corners[:, 0::2] = np.clip(corners[:, 0::2], 0, img_w)
        corners[:, 1::2] = np.clip(corners[:, 1::2], 0, img_h)
        scores = preds[:, 4:4 + num_classes]
        return np.concatenate([corners, scores], axis=1)

    @staticmethod
    def _unflip(det: ndarray, width: int) -> ndarray:
        """Map boxes predicted on a horizontally flipped frame back.

        Returns a new array; *det* is left untouched.
        """
        out = det.copy()
        out[:, 0] = width - det[:, 2]
        out[:, 2] = width - det[:, 0]
        return out

    @staticmethod
    def _cap_detections(boxes: list, max_det: int) -> list:
        """Keep at most *max_det* boxes, preferring the highest ``conf``.

        The input order is preserved when no trimming is needed; ties keep
        their original relative order (stable sort).
        """
        if len(boxes) <= max_det:
            return boxes
        return sorted(boxes, key=lambda bx: bx.conf, reverse=True)[:max_det]

    def _infer(self, im_bgr: ndarray) -> ndarray:
        """Run the model on one frame; return (N, 4+nc) xyxy + scores."""
        h0, w0 = im_bgr.shape[:2]
        lb, s = self._letterbox(im_bgr)
        # BGR -> RGB, HWC -> NCHW, scaled to [0, 1]; made contiguous so the
        # runtime does not have to copy a strided view.
        x = np.ascontiguousarray(
            lb[:, :, ::-1].transpose(2, 0, 1)[None], dtype=np.float32) / 255.0
        raw = self.sess.run(None, {self.inp: x})[0][0]  # (4+nc, N)
        return self._decode(raw, s, w0, h0, self.num_classes)

    def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
        """Detect beverages in one frame and return the final boxes."""
        det = self._infer(im_bgr)
        if self.use_flip_tta:
            flipped = self._infer(im_bgr[:, ::-1])
            det = np.concatenate(
                [det, self._unflip(flipped, im_bgr.shape[1])], axis=0)

        cls = det[:, 4:].argmax(1)
        conf = det[:, 4:].max(1)
        keep = conf >= self.conf_thres[cls]
        det, cls, conf = det[keep], cls[keep], conf[keep]

        out: list[BoundingBox] = []
        for c in range(self.num_classes):
            m = cls == c
            if not m.any():
                continue
            b = det[m, :4]
            sc = conf[m]
            # NMSBoxes expects [x, y, w, h]; it also merges the duplicates
            # coming from the original and the flipped pass.
            idx = cv2.dnn.NMSBoxes(
                bboxes=[[float(x1), float(y1), float(x2 - x1),
                         float(y2 - y1)] for x1, y1, x2, y2 in b],
                scores=sc.tolist(), score_threshold=0.0,
                nms_threshold=self.iou_thres,
            )
            for i in np.asarray(idx, dtype=np.intp).reshape(-1):
                x1, y1, x2, y2 = b[i]
                if (x2 - x1) * (y2 - y1) < self.min_box_area:
                    continue
                ix1, iy1, ix2, iy2 = int(x1), int(y1), int(x2), int(y2)
                # Truncation to int can collapse a very thin box.
                if ix2 <= ix1 or iy2 <= iy1:
                    continue
                out.append(BoundingBox(
                    x1=ix1, y1=iy1, x2=ix2, y2=iy2,
                    cls_id=int(c), conf=float(sc[i])))
        # Enforce the cap once per frame, after filtering, so max_det is a
        # true upper bound and filtered-out boxes do not consume slots.
        return self._cap_detections(out, self.max_det)

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Run detection on every frame of a batch.

        Args:
            batch_images: frames to process, in order.
            offset: frame id of the first image; frame ``i`` gets
                ``offset + i``.
            n_keypoints: number of ``(0, 0)`` placeholder keypoints to emit
                per frame.

        Returns:
            One ``TVFrameResult`` per input frame. A frame whose detection
            raises is reported with an empty box list instead of failing
            the whole batch.
        """
        results: list[TVFrameResult] = []
        for i, img in enumerate(batch_images):
            try:
                boxes = self._detect(np.ascontiguousarray(img))
            except Exception as e:  # never crash the chute
                print(f"⚠️ frame {offset + i} detect error: {e}")
                boxes = []
            results.append(TVFrameResult(
                frame_id=offset + i, boxes=boxes,
                keypoints=[(0, 0) for _ in range(n_keypoints)]))
        return results