baxtos committed on
Commit
652de85
·
verified ·
1 Parent(s): 56286d1

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +246 -62
miner.py CHANGED
@@ -1,13 +1,16 @@
1
- """Open-source Detect-beverage miner (manak0/Detect-beverage-detect).
2
 
3
- ONNX + onnxruntime (no torch/ultralytics at inference -> light repo,
4
- deterministic; spot-check re-runs this same code+weights). Trained
5
- yolo11n with class order [cup, bottle, can] == manifest `objects`, so
6
- cls_id maps directly (0=cup,1=bottle,2=can). Letterbox 1280 (manifest
7
- preproc resize_long), flip-TTA, per-class conf, global NMS.
8
 
9
- Contract (turbovision example_miner): class `Miner` at HF repo root;
10
- `predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]`.
 
 
 
 
 
 
11
  """
12
 
13
  from __future__ import annotations
@@ -39,12 +42,24 @@ class TVFrameResult(BaseModel):
39
  class Miner:
40
  weights_file = "best.onnx"
41
  input_size = 1280
42
- num_classes = 3 # cup, bottle, can
43
- # per-class confidence (tuned on held-out; cup scarcer -> lower gate)
44
- conf_thres = np.array([0.25, 0.35, 0.35], dtype=np.float32)
45
- iou_thres = 0.55
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  max_det = 100
47
- min_box_area = 36.0
48
  use_flip_tta = True
49
 
50
  def __init__(self, path_hf_repo: Path) -> None:
@@ -59,34 +74,31 @@ class Miner:
59
  sess_options=so,
60
  )
61
  self.inp = self.sess.get_inputs()[0].name
62
- # ONNX может быть экспортирован в fp16 (для лимита репо ≤30MB)
63
- # кастим вход в тот же dtype, иначе INVALID_ARGUMENT на sess.run.
64
- _ort_type = self.sess.get_inputs()[0].type # e.g. "tensor(float16)"
65
  self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
66
  active = self.sess.get_providers()[0]
67
- print(f"✅ ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
68
 
69
- # Eager CUDA EP allocation: ORT lazily binds CUDA on first sess.run,
70
- # so without this the validator's first /predict eats the cold-bind
71
- # cost (30-300s in TEE-VM) and the scheduler reaps the instance
72
- # before activation. Run a no-op inference here so on_startup only
73
- # returns once GPU kernels/buffers are hot.
74
  try:
75
- _dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
76
- _ = self._infer(_dummy)
77
- print(f"✅ ONNX warmup pass completed (provider={active})")
78
  except Exception as e:
79
- print(f"⚠️ ONNX warmup pass failed (not fatal): {e}")
80
 
81
  def __repr__(self) -> str:
82
- return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
83
 
84
- # ---- preprocessing ---------------------------------------------------
85
- def _letterbox(self, im: ndarray):
86
  h0, w0 = im.shape[:2]
87
  s = min(self.input_size / h0, self.input_size / w0)
88
  nh, nw = int(round(h0 * s)), int(round(w0 * s))
89
- r = cv2.resize(im, (nw, nh))
 
 
90
  out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
91
  out[:nh, :nw] = r
92
  return out, s
@@ -95,54 +107,226 @@ class Miner:
95
  lb, s = self._letterbox(im_bgr)
96
  x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
97
  ).astype(self.np_dtype)
98
- out = self.sess.run(None, {self.inp: x})[0][0] # (4+nc, N)
99
- # ONNX fp16 → numpy float16 в out; для последующего NMS на CPU
100
- # удобнее float32, кастим обратно
101
  out = np.asarray(out, dtype=np.float32)
102
- p = out.T if out.shape[0] < out.shape[1] else out # (N, 4+nc)
103
  boxes = p[:, :4].copy()
104
  scores = p[:, 4:4 + self.num_classes]
105
- # xywh(center) -> xyxy in original image coords
106
  xy = boxes[:, :2]
107
  wh = boxes[:, 2:4]
108
  x1y1 = (xy - wh / 2) / s
109
  x2y2 = (xy + wh / 2) / s
110
- return np.concatenate([x1y1, x2y2, scores], axis=1) # (N,4+nc)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
 
 
 
113
  det = self._infer(im_bgr)
114
  if self.use_flip_tta:
115
  fl = self._infer(im_bgr[:, ::-1])
116
  W = im_bgr.shape[1]
117
- x1 = W - fl[:, 2]
118
- x2 = W - fl[:, 0]
119
- fl[:, 0], fl[:, 2] = x1, x2
120
  det = np.concatenate([det, fl], axis=0)
121
 
122
- cls = det[:, 4:].argmax(1)
123
- conf = det[:, 4:].max(1)
124
- keep = conf >= self.conf_thres[cls]
125
- det, cls, conf = det[keep], cls[keep], conf[keep]
126
- out: list[BoundingBox] = []
127
- for c in range(self.num_classes):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  m = cls == c
129
- if not m.any():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  continue
131
- b = det[m, :4]
132
- sc = conf[m]
133
- idx = cv2.dnn.NMSBoxes(
134
- bboxes=[[float(x1), float(y1), float(x2 - x1),
135
- float(y2 - y1)] for x1, y1, x2, y2 in b],
136
- scores=sc.tolist(), score_threshold=0.0,
137
- nms_threshold=self.iou_thres,
138
- )
139
- for i in np.array(idx).flatten()[: self.max_det]:
140
- x1, y1, x2, y2 = b[i]
141
- if (x2 - x1) * (y2 - y1) < self.min_box_area:
142
- continue
143
- out.append(BoundingBox(
144
- x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2),
145
- cls_id=int(c), conf=float(sc[i])))
146
  return out
147
 
148
  def predict_batch(
@@ -156,7 +340,7 @@ class Miner:
156
  try:
157
  boxes = self._detect(np.ascontiguousarray(img))
158
  except Exception as e: # never crash the chute
159
- print(f"⚠️ frame {offset + i} detect error: {e}")
160
  boxes = []
161
  results.append(TVFrameResult(
162
  frame_id=offset + i, boxes=boxes,
 
1
+ """Open-source Detect-beverage miner v9 (post-proc upgrade, weights unchanged).
2
 
3
+ Same ONNX weights as v8 (yolo11s fp16, mAP50 0.835 on holdout). Post-proc
4
+ synthesised from the three strongest current peers:
 
 
 
5
 
6
+ - per-class conf + can-rescue bonus (navierstocks/drink @98280af6)
7
+ - sane-box geometric filter (drink + yevheniiapopova)
8
+ - containment dedup same-class (yevheniiapopova @f3becc13)
9
+ - cross-class dedup high-IoU (drink)
10
+ - INTER_CUBIC on upsample letterbox (drink + tensorminer)
11
+ - TTA flip + cluster-boost conf (drink)
12
+
13
+ Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
14
  """
15
 
16
  from __future__ import annotations
 
42
  class Miner:
43
  weights_file = "best.onnx"
44
  input_size = 1280
45
+ num_classes = 3 # cup, bottle, can
46
+
47
+ # per-class conf (swept on validator-pseudo holdout 73 imgs against v10 weights,
48
+ # peak UI 79.28%): cup/bottle moderate (model is more accurate now), can softer + rescue.
49
+ conf_thres = np.array([0.55, 0.55, 0.45], dtype=np.float32)
50
+ # per-class rescue bonus: if no boxes of class c pass conf, admit its top-1
51
+ # candidate when conf >= conf_thres[c] - bonus[c]. Only `can` (was 7/12 of
52
+ # our misses on common challenges with lead).
53
+ rescue_bonus = np.array([0.0, 0.0, 0.20], dtype=np.float32)
54
+
55
+ iou_thres = 0.40 # per-class NMS (was 0.55)
56
+ cross_iou_thres = 0.70 # cross-class dedup
57
+ containment_thres = 1.00 # effectively OFF for v10 (better recall without): only a box lying 100% inside a larger same-class box is still dropped
58
+
59
+ min_box_area = 100.0 # was 36 (5 of 20 our FPs <400px²)
60
+ min_side = 8.0
61
+ max_aspect_ratio = 10.0
62
  max_det = 100
 
63
  use_flip_tta = True
64
 
65
  def __init__(self, path_hf_repo: Path) -> None:
 
74
  sess_options=so,
75
  )
76
  self.inp = self.sess.get_inputs()[0].name
77
+ _ort_type = self.sess.get_inputs()[0].type # "tensor(float16)" or fp32
 
 
78
  self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
79
  active = self.sess.get_providers()[0]
80
+ print(f"✅ v9 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
81
 
82
+ # Eager CUDA EP allocation — same trick as v8: ORT lazily binds CUDA on
83
+ # first sess.run, TEE cold-bind eats 30-300s otherwise.
 
 
 
84
  try:
85
+ dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
86
+ _ = self._infer(dummy)
87
+ print(f"✅ v9 ONNX warmup pass completed (provider={active})")
88
  except Exception as e:
89
+ print(f"⚠️ v9 ONNX warmup pass failed (not fatal): {e}")
90
 
91
  def __repr__(self) -> str:
92
+ return f"BeverageONNXv9(in={self.input_size}, cls={self.num_classes})"
93
 
94
+ # ---- preprocessing --------------------------------------------------
95
+ def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
96
  h0, w0 = im.shape[:2]
97
  s = min(self.input_size / h0, self.input_size / w0)
98
  nh, nw = int(round(h0 * s)), int(round(w0 * s))
99
+ # INTER_CUBIC if upsampling, INTER_LINEAR if downsampling (peer trick)
100
+ interp = cv2.INTER_CUBIC if s > 1.0 else cv2.INTER_LINEAR
101
+ r = cv2.resize(im, (nw, nh), interpolation=interp)
102
  out = np.full((self.input_size, self.input_size, 3), 114, np.uint8)
103
  out[:nh, :nw] = r
104
  return out, s
 
107
  lb, s = self._letterbox(im_bgr)
108
  x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
109
  ).astype(self.np_dtype)
110
+ out = self.sess.run(None, {self.inp: x})[0][0] # (4+nc, N) or (N, 4+nc)
 
 
111
  out = np.asarray(out, dtype=np.float32)
112
+ p = out.T if out.shape[0] < out.shape[1] else out # (N, 4+nc)
113
  boxes = p[:, :4].copy()
114
  scores = p[:, 4:4 + self.num_classes]
115
+ # xywh(center) -> xyxy in original image coords
116
  xy = boxes[:, :2]
117
  wh = boxes[:, 2:4]
118
  x1y1 = (xy - wh / 2) / s
119
  x2y2 = (xy + wh / 2) / s
120
+ return np.concatenate([x1y1, x2y2, scores], axis=1) # (N, 4+nc)
121
+
122
+ # ---- post-processing primitives -------------------------------------
123
+ @staticmethod
124
+ def _hard_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
125
+ if len(boxes) == 0:
126
+ return np.array([], dtype=np.intp)
127
+ order = np.argsort(-scores)
128
+ keep: list[int] = []
129
+ while len(order):
130
+ i = int(order[0])
131
+ keep.append(i)
132
+ if len(order) == 1:
133
+ break
134
+ rest = order[1:]
135
+ xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
136
+ yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
137
+ xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
138
+ yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
139
+ inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
140
+ ai = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
141
+ ar = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
142
+ iou = inter / (ai + ar - inter + 1e-7)
143
+ order = rest[iou <= iou_thresh]
144
+ return np.array(keep, dtype=np.intp)
145
+
146
+ def _sane_filter(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
147
+ orig_h: int, orig_w: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
148
+ if len(boxes) == 0:
149
+ return boxes, scores, cls
150
+ bw = np.maximum(0.0, boxes[:, 2] - boxes[:, 0])
151
+ bh = np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
152
+ area = bw * bh
153
+ ar = np.where(
154
+ (bw > 0) & (bh > 0),
155
+ np.maximum(bw / np.maximum(bh, 1e-6), bh / np.maximum(bw, 1e-6)),
156
+ np.inf,
157
+ )
158
+ keep = (
159
+ (bw >= self.min_side) & (bh >= self.min_side)
160
+ & (area >= self.min_box_area)
161
+ & (area <= 0.95 * orig_h * orig_w)
162
+ & (ar <= self.max_aspect_ratio)
163
+ )
164
+ return boxes[keep], scores[keep], cls[keep]
165
+
166
+ def _conf_filter_with_rescue(self, scores: np.ndarray, cls: np.ndarray) -> np.ndarray:
167
+ if len(scores) == 0:
168
+ return np.zeros(0, dtype=bool)
169
+ keep = scores >= self.conf_thres[cls]
170
+ # per-class rescue: if class c has zero passes, admit top-1 candidate
171
+ # whose conf >= conf_thres[c] - rescue_bonus[c]
172
+ for c in np.unique(cls):
173
+ b = float(self.rescue_bonus[c])
174
+ if b <= 0.0:
175
+ continue
176
+ cm = cls == c
177
+ if keep[cm].any():
178
+ continue
179
+ idx = np.where(cm)[0]
180
+ top = int(idx[int(np.argmax(scores[idx]))])
181
+ if scores[top] >= self.conf_thres[c] - b:
182
+ keep[top] = True
183
+ return keep
184
+
185
+ def _cross_class_dedup(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
186
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
187
+ """Drop dup boxes with IoU > cross_iou_thres, regardless of class (typically one object getting two cls labels).
188
+ Boxes are visited by larger margin-over-threshold first, then larger area."""
189
+ n = len(boxes)
190
+ if n <= 1:
191
+ return boxes, scores, cls
192
+ areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
193
+ margins = scores - self.conf_thres[cls]
194
+ order = np.lexsort((-areas, -margins))
195
+ suppressed = np.zeros(n, dtype=bool)
196
+ keep: list[int] = []
197
+ for i in order:
198
+ if suppressed[i]:
199
+ continue
200
+ keep.append(int(i))
201
+ bi = boxes[i]
202
+ xx1 = np.maximum(bi[0], boxes[:, 0])
203
+ yy1 = np.maximum(bi[1], boxes[:, 1])
204
+ xx2 = np.minimum(bi[2], boxes[:, 2])
205
+ yy2 = np.minimum(bi[3], boxes[:, 3])
206
+ inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
207
+ ai = max(1e-7, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
208
+ iou = inter / (ai + areas - inter + 1e-7)
209
+ dup = iou > self.cross_iou_thres
210
+ dup[i] = False
211
+ suppressed |= dup
212
+ idx = np.array(keep, dtype=np.intp)
213
+ return boxes[idx], scores[idx], cls[idx]
214
+
215
+ def _containment_dedup(self, boxes: np.ndarray, scores: np.ndarray, cls: np.ndarray,
216
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
217
+ """Drop a box if ≥ containment_thres of its area is inside a same-class
218
+ box that is larger (or equal-size with higher conf). Catches the
219
+ bottle-inside-bottle / cup-inside-cup pattern YOLO often produces."""
220
+ n = len(boxes)
221
+ if n <= 1:
222
+ return boxes, scores, cls
223
+ area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
224
+ iw = np.maximum(0.0, np.minimum(boxes[:, 2:3], boxes[None, :, 2])
225
+ - np.maximum(boxes[:, 0:1], boxes[None, :, 0]))
226
+ ih = np.maximum(0.0, np.minimum(boxes[:, 3:4], boxes[None, :, 3])
227
+ - np.maximum(boxes[:, 1:2], boxes[None, :, 1]))
228
+ inter = iw * ih
229
+ contain = inter / np.maximum(area[:, None], 1e-9) # frac of i contained in j
230
+ same_class = cls[:, None] == cls[None, :]
231
+ bigger = area[None, :] > area[:, None]
232
+ tiebreak = (area[None, :] == area[:, None]) & (scores[None, :] > scores[:, None])
233
+ dominator = same_class & (bigger | tiebreak)
234
+ np.fill_diagonal(dominator, False)
235
+ suppressed = ((contain >= self.containment_thres) & dominator).any(axis=1)
236
+ keep = np.where(~suppressed)[0]
237
+ return boxes[keep], scores[keep], cls[keep]
238
 
239
+ def _cluster_boost(self, kept_boxes: np.ndarray, kept_cls: np.ndarray,
240
+ all_boxes: np.ndarray, all_scores: np.ndarray, all_cls: np.ndarray,
241
+ ) -> np.ndarray:
242
+ """For each kept box, return max conf among same-class boxes overlapping
243
+ with IoU≥iou_thres (incl. itself). TTA confidence aggregation."""
244
+ n = len(kept_boxes)
245
+ if n == 0:
246
+ return np.empty(0, dtype=np.float32)
247
+ all_areas = (np.maximum(0.0, all_boxes[:, 2] - all_boxes[:, 0])
248
+ * np.maximum(0.0, all_boxes[:, 3] - all_boxes[:, 1]))
249
+ out = np.empty(n, dtype=np.float32)
250
+ for i in range(n):
251
+ bi = kept_boxes[i]
252
+ xx1 = np.maximum(bi[0], all_boxes[:, 0])
253
+ yy1 = np.maximum(bi[1], all_boxes[:, 1])
254
+ xx2 = np.minimum(bi[2], all_boxes[:, 2])
255
+ yy2 = np.minimum(bi[3], all_boxes[:, 3])
256
+ inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
257
+ ai = max(0.0, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
258
+ iou = inter / (ai + all_areas - inter + 1e-7)
259
+ cluster = (iou >= self.iou_thres) & (all_cls == kept_cls[i])
260
+ out[i] = float(np.max(all_scores[cluster])) if np.any(cluster) else 0.0
261
+ return out
262
+
263
+ # ---- top-level detect with TTA --------------------------------------
264
  def _detect(self, im_bgr: ndarray) -> list[BoundingBox]:
265
+ orig_h, orig_w = im_bgr.shape[:2]
266
+
267
+ # 1. Inference + optional flip TTA
268
  det = self._infer(im_bgr)
269
  if self.use_flip_tta:
270
  fl = self._infer(im_bgr[:, ::-1])
271
  W = im_bgr.shape[1]
272
+ x1n = W - fl[:, 2]
273
+ x2n = W - fl[:, 0]
274
+ fl[:, 0], fl[:, 2] = x1n, x2n
275
  det = np.concatenate([det, fl], axis=0)
276
 
277
+ # 2. Pick class + per-class conf filter + rescue
278
+ boxes = det[:, :4]
279
+ cls_all = det[:, 4:].argmax(1).astype(np.int32)
280
+ conf_all = det[:, 4:].max(1)
281
+ keep = self._conf_filter_with_rescue(conf_all, cls_all)
282
+ boxes, scores, cls = boxes[keep], conf_all[keep], cls_all[keep]
283
+ if len(boxes) == 0:
284
+ return []
285
+
286
+ # 3. Sane filter (geometric)
287
+ boxes, scores, cls = self._sane_filter(boxes, scores, cls, orig_h, orig_w)
288
+ if len(boxes) == 0:
289
+ return []
290
+
291
+ # Keep raw cluster for boost (before any dedup)
292
+ raw_boxes, raw_scores, raw_cls = boxes.copy(), scores.copy(), cls.copy()
293
+
294
+ # 4. Per-class hard NMS
295
+ keep_idx: list[int] = []
296
+ for c in np.unique(cls):
297
  m = cls == c
298
+ mi = np.where(m)[0]
299
+ k = self._hard_nms(boxes[m], scores[m], self.iou_thres)
300
+ keep_idx.extend(mi[k].tolist())
301
+ keep_idx.sort()
302
+ ki = np.array(keep_idx, dtype=np.intp)
303
+ boxes, scores, cls = boxes[ki], scores[ki], cls[ki]
304
+
305
+ # 5. Containment dedup (drop a box mostly inside same-class bigger box)
306
+ boxes, scores, cls = self._containment_dedup(boxes, scores, cls)
307
+
308
+ # 6. Cross-class dedup (one object → one class only)
309
+ boxes, scores, cls = self._cross_class_dedup(boxes, scores, cls)
310
+
311
+ # 7. Cluster-boost confidence (TTA aggregation)
312
+ if len(boxes):
313
+ boosted = self._cluster_boost(boxes, cls, raw_boxes, raw_scores, raw_cls)
314
+ else:
315
+ boosted = scores
316
+
317
+ # 8. Cap at max_det
318
+ if len(boxes) > self.max_det:
319
+ top = np.argsort(-boosted)[: self.max_det]
320
+ boxes, cls, boosted = boxes[top], cls[top], boosted[top]
321
+
322
+ out: list[BoundingBox] = []
323
+ for (x1, y1, x2, y2), c, s in zip(boxes, cls, boosted):
324
+ if x2 <= x1 or y2 <= y1:
325
  continue
326
+ out.append(BoundingBox(
327
+ x1=int(x1), y1=int(y1), x2=int(x2), y2=int(y2),
328
+ cls_id=int(c), conf=float(min(1.0, max(0.0, s))),
329
+ ))
 
 
 
 
 
 
 
 
 
 
 
330
  return out
331
 
332
  def predict_batch(
 
340
  try:
341
  boxes = self._detect(np.ascontiguousarray(img))
342
  except Exception as e: # never crash the chute
343
+ print(f"⚠️ v9 frame {offset + i} detect error: {e}")
344
  boxes = []
345
  results.append(TVFrameResult(
346
  frame_id=offset + i, boxes=boxes,