baxtos committed on
Commit
cd7f036
·
verified ·
1 Parent(s): 909bc91

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +52 -31
miner.py CHANGED
@@ -1,14 +1,17 @@
1
- """Open-source Detect-beverage miner v9 (post-proc upgrade, weights unchanged).
2
 
3
- Same ONNX weights as v8 (yolo11s fp16, mAP50 0.835 on holdout). Post-proc
4
- synthesised from the three strongest current peers:
 
 
5
 
6
- - per-class conf + can-rescue bonus (navierstocks/drink @98280af6)
7
- - sane-box geometric filter (drink + yevheniiapopova)
8
- - containment dedup same-class (yevheniiapopova @f3becc13)
9
- - cross-class dedup high-IoU (drink)
10
- - INTER_CUBIC on upsample letterbox (drink + tensorminer)
11
- - TTA flip + cluster-boost conf (drink)
 
12
 
13
  Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
14
  """
@@ -44,23 +47,20 @@ class Miner:
44
  input_size = 1280
45
  num_classes = 3 # cup, bottle, can
46
 
47
- # per-class conf (swept on validator-pseudo holdout 73 imgs against v10 weights,
48
- # peak UI 79.28%): cup/bottle moderate (model is more accurate now), can softer + rescue.
49
- conf_thres = np.array([0.55, 0.55, 0.45], dtype=np.float32)
50
- # per-class rescue bonus: if no boxes of class c pass conf, admit its top-1
51
- # candidate when conf >= conf_thres[c] - bonus[c]. Only `can` (was 7/12 of
52
- # our misses on common challenges with lead).
53
- rescue_bonus = np.array([0.0, 0.0, 0.20], dtype=np.float32)
54
 
55
- iou_thres = 0.40 # per-class NMS (was 0.55)
56
  cross_iou_thres = 0.70 # cross-class dedup
57
- containment_thres = 1.00 # OFF for v10 (better recall without)
58
 
59
- min_box_area = 100.0 # was 36 (5 of 20 our FPs <400px²)
60
  min_side = 8.0
61
  max_aspect_ratio = 10.0
62
- max_det = 100
63
- use_flip_tta = True
64
 
65
  def __init__(self, path_hf_repo: Path) -> None:
66
  so = ort.SessionOptions()
@@ -77,19 +77,23 @@ class Miner:
77
  _ort_type = self.sess.get_inputs()[0].type # "tensor(float16)" or fp32
78
  self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
79
  active = self.sess.get_providers()[0]
80
- print(f"✅ v9 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
81
 
82
- # Eager CUDA EP allocation — same trick as v8: ORT lazily binds CUDA on
83
- # first sess.run, TEE cold-bind eats 30-300s otherwise.
 
 
 
 
84
  try:
85
  dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
86
  _ = self._infer(dummy)
87
- print(f"✅ v9 ONNX warmup pass completed (provider={active})")
88
  except Exception as e:
89
- print(f"⚠️ v9 ONNX warmup pass failed (not fatal): {e}")
90
 
91
  def __repr__(self) -> str:
92
- return f"BeverageONNXv9(in={self.input_size}, cls={self.num_classes})"
93
 
94
  # ---- preprocessing --------------------------------------------------
95
  def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
@@ -107,17 +111,34 @@ class Miner:
107
  lb, s = self._letterbox(im_bgr)
108
  x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
109
  ).astype(self.np_dtype)
110
- out = self.sess.run(None, {self.inp: x})[0][0] # (4+nc, N) or (N, 4+nc)
111
- out = np.asarray(out, dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  p = out.T if out.shape[0] < out.shape[1] else out # → (N, 4+nc)
113
  boxes = p[:, :4].copy()
114
  scores = p[:, 4:4 + self.num_classes]
115
- # xywh(center) → xyxy in original image coords
116
  xy = boxes[:, :2]
117
  wh = boxes[:, 2:4]
118
  x1y1 = (xy - wh / 2) / s
119
  x2y2 = (xy + wh / 2) / s
120
- return np.concatenate([x1y1, x2y2, scores], axis=1) # (N, 4+nc)
121
 
122
  # ---- post-processing primitives -------------------------------------
123
  @staticmethod
 
1
+ """Open-source Detect-beverage miner v12 (new yolo11s weights + NMS-baked ONNX).
2
 
3
+ New v12 ONNX weights: trained on combined dataset (375 fresh cross-consensus
4
+ pseudo-GT from top 3 Beverage miners + 279 prior validator-pseudo). NMS-baked
5
+ export — output shape `[1, 300, 6]` (xyxy, conf, cls). On holdout 73 imgs:
6
+ ultralytics-val mAP50=0.879 (v8: 0.835).
7
 
8
+ Post-proc:
9
+ - detect NMS-baked output and unpack to (N, 4+num_classes) one-hot scores
10
+ - per-class conf filter `[0.60, 0.40, 0.60]` (best from sweep on v12 ONNX)
11
+ - sane-box geometric filter (min_box_area=100, max_aspect_ratio=10)
12
+ - per-class hard NMS @ iou=0.4 (redundant after baked NMS but safe)
13
+ - cross-class dedup @ iou=0.7
14
+ - TTA off (sweep showed flip-TTA + cluster-boost hurt UI on NMS-baked output)
15
 
16
  Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
17
  """
 
47
  input_size = 1280
48
  num_classes = 3 # cup, bottle, can
49
 
50
+ # per-class conf: best values from the v12 sweep without TTA (UI 80.32%, FP 0.832):
51
+ conf_thres = np.array([0.60, 0.40, 0.60], dtype=np.float32)
52
+ # rescue bonus disabled: the v12 model is strong enough not to need it
53
+ rescue_bonus = np.array([0.0, 0.0, 0.0], dtype=np.float32)
 
 
 
54
 
55
+ iou_thres = 0.40 # per-class NMS (redundant after baked-NMS but safe)
56
  cross_iou_thres = 0.70 # cross-class dedup
57
+ containment_thres = 1.00 # OFF
58
 
59
+ min_box_area = 100.0
60
  min_side = 8.0
61
  max_aspect_ratio = 10.0
62
+ max_det = 300 # match NMS-baked graph max_det
63
+ use_flip_tta = False # flip-TTA hurt UI on NMS-baked v12 (sweep -0.8 pp)
64
 
65
  def __init__(self, path_hf_repo: Path) -> None:
66
  so = ort.SessionOptions()
 
77
  _ort_type = self.sess.get_inputs()[0].type # "tensor(float16)" or fp32
78
  self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
79
  active = self.sess.get_providers()[0]
80
+ print(f"✅ v12 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
81
 
82
+ # Detect output format once
83
+ out0 = self.sess.get_outputs()[0]
84
+ print(f"ONNX output: name={out0.name} shape={out0.shape}")
85
+
86
+ # Eager CUDA EP allocation: ORT lazily binds CUDA on first sess.run,
87
+ # TEE cold-bind eats 30-300s otherwise.
88
  try:
89
  dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
90
  _ = self._infer(dummy)
91
+ print(f"✅ v12 ONNX warmup pass completed (provider={active})")
92
  except Exception as e:
93
+ print(f"⚠️ v12 ONNX warmup pass failed (not fatal): {e}")
94
 
95
  def __repr__(self) -> str:
96
+ return f"BeverageONNXv12(in={self.input_size}, cls={self.num_classes})"
97
 
98
  # ---- preprocessing --------------------------------------------------
99
  def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
 
111
  lb, s = self._letterbox(im_bgr)
112
  x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
113
  ).astype(self.np_dtype)
114
+ raw = self.sess.run(None, {self.inp: x})[0]
115
+ raw = np.asarray(raw, dtype=np.float32)
116
+
117
+ # NMS-baked output: [1, N, 6] = (x1, y1, x2, y2, conf, cls)
118
+ if raw.ndim == 3 and raw.shape[-1] == 6:
119
+ arr = raw[0]
120
+ keep = arr[:, 4] > 0 # drop zero-padding rows
121
+ arr = arr[keep]
122
+ if len(arr) == 0:
123
+ return np.zeros((0, 4 + self.num_classes), dtype=np.float32)
124
+ boxes = arr[:, :4].copy() / s # letterbox → orig coords
125
+ confs = arr[:, 4]
126
+ cls_ids = arr[:, 5].astype(np.int32)
127
+ cls_ids = np.clip(cls_ids, 0, self.num_classes - 1)
128
+ scores = np.zeros((len(arr), self.num_classes), dtype=np.float32)
129
+ scores[np.arange(len(arr)), cls_ids] = confs
130
+ return np.concatenate([boxes, scores], axis=1)
131
+
132
+ # Legacy raw YOLO output: [1, 4+nc, N] or [1, N, 4+nc] (xywh-center)
133
+ out = raw[0]
134
  p = out.T if out.shape[0] < out.shape[1] else out # → (N, 4+nc)
135
  boxes = p[:, :4].copy()
136
  scores = p[:, 4:4 + self.num_classes]
 
137
  xy = boxes[:, :2]
138
  wh = boxes[:, 2:4]
139
  x1y1 = (xy - wh / 2) / s
140
  x2y2 = (xy + wh / 2) / s
141
+ return np.concatenate([x1y1, x2y2, scores], axis=1)
142
 
143
  # ---- post-processing primitives -------------------------------------
144
  @staticmethod