scorevision: push artifact
Browse files
miner.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
| 1 |
-
"""Open-source Detect-beverage miner
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
| 11 |
-
-
|
|
|
|
| 12 |
|
| 13 |
Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
|
| 14 |
"""
|
|
@@ -44,23 +47,20 @@ class Miner:
|
|
| 44 |
input_size = 1280
|
| 45 |
num_classes = 3 # cup, bottle, can
|
| 46 |
|
| 47 |
-
# per-class conf
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
# candidate when conf >= conf_thres[c] - bonus[c]. Only `can` (was 7/12 of
|
| 52 |
-
# our misses on common challenges with lead).
|
| 53 |
-
rescue_bonus = np.array([0.0, 0.0, 0.20], dtype=np.float32)
|
| 54 |
|
| 55 |
-
iou_thres = 0.40 # per-class NMS (
|
| 56 |
cross_iou_thres = 0.70 # cross-class dedup
|
| 57 |
-
containment_thres = 1.00 # OFF
|
| 58 |
|
| 59 |
-
min_box_area = 100.0
|
| 60 |
min_side = 8.0
|
| 61 |
max_aspect_ratio = 10.0
|
| 62 |
-
max_det =
|
| 63 |
-
use_flip_tta =
|
| 64 |
|
| 65 |
def __init__(self, path_hf_repo: Path) -> None:
|
| 66 |
so = ort.SessionOptions()
|
|
@@ -77,19 +77,23 @@ class Miner:
|
|
| 77 |
_ort_type = self.sess.get_inputs()[0].type # "tensor(float16)" or fp32
|
| 78 |
self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
|
| 79 |
active = self.sess.get_providers()[0]
|
| 80 |
-
print(f"✅
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
try:
|
| 85 |
dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
|
| 86 |
_ = self._infer(dummy)
|
| 87 |
-
print(f"✅
|
| 88 |
except Exception as e:
|
| 89 |
-
print(f"⚠️
|
| 90 |
|
| 91 |
def __repr__(self) -> str:
|
| 92 |
-
return f"
|
| 93 |
|
| 94 |
# ---- preprocessing --------------------------------------------------
|
| 95 |
def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
|
|
@@ -107,17 +111,34 @@ class Miner:
|
|
| 107 |
lb, s = self._letterbox(im_bgr)
|
| 108 |
x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
|
| 109 |
).astype(self.np_dtype)
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
p = out.T if out.shape[0] < out.shape[1] else out # → (N, 4+nc)
|
| 113 |
boxes = p[:, :4].copy()
|
| 114 |
scores = p[:, 4:4 + self.num_classes]
|
| 115 |
-
# xywh(center) → xyxy in original image coords
|
| 116 |
xy = boxes[:, :2]
|
| 117 |
wh = boxes[:, 2:4]
|
| 118 |
x1y1 = (xy - wh / 2) / s
|
| 119 |
x2y2 = (xy + wh / 2) / s
|
| 120 |
-
return np.concatenate([x1y1, x2y2, scores], axis=1)
|
| 121 |
|
| 122 |
# ---- post-processing primitives -------------------------------------
|
| 123 |
@staticmethod
|
|
|
|
| 1 |
+
"""Open-source Detect-beverage miner v12 (new yolo11s weights + NMS-baked ONNX).
|
| 2 |
|
| 3 |
+
New v12 ONNX weights: trained on combined dataset (375 fresh cross-consensus
|
| 4 |
+
pseudo-GT from the top 3 Beverage miners + 279 prior validator pseudo-GT). NMS-baked
|
| 5 |
+
export — output shape `[1, 300, 6]` (xyxy, conf, cls). On a 73-image holdout set:
|
| 6 |
+
ultralytics-val mAP50=0.879 (v8: 0.835).
|
| 7 |
|
| 8 |
+
Post-proc:
|
| 9 |
+
- detect NMS-baked output and unpack to (N, 4+num_classes) one-hot scores
|
| 10 |
+
- per-class conf filter `[0.60, 0.40, 0.60]` (best from sweep on v12 ONNX)
|
| 11 |
+
- sane-box geometric filter (min_box_area=100, max_aspect_ratio=10)
|
| 12 |
+
- per-class hard NMS @ iou=0.4 (redundant after baked NMS but safe)
|
| 13 |
+
- cross-class dedup @ iou=0.7
|
| 14 |
+
- TTA off (sweep showed flip-TTA + cluster-boost hurt UI on NMS-baked output)
|
| 15 |
|
| 16 |
Contract: class `Miner` at HF root, `predict_batch(...) -> list[TVFrameResult]`.
|
| 17 |
"""
|
|
|
|
| 47 |
input_size = 1280
|
| 48 |
num_classes = 3 # cup, bottle, can
|
| 49 |
|
| 50 |
+
# per-class conf — best v12 sweep without TTA (UI 80.32%, FP 0.832):
|
| 51 |
+
conf_thres = np.array([0.60, 0.40, 0.60], dtype=np.float32)
|
| 52 |
+
# rescue bonus disabled — v12 model strong enough not to need it
|
| 53 |
+
rescue_bonus = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
iou_thres = 0.40 # per-class NMS (redundant after baked-NMS but safe)
|
| 56 |
cross_iou_thres = 0.70 # cross-class dedup
|
| 57 |
+
containment_thres = 1.00 # OFF
|
| 58 |
|
| 59 |
+
min_box_area = 100.0
|
| 60 |
min_side = 8.0
|
| 61 |
max_aspect_ratio = 10.0
|
| 62 |
+
max_det = 300 # match NMS-baked graph max_det
|
| 63 |
+
use_flip_tta = False # flip-TTA hurt UI on NMS-baked v12 (sweep -0.8 pp)
|
| 64 |
|
| 65 |
def __init__(self, path_hf_repo: Path) -> None:
|
| 66 |
so = ort.SessionOptions()
|
|
|
|
| 77 |
_ort_type = self.sess.get_inputs()[0].type # "tensor(float16)" or fp32
|
| 78 |
self.np_dtype = np.float16 if "float16" in _ort_type else np.float32
|
| 79 |
active = self.sess.get_providers()[0]
|
| 80 |
+
print(f"✅ v12 ONNX beverage model loaded (provider={active}, dtype={self.np_dtype.__name__})")
|
| 81 |
|
| 82 |
+
# Detect output format once
|
| 83 |
+
out0 = self.sess.get_outputs()[0]
|
| 84 |
+
print(f"ONNX output: name={out0.name} shape={out0.shape}")
|
| 85 |
+
|
| 86 |
+
# Eager CUDA EP allocation: ORT lazily binds CUDA on first sess.run,
|
| 87 |
+
# so without this warmup pass the TEE cold-bind eats 30-300s.
|
| 88 |
try:
|
| 89 |
dummy = np.zeros((self.input_size, self.input_size, 3), dtype=np.uint8)
|
| 90 |
_ = self._infer(dummy)
|
| 91 |
+
print(f"✅ v12 ONNX warmup pass completed (provider={active})")
|
| 92 |
except Exception as e:
|
| 93 |
+
print(f"⚠️ v12 ONNX warmup pass failed (not fatal): {e}")
|
| 94 |
|
| 95 |
def __repr__(self) -> str:
|
| 96 |
+
return f"BeverageONNXv12(in={self.input_size}, cls={self.num_classes})"
|
| 97 |
|
| 98 |
# ---- preprocessing --------------------------------------------------
|
| 99 |
def _letterbox(self, im: ndarray) -> tuple[ndarray, float]:
|
|
|
|
| 111 |
lb, s = self._letterbox(im_bgr)
|
| 112 |
x = (lb[:, :, ::-1].transpose(2, 0, 1)[None].astype(np.float32) / 255.0
|
| 113 |
).astype(self.np_dtype)
|
| 114 |
+
raw = self.sess.run(None, {self.inp: x})[0]
|
| 115 |
+
raw = np.asarray(raw, dtype=np.float32)
|
| 116 |
+
|
| 117 |
+
# NMS-baked output: [1, N, 6] = (x1, y1, x2, y2, conf, cls)
|
| 118 |
+
if raw.ndim == 3 and raw.shape[-1] == 6:
|
| 119 |
+
arr = raw[0]
|
| 120 |
+
keep = arr[:, 4] > 0 # drop zero-padding rows
|
| 121 |
+
arr = arr[keep]
|
| 122 |
+
if len(arr) == 0:
|
| 123 |
+
return np.zeros((0, 4 + self.num_classes), dtype=np.float32)
|
| 124 |
+
boxes = arr[:, :4].copy() / s # letterbox → orig coords
|
| 125 |
+
confs = arr[:, 4]
|
| 126 |
+
cls_ids = arr[:, 5].astype(np.int32)
|
| 127 |
+
cls_ids = np.clip(cls_ids, 0, self.num_classes - 1)
|
| 128 |
+
scores = np.zeros((len(arr), self.num_classes), dtype=np.float32)
|
| 129 |
+
scores[np.arange(len(arr)), cls_ids] = confs
|
| 130 |
+
return np.concatenate([boxes, scores], axis=1)
|
| 131 |
+
|
| 132 |
+
# Legacy raw YOLO output: [1, 4+nc, N] or [1, N, 4+nc] (xywh-center)
|
| 133 |
+
out = raw[0]
|
| 134 |
p = out.T if out.shape[0] < out.shape[1] else out # → (N, 4+nc)
|
| 135 |
boxes = p[:, :4].copy()
|
| 136 |
scores = p[:, 4:4 + self.num_classes]
|
|
|
|
| 137 |
xy = boxes[:, :2]
|
| 138 |
wh = boxes[:, 2:4]
|
| 139 |
x1y1 = (xy - wh / 2) / s
|
| 140 |
x2y2 = (xy + wh / 2) / s
|
| 141 |
+
return np.concatenate([x1y1, x2y2, scores], axis=1)
|
| 142 |
|
| 143 |
# ---- post-processing primitives -------------------------------------
|
| 144 |
@staticmethod
|