licensy committed on
Commit
ae42c18
·
verified ·
1 Parent(s): 2f8bd3c

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +304 -130
miner.py CHANGED
@@ -1,17 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pathlib import Path
2
  import math
3
- import os
4
- import glob
5
- import site
6
- import ctypes
7
-
8
- # Preload pip-installed NVIDIA cuDNN so onnxruntime can use CUDAExecutionProvider
9
- for sp in site.getsitepackages():
10
- for d in glob.glob(os.path.join(sp, 'nvidia', '*', 'lib')):
11
- os.environ['LD_LIBRARY_PATH'] = d + ':' + os.environ.get('LD_LIBRARY_PATH', '')
12
- _cudnn = os.path.join(d, 'libcudnn.so.9')
13
- if os.path.exists(_cudnn):
14
- ctypes.CDLL(_cudnn, mode=ctypes.RTLD_GLOBAL)
15
 
16
  import cv2
17
  import numpy as np
@@ -35,136 +40,305 @@ class TVFrameResult(BaseModel):
35
  keypoints: list[tuple[int, int]]
36
 
37
 
 
 
 
38
  class Miner:
39
  def __init__(self, path_hf_repo: Path) -> None:
40
- self.path_hf_repo = path_hf_repo
41
- self.class_names = ['numberplate']
42
- self.session = ort.InferenceSession(
43
- str(path_hf_repo / "weights.onnx"),
44
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
45
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  self.input_name = self.session.get_inputs()[0].name
47
- input_shape = self.session.get_inputs()[0].shape
48
- self.input_size = int(input_shape[2]) # 1280
49
- self.conf_threshold = 0.20
50
- self.iou_threshold = 0.5
51
- self.tile_overlap = 0.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def __repr__(self) -> str:
54
- return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"
55
-
56
- def _letterbox(self, img: ndarray) -> tuple[ndarray, float, int, int]:
57
- h, w = img.shape[:2]
58
- r = min(self.input_size / h, self.input_size / w)
59
- nw, nh = int(w * r), int(h * r)
60
- resized = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
61
- canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
62
- dx = (self.input_size - nw) // 2
63
- dy = (self.input_size - nh) // 2
64
- canvas[dy:dy+nh, dx:dx+nw] = resized
65
- return canvas, r, dx, dy
66
-
67
- def _run_single(self, img: ndarray) -> list[tuple[float, float, float, float, float]]:
68
- h, w = img.shape[:2]
69
- canvas, r, dx, dy = self._letterbox(img)
70
- blob = (canvas.astype(np.float32) / 255.0).transpose(2, 0, 1)[np.newaxis]
71
- out = self.session.run(None, {self.input_name: blob})[0][0]
72
- dets = []
73
- for row in out:
74
- x1, y1, x2, y2, conf, cls = row
75
- if conf < self.conf_threshold:
76
- continue
77
- dets.append((
78
- float(conf),
79
- (x1 - dx) / r,
80
- (y1 - dy) / r,
81
- (x2 - dx) / r,
82
- (y2 - dy) / r,
83
- ))
84
- return dets
85
-
86
- def _nms(self, dets: list[tuple[float, float, float, float, float]]) -> list[tuple[float, float, float, float, float]]:
87
- if not dets:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  return []
89
- dets.sort(key=lambda x: -x[0])
90
- keep = []
91
- used = [False] * len(dets)
92
- for i in range(len(dets)):
93
- if used[i]:
94
- continue
95
- keep.append(dets[i])
96
- for j in range(i + 1, len(dets)):
97
- if used[j]:
98
- continue
99
- # compute IoU
100
- ax1, ay1, ax2, ay2 = dets[i][1], dets[i][2], dets[i][3], dets[i][4]
101
- bx1, by1, bx2, by2 = dets[j][1], dets[j][2], dets[j][3], dets[j][4]
102
- ix1 = max(ax1, bx1); iy1 = max(ay1, by1)
103
- ix2 = min(ax2, bx2); iy2 = min(ay2, by2)
104
- inter = max(0, ix2-ix1) * max(0, iy2-iy1)
105
- aa = (ax2-ax1)*(ay2-ay1); bb = (bx2-bx1)*(by2-by1)
106
- iou = inter / (aa + bb - inter + 1e-6)
107
- if iou > self.iou_threshold:
108
- used[j] = True
109
- return keep
110
-
111
- def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
112
- orig_h, orig_w = image_bgr.shape[:2]
113
- all_dets = []
114
-
115
- # Full image pass
116
- all_dets.extend(self._run_single(image_bgr))
117
-
118
- # 2x2 tile passes
119
- tw = orig_w // 2
120
- th = orig_h // 2
121
- ox = int(tw * self.tile_overlap)
122
- oy = int(th * self.tile_overlap)
123
- tiles = [
124
- (0, 0, tw + ox, th + oy),
125
- (tw - ox, 0, orig_w, th + oy),
126
- (0, th - oy, tw + ox, orig_h),
127
- (tw - ox, th - oy, orig_w, orig_h),
128
- ]
129
- for tx1, ty1, tx2, ty2 in tiles:
130
- tx1 = max(0, tx1); ty1 = max(0, ty1)
131
- tx2 = min(orig_w, tx2); ty2 = min(orig_h, ty2)
132
- crop = image_bgr[ty1:ty2, tx1:tx2]
133
- tile_dets = self._run_single(crop)
134
- for conf, x1, y1, x2, y2 in tile_dets:
135
- all_dets.append((conf, x1 + tx1, y1 + ty1, x2 + tx1, y2 + ty1))
136
-
137
- # NMS to merge overlapping detections
138
- all_dets = self._nms(all_dets)
139
-
140
- out_boxes = []
141
- for conf, x1, y1, x2, y2 in all_dets:
142
- bx1 = max(0, min(orig_w, math.floor(x1)))
143
- by1 = max(0, min(orig_h, math.floor(y1)))
144
- bx2 = max(0, min(orig_w, math.ceil(x2)))
145
- by2 = max(0, min(orig_h, math.ceil(y2)))
146
- bw = bx2 - bx1
147
- bh = by2 - by1
148
- if bw < 6 or bh < 6 or bw * bh < 80:
149
- continue
150
- if max(bw / max(bh, 1), bh / max(bw, 1)) > 10:
151
  continue
152
- out_boxes.append(
153
  BoundingBox(
154
- x1=bx1, y1=by1, x2=bx2, y2=by2,
 
 
 
155
  cls_id=0,
156
- conf=max(0.0, min(1.0, conf)),
157
  )
158
  )
159
- return out_boxes
160
 
 
161
  def predict_batch(
162
- self, batch_images: list[ndarray], offset: int, n_keypoints: int,
 
 
 
163
  ) -> list[TVFrameResult]:
164
- results = []
165
- for idx, image in enumerate(batch_images):
166
- boxes = self._infer_single(image)
167
- keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
168
- results.append(TVFrameResult(
169
- frame_id=offset + idx, boxes=boxes, keypoints=keypoints))
 
 
 
 
 
 
 
 
170
  return results
 
1
+ """Plate-detection miner — v3 "plate_v3 + tight softnms".
2
+
3
+ Base weights: plate_v3 (YOLO26s fine-tuned on Roboflow-filtered + 10x live pseudo-GT,
4
+ resumed from plate_v2). fp16 end2end ONNX, static 1x3x1280x1280, ~19.4 MB.
5
+
6
+ Inference pipeline (tuned per bench_v2.py on 184-shard pool):
7
+ - Single full-image pass with soft-NMS + hflip TTA
8
+ - Tight preset: conf=0.30, iou=0.45, sigma=0.5, max_det=16
9
+ - No tile fallback (v3's mAP=0.973 is already high enough; tiles only add FPs)
10
+
11
+ Bench on 184-shard live pseudo-GT pool (/mnt/shadeform-data/plate_research/live_gt/):
12
+ gated=0.441 mAP=0.973 fp/img=0.29 ms_med=152 ms_p95=161
13
+ Compared to:
14
+ plate_v2 best: gated=0.424
15
+ hermestech best: gated=0.422
16
+ 5GRAm best: gated=0.401
17
+ """
18
  from pathlib import Path
19
  import math
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  import cv2
22
  import numpy as np
 
40
  keypoints: list[tuple[int, int]]
41
 
42
 
43
SIZE = 1280  # Fallback input edge (pixels) used when the ONNX graph reports a non-integer H or W.


class Miner:
    """Single-class plate detector backed by an ONNX Runtime session.

    Pipeline per frame: letterbox to the network input size, one forward pass
    (plus an optional horizontally flipped pass), soft-NMS score decay, a
    final hard-NMS, and a cap of ``max_det`` boxes.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """Load ``weights.onnx`` (and optional ``class_names.txt``) from *path_hf_repo*.

        Prefers the CUDA execution provider and falls back to CPU when the
        CUDA session cannot be created. Diagnostic information is printed to
        stdout while loading.
        """
        model_path = path_hf_repo / "weights.onnx"
        self.class_names = self._load_class_names(model_path.with_name("class_names.txt"))
        self._log_environment()
        self.session = self._create_session(model_path)

        print("ORT session providers:", self.session.get_providers())

        for inp in self.session.get_inputs():
            print("INPUT:", inp.name, inp.shape, inp.type)
        for out in self.session.get_outputs():
            print("OUTPUT:", out.name, out.shape, out.type)

        first_input = self.session.get_inputs()[0]
        self.input_name = first_input.name
        self.output_names = [o.name for o in self.session.get_outputs()]
        self.input_shape = first_input.shape
        # Feed the dtype the graph declares (the export may be fp16 or fp32).
        self.input_dtype = np.float16 if "float16" in first_input.type else np.float32

        # Dynamic axes show up as strings/None; fall back to SIZE for those.
        self.input_height = self._safe_dim(self.input_shape[2], default=SIZE)
        self.input_width = self._safe_dim(self.input_shape[3], default=SIZE)

        # Inference preset.
        self.conf_thres = 0.30   # minimum raw score kept from each forward pass
        self.iou_thres = 0.45    # hard-NMS overlap threshold
        self.sigma = 0.5         # Gaussian soft-NMS decay width
        self.max_det = 16        # maximum boxes returned per frame
        self.use_tta = True      # add a horizontally flipped pass

        print(f"ONNX model loaded from: {model_path}")
        print(f"ONNX providers: {self.session.get_providers()}")
        print(f"ONNX input: name={self.input_name}, shape={self.input_shape}, dtype={self.input_dtype}")
        print(f"Preset: conf={self.conf_thres} iou={self.iou_thres} sigma={self.sigma} max_det={self.max_det}")

    def __repr__(self) -> str:
        return (
            f"ONNXRuntime(session={type(self.session).__name__}, "
            f"providers={self.session.get_providers()})"
        )

    # ---------- construction helpers ----------
    @staticmethod
    def _load_class_names(cn_path: Path) -> list[str]:
        """Read class names from *cn_path*, skipping blank lines and ``#`` comments.

        Returns ``["numberplate"]`` when the file does not exist.
        """
        if not cn_path.is_file():
            return ["numberplate"]
        stripped = (ln.strip() for ln in cn_path.read_text(encoding="utf-8").splitlines())
        return [name for name in stripped if name and not name.startswith("#")]

    @staticmethod
    def _log_environment() -> None:
        """Print ORT/GPU diagnostics; every probe is best-effort and never raises."""
        print("ORT version:", ort.__version__)

        try:
            ort.preload_dlls()
            print("onnxruntime.preload_dlls() success")
        except Exception as e:
            print(f"preload_dlls failed: {e}")

        print("ORT available providers BEFORE session:", ort.get_available_providers())

        try:
            import torch
            if torch.cuda.is_available():
                print(f"GPU: {torch.cuda.get_device_name(0)}")
                print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
            else:
                print("GPU: CUDA not available via torch")
        except Exception as e:
            print(f"GPU detection failed: {e}")

    @staticmethod
    def _create_session(model_path: Path):
        """Create the inference session, retrying on CPU only if the CUDA attempt fails."""
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        try:
            session = ort.InferenceSession(
                str(model_path),
                sess_options=sess_options,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
            )
            print("Created ORT session with preferred CUDA provider list")
        except Exception as e:
            print(f"CUDA session creation failed, falling back to CPU: {e}")
            session = ort.InferenceSession(
                str(model_path),
                sess_options=sess_options,
                providers=["CPUExecutionProvider"],
            )
        return session

    @staticmethod
    def _safe_dim(value, default: int) -> int:
        """Return *value* if it is a positive int, otherwise *default*."""
        return value if isinstance(value, int) and value > 0 else default

    @staticmethod
    def _empty_dets() -> np.ndarray:
        """Return an empty ``[0, 5]`` float32 detection array."""
        return np.zeros((0, 5), dtype=np.float32)

    # ---------- image preprocessing ----------
    @staticmethod
    def _letterbox_geometry(
        src_w: int, src_h: int, dst_w: int, dst_h: int
    ) -> tuple[float, tuple[int, int], tuple[int, int, int, int]]:
        """Compute aspect-preserving resize and padding for a letterbox.

        Returns ``(ratio, (resized_w, resized_h), (left, top, right, bottom))``.
        The resized sides are clamped to at least 1 pixel so that extremely
        thin inputs do not produce a zero-sized resize target.
        """
        ratio = min(dst_w / src_w, dst_h / src_h)
        resized_w = max(1, int(round(src_w * ratio)))
        resized_h = max(1, int(round(src_h * ratio)))
        half_w = (dst_w - resized_w) / 2.0
        half_h = (dst_h - resized_h) / 2.0
        # The +/-0.1 nudge splits an odd padding as (k, k + 1) deterministically.
        left, right = int(round(half_w - 0.1)), int(round(half_w + 0.1))
        top, bottom = int(round(half_h - 0.1)), int(round(half_h + 0.1))
        return ratio, (resized_w, resized_h), (left, top, right, bottom)

    def _letterbox(
        self,
        image: ndarray,
        new_shape: tuple[int, int],
        color=(114, 114, 114),
    ) -> tuple[ndarray, float, tuple[float, float]]:
        """Resize *image* keeping aspect ratio and pad it to ``new_shape`` = (width, height).

        Returns ``(padded_image, ratio, (pad_left, pad_top))``. The pad values
        are the border widths actually applied on the left/top, so subtracting
        them maps network coordinates back onto the resized image exactly.
        """
        h, w = image.shape[:2]
        new_w, new_h = new_shape
        ratio, (resized_w, resized_h), (left, top, right, bottom) = self._letterbox_geometry(
            w, h, new_w, new_h
        )
        if (resized_w, resized_h) != (w, h):
            # Cubic when enlarging, linear when shrinking.
            interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
            image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
        padded = cv2.copyMakeBorder(
            image, top, bottom, left, right,
            borderType=cv2.BORDER_CONSTANT, value=color,
        )
        return padded, ratio, (float(left), float(top))

    def _preprocess(self, image: ndarray):
        """Letterbox a BGR frame and convert it to a contiguous ``[1, 3, H, W]`` RGB tensor in [0, 1].

        Returns ``(tensor, ratio, (pad_left, pad_top))``.
        """
        img, ratio, pad = self._letterbox(image, (self.input_width, self.input_height))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        img = np.transpose(img, (2, 0, 1))[None, ...]
        return np.ascontiguousarray(img, dtype=self.input_dtype), ratio, pad

    @staticmethod
    def _to_uint8(image: np.ndarray) -> np.ndarray:
        """Return *image* as uint8, saturating out-of-range values to [0, 255].

        uint8 input is returned unchanged (same object). Other dtypes are
        clipped before the cast so values wrap neither above 255 nor below 0.
        """
        if image.dtype == np.uint8:
            return image
        # Go through float32 so the clip bounds are valid for narrow int dtypes too.
        return np.clip(image.astype(np.float32), 0.0, 255.0).astype(np.uint8)

    @staticmethod
    def _clip_boxes(boxes: np.ndarray, image_size: tuple[int, int]) -> np.ndarray:
        """Clamp xyxy *boxes* in place to ``[0, w - 1] x [0, h - 1]``; ``image_size`` is (w, h)."""
        w, h = image_size
        boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, w - 1)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)
        return boxes

    # ---------- NMS primitives ----------
    @staticmethod
    def _hard_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
        """Greedy NMS; returns indices of kept boxes, highest score first.

        A box is dropped when its IoU with an already kept box exceeds
        *iou_thresh*.
        """
        if len(boxes) == 0:
            return np.array([], dtype=np.intp)
        boxes = np.asarray(boxes, dtype=np.float32)
        scores = np.asarray(scores, dtype=np.float32)
        order = np.argsort(-scores)
        keep: list[int] = []
        while len(order):
            i = int(order[0])
            keep.append(i)
            if len(order) == 1:
                break
            rest = order[1:]
            xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
            yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
            xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
            yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
            area_r = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
            iou = inter / (area_i + area_r - inter + 1e-7)
            order = rest[iou <= iou_thresh]
        return np.array(keep, dtype=np.intp)

    def _soft_nms(
        self,
        boxes: np.ndarray,
        scores: np.ndarray,
        sigma: float,
        score_thresh: float = 0.01,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Gaussian soft-NMS.

        Repeatedly selects the highest remaining score and multiplies the
        scores of all later boxes by ``exp(-iou**2 / sigma)``. Returns
        ``(indices, decayed_scores)`` for boxes whose decayed score is above
        *score_thresh*; indices refer to the input arrays, which are not
        modified.
        """
        n = len(boxes)
        if n == 0:
            return np.array([], dtype=np.intp), np.array([], dtype=np.float32)
        # Work on private copies: rows are swapped and scores decayed in place.
        boxes = np.array(boxes, dtype=np.float32)
        scores = np.array(scores, dtype=np.float32)
        order = np.arange(n)
        for i in range(n):
            # Selection step: move the best remaining box into slot i.
            max_pos = i + int(np.argmax(scores[i:]))
            boxes[[i, max_pos]] = boxes[[max_pos, i]]
            scores[[i, max_pos]] = scores[[max_pos, i]]
            order[[i, max_pos]] = order[[max_pos, i]]
            if i + 1 >= n:
                break
            tail = boxes[i + 1:]
            xx1 = np.maximum(boxes[i, 0], tail[:, 0])
            yy1 = np.maximum(boxes[i, 1], tail[:, 1])
            xx2 = np.minimum(boxes[i, 2], tail[:, 2])
            yy2 = np.minimum(boxes[i, 3], tail[:, 3])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            # Clamp both sides so degenerate boxes cannot yield a negative area.
            area_i = float(
                max(0.0, boxes[i, 2] - boxes[i, 0]) * max(0.0, boxes[i, 3] - boxes[i, 1])
            )
            areas_j = (
                np.maximum(0.0, tail[:, 2] - tail[:, 0])
                * np.maximum(0.0, tail[:, 3] - tail[:, 1])
            )
            iou = inter / (area_i + areas_j - inter + 1e-7)
            scores[i + 1:] *= np.exp(-(iou ** 2) / sigma)
        mask = scores > score_thresh
        return order[mask], scores[mask]

    # ---------- raw-dets helper ----------
    def _raw_dets(self, image: ndarray, conf: float) -> np.ndarray:
        """Run a single forward pass and return [N, 5] dets in ORIGINAL image coords.

        Columns are ``x1, y1, x2, y2, score``. Rows scoring below *conf* are
        dropped. An output that is not ``[N, >=5]`` (after removing a leading
        batch axis) yields an empty result instead of raising.
        """
        x, ratio, (dw, dh) = self._preprocess(image)
        out = self.session.run(self.output_names, {self.input_name: x})[0]
        if out.ndim == 3:
            out = out[0]
        if out.ndim != 2 or out.shape[1] < 5:
            return self._empty_dets()
        boxes = out[:, :4].astype(np.float32)
        scores = out[:, 4].astype(np.float32)
        keep = scores >= conf
        boxes, scores = boxes[keep], scores[keep]
        if len(boxes) == 0:
            return self._empty_dets()
        # Undo the letterbox: remove the applied border, then undo the resize.
        boxes[:, [0, 2]] -= dw
        boxes[:, [1, 3]] -= dh
        boxes /= ratio
        oh, ow = image.shape[:2]
        boxes = self._clip_boxes(boxes, (ow, oh))
        return np.concatenate([boxes, scores[:, None]], axis=1)

    # ---------- primary pass: soft-NMS + hflip TTA ----------
    def _primary(self, image: ndarray) -> np.ndarray:
        """Detect on *image* (and its mirror when ``use_tta``) and merge the results.

        Returns an ``[N, 5]`` array with at most ``max_det`` rows.
        """
        d1 = self._raw_dets(image, self.conf_thres)
        if self.use_tta:
            flipped = cv2.flip(image, 1)
            d2 = self._raw_dets(flipped, self.conf_thres)
            if len(d2):
                # Mirror x back; left and right edges swap roles.
                w = image.shape[1]
                x1 = w - d2[:, 2]
                x2 = w - d2[:, 0]
                d2 = np.stack([x1, d2[:, 1], x2, d2[:, 3], d2[:, 4]], axis=1)
            all_d = np.concatenate([d1, d2], axis=0) if len(d2) else d1
        else:
            all_d = d1
        if len(all_d) == 0:
            return self._empty_dets()
        # soft-NMS, then hard-NMS
        keep_idx, scores = self._soft_nms(all_d[:, :4].copy(), all_d[:, 4].copy(), sigma=self.sigma)
        if len(keep_idx) == 0:
            return self._empty_dets()
        merged = np.concatenate([all_d[keep_idx, :4], scores[:, None]], axis=1)
        keep = self._hard_nms(merged[:, :4], merged[:, 4], self.iou_thres)
        merged = merged[keep]
        if len(merged) > self.max_det:
            merged = merged[np.argsort(-merged[:, 4])[: self.max_det]]
        return merged

    # ---------- single-image predict ----------
    def _predict_single(self, image: ndarray) -> "list[BoundingBox]":
        """Return boxes for one frame; anything that is not an ``H x W x 3`` array yields ``[]``."""
        if image is None or not isinstance(image, np.ndarray) or image.ndim != 3:
            return []
        if image.shape[0] <= 0 or image.shape[1] <= 0 or image.shape[2] != 3:
            return []
        image = self._to_uint8(image)

        dets = self._primary(image)

        results: "list[BoundingBox]" = []
        for row in dets:
            x1, y1, x2, y2, conf = row.tolist()
            if x2 <= x1 or y2 <= y1:
                continue
            # Round outwards so the integer box always contains the float box.
            results.append(
                BoundingBox(
                    x1=int(math.floor(x1)),
                    y1=int(math.floor(y1)),
                    x2=int(math.ceil(x2)),
                    y2=int(math.ceil(y2)),
                    cls_id=0,
                    conf=float(conf),
                )
            )
        return results

    # ---------- chute entrypoint ----------
    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> "list[TVFrameResult]":
        """Run detection on every frame of *batch_images*.

        ``frame_id`` is ``offset`` plus the frame's index in the batch. A frame
        whose inference raises is reported with an empty box list, so one bad
        frame does not fail the batch. Keypoints are ``n_keypoints``
        placeholder ``(0, 0)`` entries.
        """
        results: "list[TVFrameResult]" = []
        for frame_number_in_batch, image in enumerate(batch_images):
            try:
                boxes = self._predict_single(image)
            except Exception as e:
                print(f"Inference failed for frame {offset + frame_number_in_batch}: {e}")
                boxes = []
            results.append(
                TVFrameResult(
                    frame_id=offset + frame_number_in_batch,
                    boxes=boxes,
                    keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
                )
            )
        return results