meaculpitt committed on
Commit
3054cb2
·
verified ·
1 Parent(s): 2df103a

scorevision: push artifact

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ person_weights_1280.onnx.bak filter=lfs diff=lfs merge=lfs -text
37
+ vehicle_weights_1280.onnx.bak filter=lfs diff=lfs merge=lfs -text
__pycache__/miner.cpython-312.pyc CHANGED
Binary files a/__pycache__/miner.cpython-312.pyc and b/__pycache__/miner.cpython-312.pyc differ
 
miner.py CHANGED
@@ -74,7 +74,7 @@ logger = logging.getLogger(__name__)
74
  # ── Vehicle config ──────────────────────────────────────────────────────────
75
  VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
76
  VEH_NUM_CLASSES = 4
77
- VEH_IMG_SIZE = 1280
78
  VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
79
  VEH_CONF_DEFAULT = 0.35
80
  VEH_TTA_CONF = 0.25
@@ -235,6 +235,9 @@ class Miner:
235
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
236
  )
237
  self.veh_input_name = self.veh_session.get_inputs()[0].name
 
 
 
238
 
239
  # Person model (YOLO11s, 1 class)
240
  self.per_session = ort.InferenceSession(
@@ -263,10 +266,10 @@ class Miner:
263
 
264
  def _veh_letterbox(self, img):
265
  h, w = img.shape[:2]
266
- r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
267
  nw, nh = int(round(w * r)), int(round(h * r))
268
  img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
269
- dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
270
  pl, pt = dw // 2, dh // 2
271
  img_p = cv2.copyMakeBorder(
272
  img_r, pt, dh - pt, pl, dw - pl,
 
74
  # ── Vehicle config ──────────────────────────────────────────────────────────
75
  VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
76
  VEH_NUM_CLASSES = 4
77
+ # VEH_IMG_SIZE: now read dynamically from model input shape in __init__
78
  VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
79
  VEH_CONF_DEFAULT = 0.35
80
  VEH_TTA_CONF = 0.25
 
235
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
236
  )
237
  self.veh_input_name = self.veh_session.get_inputs()[0].name
238
+ veh_shape = self.veh_session.get_inputs()[0].shape
239
+ self.veh_h = int(veh_shape[2])
240
+ self.veh_w = int(veh_shape[3])
241
 
242
  # Person model (YOLO11s, 1 class)
243
  self.per_session = ort.InferenceSession(
 
266
 
267
  def _veh_letterbox(self, img):
268
  h, w = img.shape[:2]
269
+ r = min(self.veh_h / h, self.veh_w / w)
270
  nw, nh = int(round(w * r)), int(round(h * r))
271
  img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
272
+ dw, dh = self.veh_w - nw, self.veh_h - nh
273
  pl, pt = dw // 2, dh // 2
274
  img_p = cv2.copyMakeBorder(
275
  img_r, pt, dh - pt, pl, dw - pl,
miner.py.bak_1280 ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Score Vision SN44 — Unified miner v2 (2026-03-28).
3
+ Dual-model: vehicle (YOLO11s) + person (YOLO11s).
4
+ Optimized for latency: parallel threading + configurable TTA.
5
+
6
+ Vehicle model (vehicle_weights.onnx):
7
+ Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
8
+ Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
9
+
10
+ Person model (person_weights.onnx):
11
+ Single class: 0=person
12
+
13
+ Both models run on every image. All detections merged.
14
+ cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
15
+ Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
16
+ """
17
+
18
+ import os
19
+ import ctypes
20
+ import glob as _glob
21
+ import logging as _logging
22
+
23
+ _cuda_log = _logging.getLogger(__name__)
24
+
25
def _preload_cuda_libs():
    """Pre-load CUDA libs from pip nvidia packages so onnxruntime-gpu finds them.

    Looks for a ``lib`` directory inside the ``nvidia.cudnn`` and
    ``nvidia.cublas`` packages, prepends the directories found to
    ``LD_LIBRARY_PATH`` and loads every ``lib*.so*`` file in them with
    ``RTLD_GLOBAL``. The whole routine is best-effort: missing packages and
    libraries that fail to load are skipped, and any unexpected error is
    logged as a warning instead of being raised.
    """
    try:
        lib_dirs = []
        for mod_name in ('nvidia.cudnn', 'nvidia.cublas'):
            try:
                mod = __import__(mod_name, fromlist=['__file__'])
            except ImportError:
                continue
            # A namespace package has __file__ set to None; fall back to
            # __path__ so one such package does not abort the whole preload.
            mod_file = getattr(mod, '__file__', None)
            if mod_file:
                roots = [os.path.dirname(mod_file)]
            else:
                roots = list(getattr(mod, '__path__', ()))
            for root in roots:
                lib_dir = os.path.join(root, 'lib')
                if os.path.isdir(lib_dir) and lib_dir not in lib_dirs:
                    lib_dirs.append(lib_dir)
        if not lib_dirs:
            return
        # Set LD_LIBRARY_PATH for subprocesses
        existing = os.environ.get('LD_LIBRARY_PATH', '')
        os.environ['LD_LIBRARY_PATH'] = ':'.join(lib_dirs + ([existing] if existing else []))
        # Pre-load .so files with RTLD_GLOBAL so dlopen() finds them
        for lib_dir in lib_dirs:
            for so in sorted(_glob.glob(os.path.join(lib_dir, 'lib*.so*'))):
                try:
                    ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
                except OSError:
                    # Not every matching file is loadable on its own; skip it.
                    continue
                _cuda_log.info('Preloaded CUDA lib: %s', os.path.basename(so))
    except Exception as e:
        _cuda_log.warning('CUDA preload error: %s', e)
52
+
53
+ _preload_cuda_libs()
54
+
55
+
56
+ from pathlib import Path
57
+ import math
58
+ import time
59
+ import logging
60
+
61
+ import cv2
62
+ import numpy as np
63
+ import onnxruntime as ort
64
+ from numpy import ndarray
65
+ from pydantic import BaseModel
66
+
67
+ import json
68
+ import threading
69
+ from datetime import datetime, timezone
70
+ from concurrent.futures import ThreadPoolExecutor, as_completed
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
# ── Vehicle config ──────────────────────────────────────────────────────────
# Maps the vehicle model's trained class index to the output class id
# (trained 0=car, 1=bus -> output 0=bus, 1=car; truck and motorcycle unchanged).
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
VEH_NUM_CLASSES = 4
# Square letterbox target size used by Miner._veh_letterbox.
VEH_IMG_SIZE = 1280
# Final confidence cut-off per *output* class id, applied in Miner._infer_vehicle.
VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
# Fallback cut-off for a class id missing from VEH_CONF_PER_CLASS.
VEH_CONF_DEFAULT = 0.35
# Decode-time threshold for every vehicle pass (used with and without TTA).
VEH_TTA_CONF = 0.25
# IoU threshold passed to _wbf_multi when fusing vehicle TTA passes.
VEH_WBF_IOU = 0.55

# ── Person config ───────────────────────────────────────────────────────────
# Final person confidence cut-off; also the decode threshold in single-pass mode.
PER_CONF = 0.35
# Decode-time threshold for person passes when TTA is enabled.
PER_TTA_CONF = 0.25
# IoU threshold passed to _wbf_single when fusing person TTA passes.
PER_WBF_IOU = 0.45

# ── Shared ──────────────────────────────────────────────────────────────────
# Detections scoring below this are dropped before box fusion.
WBF_SKIP_THR = 0.0001

# ── Speed config ────────────────────────────────────────────────────────────
ENABLE_TTA = False       # Set True to re-enable 2-pass TTA (doubles inference time)
ENABLE_PARALLEL = True   # Run vehicle + person in parallel threads
94
+
95
+
96
+ def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
97
+ """Weighted Boxes Fusion (multi-class). Boxes in [0,1] normalized coords."""
98
+ if not boxes_list:
99
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
100
+
101
+ all_b, all_s, all_l = [], [], []
102
+ for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
103
+ for i in range(len(bx)):
104
+ if sc[i] < skip_thr:
105
+ continue
106
+ all_b.append(bx[i])
107
+ all_s.append(sc[i])
108
+ all_l.append(int(lb[i]))
109
+
110
+ if not all_b:
111
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
112
+
113
+ all_b = np.array(all_b)
114
+ all_s = np.array(all_s)
115
+ all_l = np.array(all_l, dtype=int)
116
+
117
+ fused_b, fused_s, fused_l = [], [], []
118
+ for cls in np.unique(all_l):
119
+ m = all_l == cls
120
+ cb, cs = all_b[m], all_s[m]
121
+ order = cs.argsort()[::-1]
122
+ cb, cs = cb[order], cs[order]
123
+
124
+ clusters, cboxes = [], []
125
+ for i in range(len(cb)):
126
+ matched, best_iou = -1, iou_thr
127
+ for ci, cbox in enumerate(cboxes):
128
+ xx1 = max(cb[i, 0], cbox[0])
129
+ yy1 = max(cb[i, 1], cbox[1])
130
+ xx2 = min(cb[i, 2], cbox[2])
131
+ yy2 = min(cb[i, 3], cbox[3])
132
+ inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
133
+ a1 = (cb[i, 2] - cb[i, 0]) * (cb[i, 3] - cb[i, 1])
134
+ a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
135
+ iou = inter / (a1 + a2 - inter + 1e-9)
136
+ if iou > best_iou:
137
+ best_iou = iou
138
+ matched = ci
139
+ if matched >= 0:
140
+ clusters[matched].append(i)
141
+ idxs = clusters[matched]
142
+ w = cs[idxs]
143
+ cboxes[matched] = (cb[idxs] * w[:, None]).sum(0) / w.sum()
144
+ else:
145
+ clusters.append([i])
146
+ cboxes.append(cb[i].copy())
147
+
148
+ for ci, idxs in enumerate(clusters):
149
+ fused_b.append(cboxes[ci])
150
+ fused_s.append(cs[idxs].mean())
151
+ fused_l.append(cls)
152
+
153
+ if not fused_b:
154
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
155
+ return np.array(fused_b), np.array(fused_s), np.array(fused_l)
156
+
157
+
158
+ def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
159
+ """Weighted Boxes Fusion (single-class). Boxes in [0,1] normalized coords."""
160
+ if not boxes_list:
161
+ return np.empty((0, 4)), np.empty(0)
162
+
163
+ all_b, all_s = [], []
164
+ for bx, sc in zip(boxes_list, scores_list):
165
+ for i in range(len(bx)):
166
+ if sc[i] < skip_thr:
167
+ continue
168
+ all_b.append(bx[i])
169
+ all_s.append(sc[i])
170
+
171
+ if not all_b:
172
+ return np.empty((0, 4)), np.empty(0)
173
+
174
+ all_b = np.array(all_b)
175
+ all_s = np.array(all_s)
176
+ order = all_s.argsort()[::-1]
177
+ all_b, all_s = all_b[order], all_s[order]
178
+
179
+ clusters, cboxes = [], []
180
+ for i in range(len(all_b)):
181
+ matched, best_iou = -1, iou_thr
182
+ for ci, cbox in enumerate(cboxes):
183
+ xx1 = max(all_b[i, 0], cbox[0])
184
+ yy1 = max(all_b[i, 1], cbox[1])
185
+ xx2 = min(all_b[i, 2], cbox[2])
186
+ yy2 = min(all_b[i, 3], cbox[3])
187
+ inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
188
+ a1 = (all_b[i, 2] - all_b[i, 0]) * (all_b[i, 3] - all_b[i, 1])
189
+ a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
190
+ iou = inter / (a1 + a2 - inter + 1e-9)
191
+ if iou > best_iou:
192
+ best_iou = iou
193
+ matched = ci
194
+ if matched >= 0:
195
+ clusters[matched].append(i)
196
+ idxs = clusters[matched]
197
+ w = all_s[idxs]
198
+ cboxes[matched] = (all_b[idxs] * w[:, None]).sum(0) / w.sum()
199
+ else:
200
+ clusters.append([i])
201
+ cboxes.append(all_b[i].copy())
202
+
203
+ fused_b, fused_s = [], []
204
+ for ci, idxs in enumerate(clusters):
205
+ fused_b.append(cboxes[ci])
206
+ fused_s.append(all_s[idxs].mean())
207
+
208
+ if not fused_b:
209
+ return np.empty((0, 4)), np.empty(0)
210
+ return np.array(fused_b), np.array(fused_s)
211
+
212
+
213
class BoundingBox(BaseModel):
    """One detection: an axis-aligned box with a class id and a confidence."""

    # Corner coordinates; Miner builds them as integers clamped to the image size.
    x1: int
    y1: int
    x2: int
    y2: int
    # Output class id (vehicle detections use 0-3, person detections use 0).
    cls_id: int
    # Detection confidence; Miner clamps it to [0.0, 1.0].
    conf: float
220
+
221
+
222
class TVFrameResult(BaseModel):
    """Detections for a single frame, as returned by Miner.predict_batch."""

    # Batch offset plus the frame's index within the batch.
    frame_id: int
    # All detections for the frame (vehicle and person boxes together).
    boxes: list[BoundingBox]
    # (x, y) pairs; Miner.predict_batch fills these with (0, 0) placeholders.
    keypoints: list[tuple[int, int]]
226
+
227
+
228
class Miner:
    """Dual-model detector that runs a vehicle and a person ONNX model per image.

    Vehicle detections are remapped through ``VEH_MODEL_TO_OUT`` and filtered
    with per-class thresholds; person detections always use ``cls_id`` 0. The
    results of both models are concatenated for every frame. Each processed
    batch is also written to a bounded on-disk replay buffer on a best-effort
    basis.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """Load both ONNX sessions from ``path_hf_repo`` and create the worker pool."""
        self.path_hf_repo = path_hf_repo

        # Vehicle model (YOLO11s, 4 classes)
        self.veh_session = ort.InferenceSession(
            str(path_hf_repo / "vehicle_weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.veh_input_name = self.veh_session.get_inputs()[0].name

        # Person model (YOLO11s, 1 class)
        self.per_session = ort.InferenceSession(
            str(path_hf_repo / "person_weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.per_input_name = self.per_session.get_inputs()[0].name
        # Input shape is read as (N, C, H, W); int() assumes static H and W.
        per_shape = self.per_session.get_inputs()[0].shape
        self.per_h = int(per_shape[2])
        self.per_w = int(per_shape[3])

        # Thread pool for parallel inference
        self._executor = ThreadPoolExecutor(max_workers=2)

        # Log provider info
        veh_prov = self.veh_session.get_providers()
        per_prov = self.per_session.get_providers()
        logger.info(f"Vehicle ORT providers: {veh_prov}")
        logger.info(f"Person ORT providers: {per_prov}")
        logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")

    def __repr__(self) -> str:
        return "Unified Miner v2 — dual-model vehicle+person (parallel, TTA-configurable)"

    # ── Shared helpers ──────────────────────────────────────────────────────

    @staticmethod
    def _to_bounding_boxes(boxes, scores, cls_ids, ow, oh):
        """Convert pixel-space float boxes to ``BoundingBox`` objects.

        Coordinates are rounded outwards (floor for x1/y1, ceil for x2/y2) and
        clamped to ``[0, ow]`` / ``[0, oh]``; confidences are clamped to [0, 1].
        """
        return [
            BoundingBox(
                x1=max(0, min(ow, math.floor(b[0]))),
                y1=max(0, min(oh, math.floor(b[1]))),
                x2=max(0, min(ow, math.ceil(b[2]))),
                y2=max(0, min(oh, math.ceil(b[3]))),
                cls_id=int(c),
                conf=max(0.0, min(1.0, float(s))),
            )
            for b, s, c in zip(boxes, scores, cls_ids)
        ]

    # ── Vehicle preprocessing (letterbox) ───────────────────────────────────

    def _veh_letterbox(self, img):
        """Resize ``img`` keeping aspect ratio and pad to a VEH_IMG_SIZE square.

        Returns ``(padded_image, ratio, pad_left, pad_top)``.
        """
        h, w = img.shape[:2]
        r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
        nw, nh = int(round(w * r)), int(round(h * r))
        img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
        dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
        pl, pt = dw // 2, dh // 2
        img_p = cv2.copyMakeBorder(
            img_r, pt, dh - pt, pl, dw - pl,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )
        return img_p, r, pl, pt

    def _veh_preprocess(self, image_bgr):
        """Letterbox, convert BGR->RGB, scale to [0, 1] and lay out as NCHW float32."""
        img_p, ratio, pl, pt = self._veh_letterbox(image_bgr)
        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
        inp = rgb.astype(np.float32) / 255.0
        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
        return inp, ratio, pl, pt

    def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
        """Decode raw vehicle output into pixel-space xyxy boxes, scores, class ids.

        Rows are read as ``[cx, cy, w, h, class scores...]`` in letterboxed
        coordinates; the padding and scale are undone and boxes are clipped to
        the original image size. Malformed output yields empty arrays.
        """
        empty = (np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int))
        pred = raw[0]
        # Same shape guards as _per_decode, so bad output cannot raise here.
        if pred.ndim != 2:
            return empty
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T
        if pred.shape[1] < 5:
            return empty
        cls_scores = pred[:, 4:]
        cls_ids = np.argmax(cls_scores, axis=1)
        confs = np.max(cls_scores, axis=1)
        mask = confs >= conf_thresh
        if not mask.any():
            return empty
        bx, confs, cls_ids = pred[mask, :4], confs[mask], cls_ids[mask]
        cx, cy, bw, bh = bx[:, 0], bx[:, 1], bx[:, 2], bx[:, 3]
        x1 = np.clip((cx - bw / 2 - pl) / ratio, 0, ow)
        y1 = np.clip((cy - bh / 2 - pt) / ratio, 0, oh)
        x2 = np.clip((cx + bw / 2 - pl) / ratio, 0, ow)
        y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids

    def _veh_run_pass(self, image_bgr, conf_thresh):
        """Run one vehicle forward pass and decode it."""
        oh, ow = image_bgr.shape[:2]
        inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
        raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
        return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)

    def _infer_vehicle(self, image_bgr):
        """Detect vehicles in ``image_bgr`` and return them as ``BoundingBox`` objects."""
        oh, ow = image_bgr.shape[:2]
        all_b, all_s, all_l = [], [], []

        def _collect(boxes, confs, cls_ids):
            # Remap class ids and store boxes normalized to [0, 1] (the WBF input format).
            if len(boxes) == 0:
                return
            out_cls = np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids])
            norm = boxes.copy()
            norm[:, [0, 2]] /= ow
            norm[:, [1, 3]] /= oh
            norm = np.clip(norm, 0, 1)
            all_b.append(norm)
            all_s.append(confs)
            all_l.append(out_cls)

        if ENABLE_TTA:
            # Pass 1: original
            _collect(*self._veh_run_pass(image_bgr, VEH_TTA_CONF))
            # Pass 2: hflip
            flipped = cv2.flip(image_bgr, 1)
            bx, sc, cl = self._veh_run_pass(flipped, VEH_TTA_CONF)
            if len(bx):
                # Mirror x coordinates back into the unflipped frame.
                bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
            _collect(bx, sc, cl)
        else:
            # Single pass — pre-filter at VEH_TTA_CONF; the per-class
            # thresholds are applied further down.
            bx, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_TTA_CONF)
            _collect(bx, confs, cls_ids)

        if not all_b:
            return []

        if ENABLE_TTA:
            fb, fs, fl = _wbf_multi(all_b, all_s, all_l, iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR)
        else:
            # NOTE(review): no NMS/WBF is applied on this path; presumably the
            # exported model already suppresses duplicate boxes — confirm.
            fb = np.concatenate(all_b, axis=0)
            fs = np.concatenate(all_s, axis=0)
            fl = np.concatenate(all_l, axis=0)

        if len(fb) == 0:
            return []

        # Back to pixel coordinates.
        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        keep = np.array([
            s >= VEH_CONF_PER_CLASS.get(int(c), VEH_CONF_DEFAULT)
            for s, c in zip(fs, fl)
        ])
        if not keep.any():
            return []
        fb, fs, fl = fb[keep], fs[keep], fl[keep]

        return self._to_bounding_boxes(fb, fs, fl, ow, oh)

    # ── Person preprocessing (stretch resize) ──────────────────────────────

    def _per_preprocess(self, image_bgr):
        """Convert BGR->RGB, stretch to the model input size and lay out as NCHW float32."""
        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.per_w, self.per_h))
        x = resized.astype(np.float32) / 255.0
        # Contiguous like the vehicle path; the values are unchanged.
        x = np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None, ...])
        return x

    def _per_decode(self, raw, oh, ow, conf_thresh):
        """Decode raw person output into pixel-space xyxy boxes and scores.

        Rows are read as ``[cx, cy, w, h, class scores...]`` in model-input
        coordinates and rescaled to the original image size. Malformed output
        yields empty arrays.
        """
        pred = raw[0]
        if pred.ndim != 2:
            return np.empty((0, 4)), np.empty(0)
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T
        if pred.shape[1] < 5:
            return np.empty((0, 4)), np.empty(0)
        cls_scores = pred[:, 4:]
        confs = np.max(cls_scores, axis=1)
        keep = confs >= conf_thresh
        boxes, confs = pred[keep, :4], confs[keep]
        if len(boxes) == 0:
            return np.empty((0, 4)), np.empty(0)
        sx, sy = ow / float(self.per_w), oh / float(self.per_h)
        cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        x1 = np.clip((cx - bw / 2) * sx, 0, ow)
        y1 = np.clip((cy - bh / 2) * sy, 0, oh)
        x2 = np.clip((cx + bw / 2) * sx, 0, ow)
        y2 = np.clip((cy + bh / 2) * sy, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs

    def _per_run_pass(self, image_bgr, conf_thresh):
        """Run one person forward pass and decode it."""
        oh, ow = image_bgr.shape[:2]
        inp = self._per_preprocess(image_bgr)
        raw = self.per_session.run(None, {self.per_input_name: inp})[0]
        return self._per_decode(raw, oh, ow, conf_thresh)

    def _infer_person(self, image_bgr):
        """Detect persons in ``image_bgr`` and return them as ``BoundingBox`` objects."""
        oh, ow = image_bgr.shape[:2]
        all_b, all_s = [], []

        def _collect(boxes, confs):
            # Store boxes normalized to [0, 1] (the WBF input format).
            if len(boxes) == 0:
                return
            norm = boxes.copy()
            norm[:, [0, 2]] /= ow
            norm[:, [1, 3]] /= oh
            norm = np.clip(norm, 0, 1)
            all_b.append(norm)
            all_s.append(confs)

        if ENABLE_TTA:
            # Pass 1: original
            _collect(*self._per_run_pass(image_bgr, PER_TTA_CONF))
            # Pass 2: hflip
            flipped = cv2.flip(image_bgr, 1)
            bx, sc = self._per_run_pass(flipped, PER_TTA_CONF)
            if len(bx):
                # Mirror x coordinates back into the unflipped frame.
                bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
            _collect(bx, sc)
        else:
            # Single pass
            _collect(*self._per_run_pass(image_bgr, PER_CONF))

        if not all_b:
            return []

        if ENABLE_TTA:
            fb, fs = _wbf_single(all_b, all_s, iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR)
        else:
            fb = np.concatenate(all_b, axis=0)
            fs = np.concatenate(all_s, axis=0)

        if len(fb) == 0:
            return []

        # Back to pixel coordinates.
        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        keep = fs >= PER_CONF
        fb, fs = fb[keep], fs[keep]

        return self._to_bounding_boxes(fb, fs, [0] * len(fb), ow, oh)

    # ── Unified inference ───────────────────────────────────────────────────

    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
        """Run both models on one image and return vehicle boxes followed by person boxes."""
        if ENABLE_PARALLEL:
            # Run both models in parallel threads
            veh_future = self._executor.submit(self._infer_vehicle, image_bgr)
            per_future = self._executor.submit(self._infer_person, image_bgr)
            vehicle_boxes = veh_future.result()
            person_boxes = per_future.result()
        else:
            vehicle_boxes = self._infer_vehicle(image_bgr)
            person_boxes = self._infer_person(image_bgr)
        return vehicle_boxes + person_boxes

    # -- Replay buffer -------------------------------------------------------
    REPLAY_DIR = Path("/home/miner/replay_buffer")
    REPLAY_MAX = 100

    def _replay_save(self, batch_images, results):
        """Save validator query images + our predictions to replay buffer (background).

        Best-effort: a failure is logged at debug level and never propagates.
        """
        try:
            ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
            query_dir = self.REPLAY_DIR / ts
            query_dir.mkdir(parents=True, exist_ok=True)

            for i, img in enumerate(batch_images):
                cv2.imwrite(str(query_dir / f"img_{i:03d}.jpg"), img,
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            preds = [
                {
                    "frame_id": r.frame_id,
                    "boxes": [b.model_dump() for b in r.boxes],
                }
                for r in results
            ]
            meta = {
                "timestamp": ts,
                "num_images": len(batch_images),
                "image_shapes": [list(img.shape) for img in batch_images],
                "predictions": preds,
            }
            (query_dir / "meta.json").write_text(json.dumps(meta, indent=2))
            self._replay_prune()
        except Exception:
            # The replay buffer must never break inference; just leave a trace.
            logger.debug("replay buffer save failed", exc_info=True)

    def _replay_prune(self):
        """Keep only the most recent REPLAY_MAX queries.

        Directory names are timestamps, so sorting by name orders them oldest
        first. Best-effort: a failure is logged at debug level.
        """
        try:
            dirs = sorted(
                [d for d in self.REPLAY_DIR.iterdir() if d.is_dir()],
                key=lambda d: d.name,
            )
            if len(dirs) > self.REPLAY_MAX:
                import shutil
                for old in dirs[: len(dirs) - self.REPLAY_MAX]:
                    shutil.rmtree(old, ignore_errors=True)
        except Exception:
            logger.debug("replay buffer prune failed", exc_info=True)

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Run detection on every image of a batch.

        Args:
            batch_images: BGR images to process.
            offset: Frame id assigned to the first image; later images count up from it.
            n_keypoints: Number of placeholder ``(0, 0)`` keypoints attached to each frame.

        Returns:
            One ``TVFrameResult`` per input image, in input order.
        """
        t_start = time.perf_counter()

        results: list[TVFrameResult] = []
        for idx, image in enumerate(batch_images):
            t_img = time.perf_counter()
            boxes = self._infer_single(image)
            dt_img = (time.perf_counter() - t_img) * 1000
            logger.info(f"[miner] image {idx}: {len(boxes)} boxes in {dt_img:.0f}ms "
                        f"(shape={image.shape}, TTA={ENABLE_TTA}, PAR={ENABLE_PARALLEL})")
            keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
            results.append(TVFrameResult(
                frame_id=offset + idx, boxes=boxes, keypoints=keypoints,
            ))

        dt_total = (time.perf_counter() - t_start) * 1000
        logger.info(f"[miner] predict_batch: {len(batch_images)} images, "
                    f"{sum(len(r.boxes) for r in results)} total boxes, {dt_total:.0f}ms")

        # Save to replay buffer (background thread)
        threading.Thread(
            target=self._replay_save,
            args=(batch_images, results),
            daemon=True,
        ).start()

        return results
person_weights.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95cd302649a2572cd20c92c3e3abf9dd0be61339c2a2be3665afc66d76efdcf3
3
- size 10546588
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa6c46dca9dc995b8a641674e8f760b80d9af067bf2874e16a6addfd77a387bf
3
+ size 10042378
person_weights_1280.onnx.bak ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95cd302649a2572cd20c92c3e3abf9dd0be61339c2a2be3665afc66d76efdcf3
3
+ size 10546588
vehicle_weights.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
3
- size 19272252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc35148056138102db485555b48704e485fcc917046c783ed3c30d27b7840a89
3
+ size 19019978
vehicle_weights_1280.onnx.bak ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
3
+ size 19272252