meaculpitt committed on
Commit
e0423f6
·
verified ·
1 Parent(s): aec8383

Delete miner.py.bak with huggingface_hub

Browse files
Files changed (1) hide show
  1. miner.py.bak +0 -408
miner.py.bak DELETED
@@ -1,408 +0,0 @@
1
- """
2
- Score Vision SN44 — Unified miner v1 (2026-03-27).
3
- Dual-model: vehicle (YOLO11s) + person (YOLO11s).
4
-
5
- Vehicle model (vehicle_weights.onnx):
6
- Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
7
- Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
8
-
9
- Person model (person_weights.onnx):
10
- Single class: 0=person
11
-
12
- Both models run on every image. All detections merged.
13
- cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
14
- Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
15
- """
16
-
17
- from pathlib import Path
18
- import math
19
-
20
- import cv2
21
- import numpy as np
22
- import onnxruntime as ort
23
- from numpy import ndarray
24
- from pydantic import BaseModel
25
-
26
# ── Vehicle config ──────────────────────────────────────────────────────────
# Vehicle-model class index -> output class index. The model was trained as
# 0=car, 1=bus, 2=truck, 3=motorcycle; the manifest expects 0=bus, 1=car,
# so only the first two ids are swapped.
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
# Number of classes of the vehicle model.
VEH_NUM_CLASSES = 4
# Side length (pixels) of the square letterboxed input fed to the vehicle model.
VEH_IMG_SIZE = 1280
# Final confidence threshold per *output* class id, applied after fusion.
VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
# Fallback final threshold for an output class id missing from the table above.
VEH_CONF_DEFAULT = 0.35
# Looser threshold applied to each individual pass (original / flipped)
# before the passes are fused.
VEH_TTA_CONF = 0.25
# IoU above which two same-class vehicle boxes are fused into one cluster.
VEH_WBF_IOU = 0.55

# ── Person config ───────────────────────────────────────────────────────────
# Final confidence threshold for person boxes, applied after fusion.
PER_CONF = 0.35
# Per-pass threshold applied before fusion.
PER_TTA_CONF = 0.25
# IoU above which two person boxes are fused into one cluster.
PER_WBF_IOU = 0.45

# ── Shared ──────────────────────────────────────────────────────────────────
# Candidates scoring below this are dropped before fusion.
WBF_SKIP_THR = 0.0001
42
-
43
-
44
def _wbf_iou(box_a, box_b):
    """Return the IoU of two ``[x1, y1, x2, y2]`` boxes."""
    inter_w = max(0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
    inter_h = max(0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
    inter = inter_w * inter_h
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    # The epsilon keeps two degenerate (zero-area) boxes from dividing by zero.
    return inter / (area_a + area_b - inter + 1e-9)


def _wbf_gather(boxes_list, scores_list, labels_list, skip_thr):
    """Concatenate the per-pass candidates, dropping scores below ``skip_thr``.

    ``labels_list`` may be ``None`` (single-class case); the returned label
    array is then all zeros. Returns ``(boxes, scores, labels)`` arrays; all
    three are empty when nothing survives.
    """
    if labels_list is None:
        labels_list = [None] * len(boxes_list)

    kept_b, kept_s, kept_l = [], [], []
    for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
        bx = np.asarray(bx)
        if len(bx) == 0:
            continue
        sc = np.asarray(sc).reshape(-1)[:len(bx)]
        # Written as "not below" so a NaN score is kept, as a per-element
        # ``if score < skip_thr: continue`` would keep it.
        keep = ~(sc < skip_thr)
        kept_b.append(bx[keep])
        kept_s.append(sc[keep])
        if lb is not None:
            kept_l.append(np.asarray(lb).reshape(-1)[:len(bx)][keep].astype(int))

    if not kept_b:
        return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)

    boxes = np.concatenate(kept_b)
    scores = np.concatenate(kept_s)
    labels = np.concatenate(kept_l) if kept_l else np.zeros(len(boxes), dtype=int)
    return boxes, scores, labels


def _wbf_fuse_class(boxes, scores, iou_thr):
    """Greedily cluster same-class boxes and fuse every cluster.

    Boxes are visited in descending score order. Each box joins the existing
    cluster whose *current fused box* has the highest IoU strictly above
    ``iou_thr``, otherwise it starts a new cluster. A cluster's fused box is
    the score-weighted mean of its members; its score is their plain mean.

    Returns ``(fused_boxes, fused_scores)`` as lists, in cluster-creation order.
    """
    order = scores.argsort()[::-1]
    boxes, scores = boxes[order], scores[order]

    members = []  # per cluster: indices (into the sorted arrays) of its boxes
    fused = []    # per cluster: current fused box
    for i, box in enumerate(boxes):
        matched, best_iou = -1, iou_thr
        for ci, fused_box in enumerate(fused):
            iou = _wbf_iou(box, fused_box)
            if iou > best_iou:
                matched, best_iou = ci, iou

        if matched < 0:
            members.append([i])
            fused.append(box.copy())
            continue

        members[matched].append(i)
        idxs = members[matched]
        weights = scores[idxs]
        fused[matched] = (boxes[idxs] * weights[:, None]).sum(0) / weights.sum()

    return fused, [scores[idxs].mean() for idxs in members]


def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
    """Weighted Boxes Fusion (multi-class). Boxes in [0,1] normalized coords.

    Args:
        boxes_list: one ``(n, 4)`` array of ``[x1, y1, x2, y2]`` per pass.
        scores_list: one ``(n,)`` score array per pass.
        labels_list: one ``(n,)`` class-id array per pass.
        iou_thr: IoU a box must exceed to join an existing cluster.
        skip_thr: candidates scoring below this are ignored.

    Returns:
        ``(boxes, scores, labels)`` arrays. Boxes of different classes are
        never fused together; classes appear in ascending id order. All three
        arrays are empty (labels with an integer dtype) when there is nothing
        to fuse.
    """
    if not boxes_list:
        return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)

    boxes, scores, labels = _wbf_gather(boxes_list, scores_list, labels_list, skip_thr)
    if len(boxes) == 0:
        return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)

    fused_b, fused_s, fused_l = [], [], []
    for cls in np.unique(labels):
        sel = labels == cls
        cls_boxes, cls_scores = _wbf_fuse_class(boxes[sel], scores[sel], iou_thr)
        fused_b.extend(cls_boxes)
        fused_s.extend(cls_scores)
        fused_l.extend([cls] * len(cls_boxes))

    return np.array(fused_b), np.array(fused_s), np.array(fused_l)


def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
    """Weighted Boxes Fusion (single-class). Boxes in [0,1] normalized coords.

    Args:
        boxes_list: one ``(n, 4)`` array of ``[x1, y1, x2, y2]`` per pass.
        scores_list: one ``(n,)`` score array per pass.
        iou_thr: IoU a box must exceed to join an existing cluster.
        skip_thr: candidates scoring below this are ignored.

    Returns:
        ``(boxes, scores)`` arrays, both empty when there is nothing to fuse.
    """
    if not boxes_list:
        return np.empty((0, 4)), np.empty(0)

    boxes, scores, _ = _wbf_gather(boxes_list, scores_list, None, skip_thr)
    if len(boxes) == 0:
        return np.empty((0, 4)), np.empty(0)

    fused_b, fused_s = _wbf_fuse_class(boxes, scores, iou_thr)
    return np.array(fused_b), np.array(fused_s)
159
-
160
-
161
class BoundingBox(BaseModel):
    """One detection: a corner-format box with a class id and a confidence.

    As built by ``Miner``, the coordinates are integer pixels clamped to the
    image bounds and ``conf`` is clamped to ``[0, 1]``; the model itself only
    declares the field types.
    """

    # Top-left corner.
    x1: int
    y1: int
    # Bottom-right corner.
    x2: int
    y2: int
    # Class index. Id 0 is shared: "bus" from the vehicle model,
    # "person" from the person model.
    cls_id: int
    # Detection confidence.
    conf: float
168
-
169
-
170
class TVFrameResult(BaseModel):
    """Detections for a single frame of a batch."""

    # Frame index; ``Miner.predict_batch`` sets it to batch offset + position.
    frame_id: int
    # Every box detected on the frame (vehicle and person boxes together).
    boxes: list[BoundingBox]
    # (x, y) keypoints; ``Miner.predict_batch`` fills these with (0, 0) placeholders.
    keypoints: list[tuple[int, int]]
174
-
175
-
176
class Miner:
    """Run the vehicle and person ONNX detectors on frames and merge their boxes.

    Every frame is passed through both models twice (original image and its
    horizontal flip). The two passes of each model are merged with Weighted
    Boxes Fusion and then thresholded. Vehicle boxes carry class ids remapped
    through ``VEH_MODEL_TO_OUT``; person boxes always carry ``cls_id=0``.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """Load ``vehicle_weights.onnx`` and ``person_weights.onnx`` from the repo dir.

        Raises:
            ValueError: if the person model does not declare a static input
                height/width (its input is resized to exactly that size).
        """
        self.path_hf_repo = path_hf_repo

        # Vehicle model (YOLO11s, 4 classes)
        self.veh_session = ort.InferenceSession(
            str(path_hf_repo / "vehicle_weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.veh_input_name = self.veh_session.get_inputs()[0].name

        # Person model (YOLO11s, 1 class)
        self.per_session = ort.InferenceSession(
            str(path_hf_repo / "person_weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        per_input = self.per_session.get_inputs()[0]
        self.per_input_name = per_input.name
        per_shape = per_input.shape
        try:
            self.per_h = int(per_shape[2])
            self.per_w = int(per_shape[3])
        except (TypeError, ValueError, IndexError) as err:
            # Dynamic axes are reported as names or None, which int() rejects
            # with a message that does not mention the model at all.
            raise ValueError(
                "person_weights.onnx must declare a static input height and "
                f"width, got input shape {per_shape!r}"
            ) from err

    def __repr__(self) -> str:
        return "Unified Miner v1 — dual-model vehicle+person"

    # ── Shared helpers ──────────────────────────────────────────────────────

    @staticmethod
    def _filter_predictions(raw, conf_thresh):
        """Pick the predictions of a raw model output that reach ``conf_thresh``.

        ``raw[0]`` must be 2-D with at least 5 values per prediction (4 box
        values followed by class scores); it is transposed when predictions
        lie along the second axis.

        Returns ``(boxes_cxcywh, confs, cls_ids)``, or ``None`` when the output
        has an unexpected shape or nothing passes the threshold.
        """
        pred = raw[0]
        if pred.ndim != 2:
            return None
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T
        if pred.shape[1] < 5:
            return None
        cls_scores = pred[:, 4:]
        confs = np.max(cls_scores, axis=1)
        keep = confs >= conf_thresh
        if not keep.any():
            return None
        return pred[keep, :4], confs[keep], np.argmax(cls_scores[keep], axis=1)

    @staticmethod
    def _unflip_boxes(boxes, ow):
        """Mirror boxes found on a horizontally flipped image back (in place)."""
        if len(boxes):
            # The right-hand side is fully evaluated before either column is written.
            boxes[:, 0], boxes[:, 2] = ow - boxes[:, 2], ow - boxes[:, 0]
        return boxes

    @staticmethod
    def _normalize_boxes(boxes, ow, oh):
        """Return a copy of pixel-space boxes scaled to ``[0, 1]`` for fusion."""
        norm = boxes.copy()
        norm[:, [0, 2]] /= ow
        norm[:, [1, 3]] /= oh
        return np.clip(norm, 0, 1)

    @staticmethod
    def _to_bounding_boxes(boxes, scores, labels, ow, oh):
        """Build ``BoundingBox`` models from pixel-space float boxes.

        Corners are rounded outwards (floor for x1/y1, ceil for x2/y2) and
        clamped to the image; confidences are clamped to ``[0, 1]``.
        """
        return [
            BoundingBox(
                x1=max(0, min(ow, math.floor(box[0]))),
                y1=max(0, min(oh, math.floor(box[1]))),
                x2=max(0, min(ow, math.ceil(box[2]))),
                y2=max(0, min(oh, math.ceil(box[3]))),
                cls_id=int(label),
                conf=max(0.0, min(1.0, float(score))),
            )
            for box, score, label in zip(boxes, scores, labels)
        ]

    # ── Vehicle preprocessing (letterbox) ───────────────────────────────────

    def _veh_letterbox(self, img):
        """Resize keeping the aspect ratio, then pad to a gray VEH_IMG_SIZE square.

        Returns ``(padded_image, scale, pad_left, pad_top)``.
        """
        h, w = img.shape[:2]
        r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
        # Clamp to 1 px so an extreme aspect ratio never asks cv2 for an empty image.
        nw, nh = max(1, int(round(w * r))), max(1, int(round(h * r)))
        img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
        dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
        pl, pt = dw // 2, dh // 2
        img_p = cv2.copyMakeBorder(
            img_r, pt, dh - pt, pl, dw - pl,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )
        return img_p, r, pl, pt

    def _veh_preprocess(self, image_bgr):
        """Letterbox a BGR image and convert it to a ``1x3xHxW`` float32 RGB tensor in [0, 1]."""
        img_p, ratio, pl, pt = self._veh_letterbox(image_bgr)
        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
        inp = rgb.astype(np.float32) / 255.0
        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
        return inp, ratio, pl, pt

    def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
        """Turn raw vehicle output into pixel boxes on the original image.

        Undoes the letterbox (subtract padding, divide by scale) and clips to
        the image. Returns ``(boxes_xyxy, confs, cls_ids)``; all empty when the
        output is malformed or nothing reaches ``conf_thresh``.
        """
        kept = self._filter_predictions(raw, conf_thresh)
        if kept is None:
            return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
        bx, confs, cls_ids = kept
        cx, cy, bw, bh = bx[:, 0], bx[:, 1], bx[:, 2], bx[:, 3]
        x1 = np.clip((cx - bw / 2 - pl) / ratio, 0, ow)
        y1 = np.clip((cy - bh / 2 - pt) / ratio, 0, oh)
        x2 = np.clip((cx + bw / 2 - pl) / ratio, 0, ow)
        y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids

    def _veh_run_pass(self, image_bgr, conf_thresh):
        """Run the vehicle model once on ``image_bgr`` and decode its output."""
        oh, ow = image_bgr.shape[:2]
        inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
        raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
        return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)

    def _infer_vehicle(self, image_bgr):
        """Detect vehicles with flip test-time augmentation.

        Both passes use the loose ``VEH_TTA_CONF`` threshold, are fused with
        ``_wbf_multi``, and the fused boxes are then filtered with the
        per-class thresholds in ``VEH_CONF_PER_CLASS``.
        """
        oh, ow = image_bgr.shape[:2]

        # Pass 1: original. Pass 2: hflip, mirrored back to original coordinates.
        passes = [self._veh_run_pass(image_bgr, VEH_TTA_CONF)]
        flip_b, flip_s, flip_c = self._veh_run_pass(cv2.flip(image_bgr, 1), VEH_TTA_CONF)
        passes.append((self._unflip_boxes(flip_b, ow), flip_s, flip_c))

        all_b, all_s, all_l = [], [], []
        for boxes, confs, cls_ids in passes:
            if len(boxes) == 0:
                continue
            all_b.append(self._normalize_boxes(boxes, ow, oh))
            all_s.append(confs)
            all_l.append(np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids]))

        if not all_b:
            return []

        fb, fs, fl = _wbf_multi(all_b, all_s, all_l, iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR)
        if len(fb) == 0:
            return []

        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        thresholds = np.array([VEH_CONF_PER_CLASS.get(int(c), VEH_CONF_DEFAULT) for c in fl])
        keep = fs >= thresholds
        if not keep.any():
            return []
        return self._to_bounding_boxes(fb[keep], fs[keep], fl[keep], ow, oh)

    # ── Person preprocessing (stretch resize) ──────────────────────────────

    def _per_preprocess(self, image_bgr):
        """Stretch a BGR image to the person model's input size as a ``1x3xHxW`` float32 RGB tensor."""
        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.per_w, self.per_h))
        x = resized.astype(np.float32) / 255.0
        # Contiguous like the vehicle tensor, so both sessions get the same layout.
        return np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None, ...])

    def _per_decode(self, raw, oh, ow, conf_thresh):
        """Turn raw person output into pixel boxes on the original image.

        The input was stretched, so x and y are rescaled independently.
        Returns ``(boxes_xyxy, confs)``; both empty when the output is
        malformed or nothing reaches ``conf_thresh``.
        """
        kept = self._filter_predictions(raw, conf_thresh)
        if kept is None:
            return np.empty((0, 4)), np.empty(0)
        boxes, confs, _ = kept
        sx, sy = ow / float(self.per_w), oh / float(self.per_h)
        cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        x1 = np.clip((cx - bw / 2) * sx, 0, ow)
        y1 = np.clip((cy - bh / 2) * sy, 0, oh)
        x2 = np.clip((cx + bw / 2) * sx, 0, ow)
        y2 = np.clip((cy + bh / 2) * sy, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs

    def _per_run_pass(self, image_bgr, conf_thresh):
        """Run the person model once on ``image_bgr`` and decode its output."""
        oh, ow = image_bgr.shape[:2]
        inp = self._per_preprocess(image_bgr)
        raw = self.per_session.run(None, {self.per_input_name: inp})[0]
        return self._per_decode(raw, oh, ow, conf_thresh)

    def _infer_person(self, image_bgr):
        """Detect persons with flip test-time augmentation.

        Both passes use the loose ``PER_TTA_CONF`` threshold, are fused with
        ``_wbf_single``, and the fused boxes are then filtered at ``PER_CONF``.
        """
        oh, ow = image_bgr.shape[:2]

        # Pass 1: original. Pass 2: hflip, mirrored back to original coordinates.
        passes = [self._per_run_pass(image_bgr, PER_TTA_CONF)]
        flip_b, flip_s = self._per_run_pass(cv2.flip(image_bgr, 1), PER_TTA_CONF)
        passes.append((self._unflip_boxes(flip_b, ow), flip_s))

        all_b, all_s = [], []
        for boxes, confs in passes:
            if len(boxes) == 0:
                continue
            all_b.append(self._normalize_boxes(boxes, ow, oh))
            all_s.append(confs)

        if not all_b:
            return []

        fb, fs = _wbf_single(all_b, all_s, iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR)
        if len(fb) == 0:
            return []

        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        keep = fs >= PER_CONF
        fb, fs = fb[keep], fs[keep]
        return self._to_bounding_boxes(fb, fs, [0] * len(fb), ow, oh)

    # ── Unified inference ───────────────────────────────────────────────────

    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
        """Return the vehicle boxes followed by the person boxes for one frame."""
        vehicle_boxes = self._infer_vehicle(image_bgr)
        person_boxes = self._infer_person(image_bgr)
        return vehicle_boxes + person_boxes

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Detect on every image of a batch.

        Args:
            batch_images: BGR frames.
            offset: frame id of the first image; later images count up from it.
            n_keypoints: number of placeholder ``(0, 0)`` keypoints to attach
                to each result (negative values are treated as 0).

        Returns:
            One ``TVFrameResult`` per input image, in input order.
        """
        keypoint_count = max(0, int(n_keypoints))
        return [
            TVFrameResult(
                frame_id=offset + idx,
                boxes=self._infer_single(image),
                keypoints=[(0, 0)] * keypoint_count,
            )
            for idx, image in enumerate(batch_images)
        ]