meaculpitt committed on
Commit
acc1f0d
·
verified ·
1 Parent(s): 8d82790

Delete miner.py.bak_v1 with huggingface_hub

Browse files
Files changed (1) hide show
  1. miner.py.bak_v1 +0 -470
miner.py.bak_v1 DELETED
@@ -1,470 +0,0 @@
1
- """
2
- Score Vision SN44 — Unified miner v1 (2026-03-27).
3
- Dual-model: vehicle (YOLO11s) + person (YOLO11s).
4
-
5
- Vehicle model (vehicle_weights.onnx):
6
- Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
7
- Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
8
-
9
- Person model (person_weights.onnx):
10
- Single class: 0=person
11
-
12
- Both models run on every image. All detections merged.
13
- cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
14
- Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
15
- """
16
-
17
- from pathlib import Path
18
- import math
19
-
20
- import cv2
21
- import numpy as np
22
- import onnxruntime as ort
23
- from numpy import ndarray
24
- from pydantic import BaseModel
25
-
26
- import json
27
- import threading
28
- from datetime import datetime, timezone
29
-
30
# ── Vehicle config ──────────────────────────────────────────────────────────
# Maps the vehicle model's class index to the emitted cls_id; per the module
# docstring this swaps car (0 -> 1) and bus (1 -> 0) to match the manifest.
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
# Number of vehicle classes. NOTE(review): not referenced in the visible code.
VEH_NUM_CLASSES = 4
# Side length (pixels) of the square letterboxed input built for the vehicle model.
VEH_IMG_SIZE = 1280
# Post-fusion confidence floor per emitted cls_id (looked up after the
# VEH_MODEL_TO_OUT remap has been applied).
VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
# Floor used for a cls_id that has no entry in VEH_CONF_PER_CLASS.
VEH_CONF_DEFAULT = 0.35
# Threshold applied to each individual TTA pass before the passes are fused.
VEH_TTA_CONF = 0.25
# IoU threshold handed to _wbf_multi when fusing the vehicle passes.
VEH_WBF_IOU = 0.55

# ── Person config ───────────────────────────────────────────────────────────
# Post-fusion confidence floor for person detections.
PER_CONF = 0.35
# Threshold applied to each individual TTA pass before the passes are fused.
PER_TTA_CONF = 0.25
# IoU threshold handed to _wbf_single when fusing the person passes.
PER_WBF_IOU = 0.45

# ── Shared ──────────────────────────────────────────────────────────────────
# Detections scoring below this are dropped on entry to either WBF routine.
WBF_SKIP_THR = 0.0001
46
-
47
-
48
- def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
49
- """Weighted Boxes Fusion (multi-class). Boxes in [0,1] normalized coords."""
50
- if not boxes_list:
51
- return np.empty((0, 4)), np.empty(0), np.empty(0)
52
-
53
- all_b, all_s, all_l = [], [], []
54
- for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
55
- for i in range(len(bx)):
56
- if sc[i] < skip_thr:
57
- continue
58
- all_b.append(bx[i])
59
- all_s.append(sc[i])
60
- all_l.append(int(lb[i]))
61
-
62
- if not all_b:
63
- return np.empty((0, 4)), np.empty(0), np.empty(0)
64
-
65
- all_b = np.array(all_b)
66
- all_s = np.array(all_s)
67
- all_l = np.array(all_l, dtype=int)
68
-
69
- fused_b, fused_s, fused_l = [], [], []
70
- for cls in np.unique(all_l):
71
- m = all_l == cls
72
- cb, cs = all_b[m], all_s[m]
73
- order = cs.argsort()[::-1]
74
- cb, cs = cb[order], cs[order]
75
-
76
- clusters, cboxes = [], []
77
- for i in range(len(cb)):
78
- matched, best_iou = -1, iou_thr
79
- for ci, cbox in enumerate(cboxes):
80
- xx1 = max(cb[i, 0], cbox[0])
81
- yy1 = max(cb[i, 1], cbox[1])
82
- xx2 = min(cb[i, 2], cbox[2])
83
- yy2 = min(cb[i, 3], cbox[3])
84
- inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
85
- a1 = (cb[i, 2] - cb[i, 0]) * (cb[i, 3] - cb[i, 1])
86
- a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
87
- iou = inter / (a1 + a2 - inter + 1e-9)
88
- if iou > best_iou:
89
- best_iou = iou
90
- matched = ci
91
- if matched >= 0:
92
- clusters[matched].append(i)
93
- idxs = clusters[matched]
94
- w = cs[idxs]
95
- cboxes[matched] = (cb[idxs] * w[:, None]).sum(0) / w.sum()
96
- else:
97
- clusters.append([i])
98
- cboxes.append(cb[i].copy())
99
-
100
- for ci, idxs in enumerate(clusters):
101
- fused_b.append(cboxes[ci])
102
- fused_s.append(cs[idxs].mean())
103
- fused_l.append(cls)
104
-
105
- if not fused_b:
106
- return np.empty((0, 4)), np.empty(0), np.empty(0)
107
- return np.array(fused_b), np.array(fused_s), np.array(fused_l)
108
-
109
-
110
- def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
111
- """Weighted Boxes Fusion (single-class). Boxes in [0,1] normalized coords."""
112
- if not boxes_list:
113
- return np.empty((0, 4)), np.empty(0)
114
-
115
- all_b, all_s = [], []
116
- for bx, sc in zip(boxes_list, scores_list):
117
- for i in range(len(bx)):
118
- if sc[i] < skip_thr:
119
- continue
120
- all_b.append(bx[i])
121
- all_s.append(sc[i])
122
-
123
- if not all_b:
124
- return np.empty((0, 4)), np.empty(0)
125
-
126
- all_b = np.array(all_b)
127
- all_s = np.array(all_s)
128
- order = all_s.argsort()[::-1]
129
- all_b, all_s = all_b[order], all_s[order]
130
-
131
- clusters, cboxes = [], []
132
- for i in range(len(all_b)):
133
- matched, best_iou = -1, iou_thr
134
- for ci, cbox in enumerate(cboxes):
135
- xx1 = max(all_b[i, 0], cbox[0])
136
- yy1 = max(all_b[i, 1], cbox[1])
137
- xx2 = min(all_b[i, 2], cbox[2])
138
- yy2 = min(all_b[i, 3], cbox[3])
139
- inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
140
- a1 = (all_b[i, 2] - all_b[i, 0]) * (all_b[i, 3] - all_b[i, 1])
141
- a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
142
- iou = inter / (a1 + a2 - inter + 1e-9)
143
- if iou > best_iou:
144
- best_iou = iou
145
- matched = ci
146
- if matched >= 0:
147
- clusters[matched].append(i)
148
- idxs = clusters[matched]
149
- w = all_s[idxs]
150
- cboxes[matched] = (all_b[idxs] * w[:, None]).sum(0) / w.sum()
151
- else:
152
- clusters.append([i])
153
- cboxes.append(all_b[i].copy())
154
-
155
- fused_b, fused_s = [], []
156
- for ci, idxs in enumerate(clusters):
157
- fused_b.append(cboxes[ci])
158
- fused_s.append(all_s[idxs].mean())
159
-
160
- if not fused_b:
161
- return np.empty((0, 4)), np.empty(0)
162
- return np.array(fused_b), np.array(fused_s)
163
-
164
-
165
class BoundingBox(BaseModel):
    """One detection: an axis-aligned box plus its class id and confidence."""

    # Corner coordinates. Miner fills these with integer pixel positions
    # clamped to the source image's width/height (x1/y1 floored, x2/y2 ceiled).
    x1: int
    y1: int
    x2: int
    y2: int
    # Class id. Per the module docstring, 0-3 for vehicles and 0 for persons,
    # so id 0 is shared between the two evaluations.
    cls_id: int
    # Detection confidence; Miner clamps it to [0.0, 1.0] before storing.
    conf: float
172
-
173
-
174
class TVFrameResult(BaseModel):
    """Per-frame result returned by ``Miner.predict_batch``."""

    # Batch offset plus the frame's index within the batch.
    frame_id: int
    # Every detection reported for the frame.
    boxes: list[BoundingBox]
    # (x, y) pairs; Miner only ever emits (0, 0) placeholders here.
    keypoints: list[tuple[int, int]]
178
-
179
-
180
class Miner:
    """Dual-model detector: runs a vehicle and a person ONNX model per image.

    Each model is evaluated on the original image and on its horizontal flip;
    the two passes are merged with Weighted Boxes Fusion and thresholded.
    Vehicle and person detections are then concatenated. Every batch is also
    written, best-effort, to an on-disk replay buffer from a daemon thread.
    """

    # -- Replay buffer -------------------------------------------------------
    # One sub-directory per predict_batch call, named by UTC timestamp.
    REPLAY_DIR = Path("/home/miner/replay_buffer")
    # Number of most-recent query directories kept by _replay_prune.
    REPLAY_MAX = 100

    def __init__(self, path_hf_repo: Path) -> None:
        """Load both ONNX sessions from ``path_hf_repo``.

        Args:
            path_hf_repo: Directory holding ``vehicle_weights.onnx`` and
                ``person_weights.onnx``.
        """
        self.path_hf_repo = path_hf_repo
        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

        # Vehicle model (YOLO11s, 4 classes)
        self.veh_session = ort.InferenceSession(
            str(path_hf_repo / "vehicle_weights.onnx"),
            providers=providers,
        )
        self.veh_input_name = self.veh_session.get_inputs()[0].name

        # Person model (YOLO11s, 1 class)
        self.per_session = ort.InferenceSession(
            str(path_hf_repo / "person_weights.onnx"),
            providers=providers,
        )
        per_input = self.per_session.get_inputs()[0]
        self.per_input_name = per_input.name
        # NOTE(review): assumes the person model was exported with a static
        # NCHW input shape; int() will raise if dims 2/3 are symbolic.
        self.per_h = int(per_input.shape[2])
        self.per_w = int(per_input.shape[3])

    def __repr__(self) -> str:
        return "Unified Miner v1 — dual-model vehicle+person"

    # ── Shared helpers ──────────────────────────────────────────────────────

    @staticmethod
    def _normalize_boxes(boxes, ow, oh):
        """Return a copy of pixel-space xyxy ``boxes`` scaled and clipped to [0, 1]."""
        norm = boxes.copy()
        norm[:, [0, 2]] /= ow
        norm[:, [1, 3]] /= oh
        return np.clip(norm, 0, 1)

    @staticmethod
    def _unflip_boxes(boxes, ow):
        """Mirror xyxy ``boxes`` detected on an h-flipped image back, in place."""
        if len(boxes):
            # Right-hand side is evaluated into fresh arrays before assignment,
            # so swapping the two columns this way is safe.
            boxes[:, 0], boxes[:, 2] = ow - boxes[:, 2], ow - boxes[:, 0]
        return boxes

    @staticmethod
    def _to_bounding_boxes(boxes, scores, cls_ids, ow, oh):
        """Convert pixel-space float boxes into clamped integer BoundingBox objects."""
        return [
            BoundingBox(
                # floor the top-left and ceil the bottom-right so the integer
                # box never shrinks below the fused float box.
                x1=max(0, min(ow, math.floor(box[0]))),
                y1=max(0, min(oh, math.floor(box[1]))),
                x2=max(0, min(ow, math.ceil(box[2]))),
                y2=max(0, min(oh, math.ceil(box[3]))),
                cls_id=int(cls_id),
                conf=max(0.0, min(1.0, float(score))),
            )
            for box, score, cls_id in zip(boxes, scores, cls_ids)
        ]

    # ── Vehicle preprocessing (letterbox) ───────────────────────────────────

    def _veh_letterbox(self, img):
        """Resize ``img`` preserving aspect ratio and pad to a VEH_IMG_SIZE square.

        Returns:
            ``(padded_image, ratio, pad_left, pad_top)``.
        """
        h, w = img.shape[:2]
        r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
        # Keep at least one pixel per side; a very elongated image would
        # otherwise round to a zero-sized resize target.
        nw = max(1, int(round(w * r)))
        nh = max(1, int(round(h * r)))
        img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
        dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
        pl, pt = dw // 2, dh // 2
        img_p = cv2.copyMakeBorder(
            img_r, pt, dh - pt, pl, dw - pl,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )
        return img_p, r, pl, pt

    def _veh_preprocess(self, image_bgr):
        """Letterbox, convert BGR->RGB, scale to [0, 1] and lay out as NCHW float32."""
        img_p, ratio, pl, pt = self._veh_letterbox(image_bgr)
        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
        inp = rgb.astype(np.float32) / 255.0
        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
        return inp, ratio, pl, pt

    def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
        """Decode raw vehicle-model output into pixel-space xyxy boxes.

        Args:
            raw: Model output with a leading batch axis; ``raw[0]`` is treated
                as a 2-D table of ``cx, cy, w, h, class scores...`` in either
                orientation.
            ratio, pl, pt: Letterbox scale and left/top padding to undo.
            ow, oh: Original image width/height, used for clipping.
            conf_thresh: Minimum best-class score to keep a row.

        Returns:
            ``(boxes, confs, cls_ids)``; all empty when nothing passes or the
            output is not a usable 2-D table.
        """
        empty = (np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int))
        pred = raw[0]
        # Same shape guards as _per_decode: bail out instead of raising on
        # malformed output.
        if pred.ndim != 2:
            return empty
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T
        if pred.shape[1] < 5:
            return empty
        cls_scores = pred[:, 4:]
        cls_ids = np.argmax(cls_scores, axis=1)
        confs = np.max(cls_scores, axis=1)
        mask = confs >= conf_thresh
        if not mask.any():
            return empty
        bx, confs, cls_ids = pred[mask, :4], confs[mask], cls_ids[mask]
        cx, cy, bw, bh = bx[:, 0], bx[:, 1], bx[:, 2], bx[:, 3]
        # Remove the letterbox padding, then undo the resize.
        x1 = np.clip((cx - bw / 2 - pl) / ratio, 0, ow)
        y1 = np.clip((cy - bh / 2 - pt) / ratio, 0, oh)
        x2 = np.clip((cx + bw / 2 - pl) / ratio, 0, ow)
        y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids

    def _veh_run_pass(self, image_bgr, conf_thresh):
        """Run one vehicle inference pass and decode it."""
        oh, ow = image_bgr.shape[:2]
        inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
        raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
        return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)

    def _infer_vehicle(self, image_bgr):
        """Detect vehicles with hflip TTA + WBF; returns a list of BoundingBox."""
        oh, ow = image_bgr.shape[:2]
        all_b, all_s, all_l = [], [], []

        def _collect(boxes, confs, cls_ids):
            if len(boxes) == 0:
                return
            all_b.append(self._normalize_boxes(boxes, ow, oh))
            all_s.append(confs)
            # Remap the model's class index to the emitted cls_id.
            all_l.append(np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids]))

        # Pass 1: original
        _collect(*self._veh_run_pass(image_bgr, VEH_TTA_CONF))
        # Pass 2: hflip
        bx, sc, cl = self._veh_run_pass(cv2.flip(image_bgr, 1), VEH_TTA_CONF)
        _collect(self._unflip_boxes(bx, ow), sc, cl)

        if not all_b:
            return []

        fb, fs, fl = _wbf_multi(all_b, all_s, all_l, iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR)
        if len(fb) == 0:
            return []

        # Back from normalized to pixel coordinates.
        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        keep = np.array([
            score >= VEH_CONF_PER_CLASS.get(int(label), VEH_CONF_DEFAULT)
            for score, label in zip(fs, fl)
        ])
        if not keep.any():
            return []
        return self._to_bounding_boxes(fb[keep], fs[keep], fl[keep], ow, oh)

    # ── Person preprocessing (stretch resize) ──────────────────────────────

    def _per_preprocess(self, image_bgr):
        """Convert BGR->RGB, stretch to the model input size, scale to [0, 1], NCHW."""
        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.per_w, self.per_h))
        x = resized.astype(np.float32) / 255.0
        return np.transpose(x, (2, 0, 1))[None, ...]

    def _per_decode(self, raw, oh, ow, conf_thresh):
        """Decode raw person-model output into pixel-space xyxy boxes.

        Returns:
            ``(boxes, confs)``; both empty when nothing passes or the output
            is not a usable 2-D table.
        """
        empty = (np.empty((0, 4)), np.empty(0))
        pred = raw[0]
        if pred.ndim != 2:
            return empty
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T
        if pred.shape[1] < 5:
            return empty
        confs = np.max(pred[:, 4:], axis=1)
        keep = confs >= conf_thresh
        boxes, confs = pred[keep, :4], confs[keep]
        if len(boxes) == 0:
            return empty
        # The input was stretched (no letterbox), so x and y scale independently.
        sx, sy = ow / float(self.per_w), oh / float(self.per_h)
        cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        x1 = np.clip((cx - bw / 2) * sx, 0, ow)
        y1 = np.clip((cy - bh / 2) * sy, 0, oh)
        x2 = np.clip((cx + bw / 2) * sx, 0, ow)
        y2 = np.clip((cy + bh / 2) * sy, 0, oh)
        return np.stack([x1, y1, x2, y2], axis=1), confs

    def _per_run_pass(self, image_bgr, conf_thresh):
        """Run one person inference pass and decode it."""
        oh, ow = image_bgr.shape[:2]
        inp = self._per_preprocess(image_bgr)
        raw = self.per_session.run(None, {self.per_input_name: inp})[0]
        return self._per_decode(raw, oh, ow, conf_thresh)

    def _infer_person(self, image_bgr):
        """Detect persons with hflip TTA + WBF; returns a list of BoundingBox (cls_id 0)."""
        oh, ow = image_bgr.shape[:2]
        all_b, all_s = [], []

        def _collect(boxes, confs):
            if len(boxes) == 0:
                return
            all_b.append(self._normalize_boxes(boxes, ow, oh))
            all_s.append(confs)

        # Pass 1: original
        _collect(*self._per_run_pass(image_bgr, PER_TTA_CONF))
        # Pass 2: hflip
        bx, sc = self._per_run_pass(cv2.flip(image_bgr, 1), PER_TTA_CONF)
        _collect(self._unflip_boxes(bx, ow), sc)

        if not all_b:
            return []

        fb, fs = _wbf_single(all_b, all_s, iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR)
        if len(fb) == 0:
            return []

        # Back from normalized to pixel coordinates.
        fb[:, [0, 2]] *= ow
        fb[:, [1, 3]] *= oh

        keep = fs >= PER_CONF
        fb, fs = fb[keep], fs[keep]
        return self._to_bounding_boxes(fb, fs, [0] * len(fb), ow, oh)

    # ── Unified inference ───────────────────────────────────────────────────

    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
        """Return vehicle detections followed by person detections for one image."""
        # A missing or zero-sized frame cannot be resized; report no detections
        # rather than failing the whole batch.
        if image_bgr is None or image_bgr.size == 0:
            return []
        vehicle_boxes = self._infer_vehicle(image_bgr)
        person_boxes = self._infer_person(image_bgr)
        return vehicle_boxes + person_boxes

    def _replay_save(self, batch_images, results):
        """Save validator query images + our predictions to replay buffer (background).

        Writes ``img_NNN.jpg`` per image and a ``meta.json`` into a new
        timestamp-named directory under REPLAY_DIR, then prunes old entries.
        Best-effort: any failure is logged at debug level and swallowed.
        """
        try:
            ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
            query_dir = self.REPLAY_DIR / ts
            query_dir.mkdir(parents=True, exist_ok=True)

            # Save each image as JPEG
            for i, img in enumerate(batch_images):
                cv2.imwrite(str(query_dir / f"img_{i:03d}.jpg"), img,
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            # Save predictions as JSON
            preds = [
                {
                    "frame_id": r.frame_id,
                    "boxes": [b.model_dump() for b in r.boxes],
                }
                for r in results
            ]
            meta = {
                "timestamp": ts,
                "num_images": len(batch_images),
                "image_shapes": [list(img.shape) for img in batch_images],
                "predictions": preds,
            }
            (query_dir / "meta.json").write_text(json.dumps(meta, indent=2))

            # Prune old entries
            self._replay_prune()
        except Exception:  # never break inference
            import logging
            logging.getLogger(__name__).debug("replay buffer save failed", exc_info=True)

    def _replay_prune(self):
        """Keep only the most recent REPLAY_MAX queries.

        Directory names are UTC timestamps, so sorting by name orders them
        oldest first. Best-effort: failures are logged at debug level.
        """
        try:
            dirs = sorted(
                (d for d in self.REPLAY_DIR.iterdir() if d.is_dir()),
                key=lambda d: d.name,
            )
            excess = len(dirs) - self.REPLAY_MAX
            if excess > 0:
                import shutil
                for old in dirs[:excess]:
                    shutil.rmtree(old, ignore_errors=True)
        except Exception:  # never break inference
            import logging
            logging.getLogger(__name__).debug("replay buffer prune failed", exc_info=True)

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Run detection on every image of a batch.

        Args:
            batch_images: BGR images (they are passed through BGR->RGB
                conversion before inference).
            offset: Added to each image's batch index to form ``frame_id``.
            n_keypoints: Number of ``(0, 0)`` placeholder keypoints to emit
                per frame; negative values yield none.

        Returns:
            One TVFrameResult per input image, in input order.
        """
        n_kp = max(0, int(n_keypoints))
        results: list[TVFrameResult] = []
        for idx, image in enumerate(batch_images):
            boxes = self._infer_single(image)
            keypoints = [(0, 0) for _ in range(n_kp)]
            results.append(TVFrameResult(
                frame_id=offset + idx, boxes=boxes, keypoints=keypoints,
            ))
        # Save to replay buffer in a background daemon thread so disk I/O
        # does not delay the response.
        threading.Thread(
            target=self._replay_save,
            args=(batch_images, results),
            daemon=True,
        ).start()

        return results