meaculpitt committed on
Commit
2fcf3b1
·
verified ·
1 Parent(s): 37095dc

deploy push for crime (deploy)

Browse files
Files changed (2) hide show
  1. miner.py +219 -185
  2. weights_rfdetr.onnx +3 -0
miner.py CHANGED
@@ -1,18 +1,25 @@
1
- # build-marker: v1-yolo26s-1280-tta
2
- """SN44 crime detection miner — single-element chute for manak0/Detect-crime.
3
-
4
- Adapted from beverage v5 miner.py with these crime-specific changes:
5
- - class_names = ["balaclava","hoodie","glove","bat","spray paint","graffiti"]
6
- - cls_remap = identity (model trained natively in validator class order, no remap needed)
7
- - conf_threshold = 0.52 (alfred-aligned, slightly looser than beverage's 0.55)
8
- - iou_thresh = 0.4 (slightly tighter than beverage's 0.5)
9
- - min_box_area = 196 (14x14 px², larger than beverage's 100; kills tiny FPs aggressively)
10
- - weights.onnx is yolo26s e2e at 1280x1280 input
11
-
12
- Pipeline:
13
- preprocess (letterbox 1280, cubic upscale) -> ONNX (e2e [1,300,6])
14
- -> conf filter -> per-class hard NMS -> cross-class dedup -> geometry filter
15
- -> TTA (h-flip + conf-boost on consensus) -> BoundingBox list
 
 
 
 
 
 
 
16
  """
17
  import math
18
  from pathlib import Path
@@ -39,78 +46,47 @@ class TVFrameResult(BaseModel):
39
  keypoints: list[tuple[int, int]]
40
 
41
 
42
- class Miner:
43
- """yolo26s e2e ONNX miner for manak0/Detect-crime.
44
- Chute platform calls predict_batch(batch_images, offset, n_keypoints).
45
- """
46
 
47
- def __init__(self, path_hf_repo) -> None:
48
- self.path_hf_repo = Path(path_hf_repo)
49
 
50
- # Validator class order — model trained natively in this order so identity remap.
 
 
 
 
 
51
  self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
52
- self.cls_remap = np.arange(len(self.class_names), dtype=np.int32)
53
-
54
- try:
55
- ort.preload_dlls()
56
- except Exception:
57
- pass
58
 
59
  sess_options = ort.SessionOptions()
60
  sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
61
-
62
  try:
63
  self.session = ort.InferenceSession(
64
- str(self.path_hf_repo / "weights.onnx"),
65
  sess_options=sess_options,
66
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
67
  )
68
  except Exception:
69
  self.session = ort.InferenceSession(
70
- str(self.path_hf_repo / "weights.onnx"),
71
  sess_options=sess_options,
72
  providers=["CPUExecutionProvider"],
73
  )
74
-
75
  self.input_name = self.session.get_inputs()[0].name
76
  self.output_names = [o.name for o in self.session.get_outputs()]
77
-
78
- # Match exported ONNX resolution.
79
  self.input_h = 1280
80
  self.input_w = 1280
81
-
82
- # alfred-aligned crime thresholds.
83
  self.conf_threshold = 0.52
84
- self.iou_thresh = 0.4 # per-class hard NMS
85
- self.cross_iou_thresh = 0.7 # cross-class dedup
86
  self.max_det = 150
87
  self.use_tta = True
88
-
89
- # Geometry filters (alfred crime values).
90
- self.min_box_area = 196 # 14x14 px²
91
  self.min_side = 8
92
  self.max_aspect_ratio = 8.0
93
 
94
- # GPU warmup.
95
- warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
96
- for _ in range(5):
97
- try:
98
- self._infer_single(warm)
99
- except Exception:
100
- break
101
-
102
- def __repr__(self) -> str:
103
- return (
104
- f"CrimeMiner v1 input={self.input_h}x{self.input_w} "
105
- f"classes={len(self.class_names)} use_tta={self.use_tta} "
106
- f"providers={self.session.get_providers()}"
107
- )
108
-
109
- # ---------------------------------------------------------------- preproc
110
- def _letterbox(self, image: ndarray) -> tuple[ndarray, float, tuple[float, float]]:
111
- """Aspect-preserving resize + 114-grey pad to (input_h, input_w).
112
- Cubic when upscaling (small-object fidelity), linear when downscaling.
113
- """
114
  h, w = image.shape[:2]
115
  ratio = min(self.input_w / w, self.input_h / h)
116
  nw, nh = int(round(w * ratio)), int(round(h * ratio))
@@ -122,51 +98,41 @@ class Miner:
122
  canvas = np.full((self.input_h, self.input_w, 3), 114, dtype=np.uint8)
123
  dy = (self.input_h - nh) // 2
124
  dx = (self.input_w - nw) // 2
125
- canvas[dy:dy + nh, dx:dx + nw] = resized
126
  return canvas, ratio, (float(dx), float(dy))
127
 
128
- def _preprocess(self, image_bgr: ndarray):
129
  canvas, ratio, pad = self._letterbox(image_bgr)
130
  rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
131
  x = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)[None, ...]
132
  return np.ascontiguousarray(x, dtype=np.float32), ratio, pad
133
 
134
- # ---------------------------------------------------------------- nms helpers
135
  @staticmethod
136
- def _hard_nms(boxes: ndarray, scores: ndarray, iou_thresh: float) -> ndarray:
137
  n = len(boxes)
138
- if n == 0:
139
- return np.array([], dtype=np.intp)
140
  order = np.argsort(scores)[::-1]
141
- keep: list[int] = []
142
- suppressed = np.zeros(n, dtype=bool)
143
  for i in range(n):
144
  idx = order[i]
145
- if suppressed[idx]:
146
- continue
147
  keep.append(int(idx))
148
  bi = boxes[idx]
149
  for k in range(i + 1, n):
150
  jdx = order[k]
151
- if suppressed[jdx]:
152
- continue
153
  bj = boxes[jdx]
154
- xx1 = max(bi[0], bj[0]); yy1 = max(bi[1], bj[1])
155
- xx2 = min(bi[2], bj[2]); yy2 = min(bi[3], bj[3])
156
- inter = max(0.0, xx2 - xx1) * max(0.0, yy2 - yy1)
157
- ai = (bi[2] - bi[0]) * (bi[3] - bi[1])
158
- aj = (bj[2] - bj[0]) * (bj[3] - bj[1])
159
  iou = inter / (ai + aj - inter + 1e-7)
160
- if iou > iou_thresh:
161
- suppressed[jdx] = True
162
  return np.array(keep, dtype=np.intp)
163
 
164
- def _per_class_hard_nms(
165
- self, boxes: ndarray, scores: ndarray, cls_ids: ndarray, iou_thresh: float
166
- ) -> ndarray:
167
- if len(boxes) == 0:
168
- return np.array([], dtype=np.intp)
169
- all_keep: list[int] = []
170
  for c in np.unique(cls_ids):
171
  mask = cls_ids == c
172
  indices = np.where(mask)[0]
@@ -176,168 +142,236 @@ class Miner:
176
  return np.array(all_keep, dtype=np.intp)
177
 
178
  @staticmethod
179
- def _cross_class_dedup(
180
- boxes: ndarray, scores: ndarray, cls_ids: ndarray, iou_thresh: float
181
- ) -> tuple[ndarray, ndarray, ndarray]:
182
- """Suppress high-overlap duplicates across classes (FP reducer)."""
183
  n = len(boxes)
184
- if n <= 1:
185
- return boxes, scores, cls_ids
186
- areas = np.maximum(0.0, boxes[:, 2] - boxes[:, 0]) * np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
187
  order = np.lexsort((-scores, -areas))
188
- suppressed = np.zeros(n, dtype=bool)
189
- keep: list[int] = []
190
  for i in order:
191
- if suppressed[i]:
192
- continue
193
  keep.append(int(i))
194
  bi = boxes[i]
195
  xx1 = np.maximum(bi[0], boxes[:, 0]); yy1 = np.maximum(bi[1], boxes[:, 1])
196
  xx2 = np.minimum(bi[2], boxes[:, 2]); yy2 = np.minimum(bi[3], boxes[:, 3])
197
- inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
198
- ai = max(1e-7, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
199
  iou = inter / (ai + areas - inter + 1e-7)
200
- dup = iou > iou_thresh
201
- dup[i] = False
202
  suppressed |= dup
203
  kept = np.array(keep, dtype=np.intp)
204
  return boxes[kept], scores[kept], cls_ids[kept]
205
 
206
  @staticmethod
207
- def _max_score_per_cluster(
208
- coords: ndarray, scores: ndarray, keep_idx: ndarray, iou_thresh: float
209
- ) -> ndarray:
210
- """For each kept box, return max original score among any overlapping cluster member."""
211
- if len(keep_idx) == 0:
212
- return np.array([], dtype=np.float32)
213
  out = np.empty(len(keep_idx), dtype=np.float32)
214
  for j, idx in enumerate(keep_idx):
215
  bi = coords[idx]
216
  xx1 = np.maximum(bi[0], coords[:, 0]); yy1 = np.maximum(bi[1], coords[:, 1])
217
  xx2 = np.minimum(bi[2], coords[:, 2]); yy2 = np.minimum(bi[3], coords[:, 3])
218
- inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
219
- ai = (bi[2] - bi[0]) * (bi[3] - bi[1])
220
- aj = (coords[:, 2] - coords[:, 0]) * (coords[:, 3] - coords[:, 1])
221
  iou = inter / (ai + aj - inter + 1e-7)
222
  out[j] = float(np.max(scores[iou >= iou_thresh]))
223
  return out
224
 
225
- # ---------------------------------------------------------------- inference
226
- def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
227
  inp, ratio, (dx, dy) = self._preprocess(image_bgr)
228
  out = self.session.run(self.output_names, {self.input_name: inp})[0]
229
- if out.ndim == 3:
230
- out = out[0]
231
-
232
  confs = out[:, 4].astype(np.float32)
233
  keep = confs >= self.conf_threshold
234
- if not keep.any():
235
- return []
236
  out = out[keep]
237
-
238
  boxes = out[:, :4].astype(np.float32).copy()
239
  confs = out[:, 4].astype(np.float32)
240
  cls_ids = self.cls_remap[out[:, 5].astype(np.int32)]
241
-
242
- # Reverse letterbox: model-space xyxy -> original-image xyxy
243
  boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dx) / ratio
244
  boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dy) / ratio
245
-
246
- orig_h, orig_w = image_bgr.shape[:2]
247
- boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, orig_w - 1)
248
- boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, orig_h - 1)
249
-
250
  if len(boxes) > 1:
251
  keep_idx = self._per_class_hard_nms(boxes, confs, cls_ids, self.iou_thresh)
252
  keep_idx = keep_idx[: self.max_det]
253
- boxes = boxes[keep_idx]
254
- confs = confs[keep_idx]
255
- cls_ids = cls_ids[keep_idx]
256
- boxes, confs, cls_ids = self._cross_class_dedup(
257
- boxes, confs, cls_ids, self.cross_iou_thresh
258
- )
259
-
260
- return self._to_boundingboxes(boxes, confs, cls_ids, orig_w, orig_h)
261
 
262
- def _infer_tta(self, image_bgr: ndarray) -> list[BoundingBox]:
263
- """H-flip TTA: union(orig, flipped) -> per-class NMS -> conf-boost."""
264
  boxes_orig = self._infer_single(image_bgr)
265
-
266
  h, w = image_bgr.shape[:2]
267
  flipped = cv2.flip(image_bgr, 1)
268
  boxes_flip_raw = self._infer_single(flipped)
269
- boxes_flip = [
270
- BoundingBox(x1=w - b.x2, y1=b.y1, x2=w - b.x1, y2=b.y2,
271
- cls_id=b.cls_id, conf=b.conf)
272
- for b in boxes_flip_raw
273
- ]
274
-
275
  all_boxes = boxes_orig + boxes_flip
276
- if not all_boxes:
277
- return []
278
-
279
  coords = np.array([[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32)
280
  scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
281
  cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)
282
-
283
  keep_idx = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thresh)
284
- if len(keep_idx) == 0:
285
- return []
286
  keep_idx = keep_idx[: self.max_det]
287
  boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)
288
-
289
- out_boxes: list[BoundingBox] = []
290
  for j, idx in enumerate(keep_idx):
291
  b = all_boxes[idx]
292
- out_boxes.append(BoundingBox(
293
- x1=b.x1, y1=b.y1, x2=b.x2, y2=b.y2,
294
- cls_id=b.cls_id,
295
- conf=max(0.0, min(1.0, float(boosted[j]))),
296
- ))
297
- return out_boxes
298
 
299
- def _to_boundingboxes(
300
- self, boxes: ndarray, confs: ndarray, cls_ids: ndarray,
301
- orig_w: int, orig_h: int,
302
- ) -> list[BoundingBox]:
303
- out: list[BoundingBox] = []
304
  for i in range(len(boxes)):
305
  x1, y1, x2, y2 = boxes[i]
306
  ix1 = max(0, min(orig_w, math.floor(x1)))
307
  iy1 = max(0, min(orig_h, math.floor(y1)))
308
  ix2 = max(0, min(orig_w, math.ceil(x2)))
309
  iy2 = max(0, min(orig_h, math.ceil(y2)))
310
- if ix2 <= ix1 or iy2 <= iy1:
311
- continue
312
  bw, bh = ix2 - ix1, iy2 - iy1
313
- if bw * bh < self.min_box_area:
314
- continue
315
- if min(bw, bh) < self.min_side:
316
- continue
317
  ar = max(bw / max(bh, 1), bh / max(bw, 1))
318
- if ar > self.max_aspect_ratio:
319
- continue
320
- out.append(BoundingBox(
321
- x1=ix1, y1=iy1, x2=ix2, y2=iy2,
322
- cls_id=int(cls_ids[i]),
323
- conf=max(0.0, min(1.0, float(confs[i]))),
324
- ))
325
  return out
326
 
327
- # ---------------------------------------------------------------- entry
328
- def predict_batch(
329
- self,
330
- batch_images: list[ndarray],
331
- offset: int,
332
- n_keypoints: int,
333
- ) -> list[TVFrameResult]:
334
- infer = self._infer_tta if self.use_tta else self._infer_single
335
- results: list[TVFrameResult] = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  for idx, image in enumerate(batch_images):
337
- boxes = infer(image)
 
 
 
 
338
  results.append(TVFrameResult(
339
  frame_id=offset + idx,
340
- boxes=boxes,
341
  keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
342
  ))
343
  return results
 
1
+ # build-marker: v2-ensemble-alfred-rfdetr
2
+ """SN44 crime detection miner — ENSEMBLE of alfred yolo26n + RF-DETR base.
3
+
4
+ Composes two internal miners with different preprocess/inference pipelines:
5
+ _AlfredMiner: yolo26n e2e ONNX, letterbox 1280 + /255, TTA (h-flip + conf boost)
6
+ _RFDETRMiner: rfdetr base e2e ONNX, stretch 1288 + ImageNet normalize, no TTA
7
+
8
+ Class routing (final union; an RF-DETR box is dropped when it overlaps a same-class alfred box at IoU >= 0.5):
9
+ cls0 balaclava : BOTH (alfred priority on conflicts)
10
+ cls1 hoodie : BOTH (alfred priority on conflicts)
11
+ cls2 glove : RF-DETR only
12
+ cls3 bat : RF-DETR only
13
+ cls4 spray paint: RF-DETR only
14
+ cls5 graffiti : alfred only
15
+
16
+ Conf threshold 0.52 is applied INSIDE each internal miner; the union is the
17
+ already-thresholded boxes from each. This matches alfred's existing per-class
18
+ calibration (TTA conf-boost happens against the 0.52 threshold).
19
+
20
+ ONNX file names expected in path_hf_repo:
21
+ weights.onnx - alfred yolo26n e2e [1,300,6] in input-pixel coords (1280)
22
+ weights_rfdetr.onnx - RF-DETR base e2e [1,300,6] in input-pixel coords (1288)
23
  """
24
  import math
25
  from pathlib import Path
 
46
  keypoints: list[tuple[int, int]]
47
 
48
 
49
+ _IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
50
+ _IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
 
 
51
 
 
 
52
 
53
+ # ============================================================ ALFRED PATH
54
+ # Verbatim alfred-style pipeline (letterbox + TTA). Returns list[BoundingBox]
55
+ # already conf-filtered at 0.52, geometry-filtered, NMS'd, cross-class deduped.
56
+ class _AlfredMiner:
57
def __init__(self, path_hf_repo: Path):
    """Open ``weights.onnx`` under *path_hf_repo* and set the pipeline thresholds.

    Args:
        path_hf_repo: Directory that contains ``weights.onnx``. The value is
            coerced with ``pathlib.Path`` so a plain string also works with
            the ``/`` join below.
    """
    self.path_hf_repo = Path(path_hf_repo)
    self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
    # Identity remap, sized from class_names so the two cannot drift apart.
    self.cls_remap = np.arange(len(self.class_names), dtype=np.int32)

    weights_path = str(self.path_hf_repo / "weights.onnx")
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    try:
        self.session = ort.InferenceSession(
            weights_path,
            sess_options=sess_options,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
    except Exception:
        # Deliberate best-effort: if the CUDA-first attempt fails for any
        # reason, retry with the CPU provider only.
        self.session = ort.InferenceSession(
            weights_path,
            sess_options=sess_options,
            providers=["CPUExecutionProvider"],
        )
    self.input_name = self.session.get_inputs()[0].name
    self.output_names = [o.name for o in self.session.get_outputs()]

    # Canvas size produced by _letterbox.
    self.input_h = 1280
    self.input_w = 1280

    self.conf_threshold = 0.52    # minimum confidence kept by _infer_single
    self.iou_thresh = 0.4         # IoU used for per-class NMS and TTA score boosting
    self.cross_iou_thresh = 0.7   # IoU used by _cross_class_dedup
    self.max_det = 150            # cap on boxes kept after per-class NMS
    self.use_tta = True           # predict_one dispatches to _infer_tta when set

    # Geometry limits applied in _to_boundingboxes (integer pixel units).
    self.min_box_area = 196
    self.min_side = 8
    self.max_aspect_ratio = 8.0
88
 
89
+ def _letterbox(self, image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  h, w = image.shape[:2]
91
  ratio = min(self.input_w / w, self.input_h / h)
92
  nw, nh = int(round(w * ratio)), int(round(h * ratio))
 
98
  canvas = np.full((self.input_h, self.input_w, 3), 114, dtype=np.uint8)
99
  dy = (self.input_h - nh) // 2
100
  dx = (self.input_w - nw) // 2
101
+ canvas[dy:dy+nh, dx:dx+nw] = resized
102
  return canvas, ratio, (float(dx), float(dy))
103
 
104
def _preprocess(self, image_bgr):
    """Letterbox *image_bgr* and convert it to the network input tensor.

    Returns:
        A ``(tensor, ratio, pad)`` tuple. ``tensor`` is a contiguous float32
        array in channel-first order with a leading batch axis and values
        divided by 255; ``ratio`` and ``pad`` are passed through unchanged
        from ``_letterbox``.
    """
    padded, scale, offsets = self._letterbox(image_bgr)
    as_rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    scaled = as_rgb.astype(np.float32) / 255.0
    batched = scaled.transpose(2, 0, 1)[None, ...]
    tensor = np.ascontiguousarray(batched, dtype=np.float32)
    return tensor, scale, offsets
109
 
 
110
  @staticmethod
111
def _hard_nms(boxes, scores, iou_thresh):
    """Greedy non-maximum suppression over one set of xyxy boxes.

    Boxes are visited from highest to lowest score; each visited box that has
    not been suppressed is kept and suppresses every later box whose IoU with
    it is strictly greater than *iou_thresh*.

    Returns:
        ``np.intp`` array of kept indices into *boxes*, in visiting order.
    """
    count = len(boxes)
    if count == 0:
        return np.array([], dtype=np.intp)

    ranked = np.argsort(scores)[::-1]
    dropped = np.zeros(count, dtype=bool)
    survivors = []
    for pos, winner in enumerate(ranked):
        if dropped[winner]:
            continue
        survivors.append(int(winner))
        ref = boxes[winner]
        ref_area = (ref[2] - ref[0]) * (ref[3] - ref[1])
        for other in ranked[pos + 1:]:
            if dropped[other]:
                continue
            cand = boxes[other]
            left, top = max(ref[0], cand[0]), max(ref[1], cand[1])
            right, bottom = min(ref[2], cand[2]), min(ref[3], cand[3])
            overlap = max(0.0, right - left) * max(0.0, bottom - top)
            cand_area = (cand[2] - cand[0]) * (cand[3] - cand[1])
            # Epsilon keeps the division defined for zero-area pairs.
            if overlap / (ref_area + cand_area - overlap + 1e-7) > iou_thresh:
                dropped[other] = True
    return np.array(survivors, dtype=np.intp)
132
 
133
+ def _per_class_hard_nms(self, boxes, scores, cls_ids, iou_thresh):
134
+ if len(boxes) == 0: return np.array([], dtype=np.intp)
135
+ all_keep = []
 
 
 
136
  for c in np.unique(cls_ids):
137
  mask = cls_ids == c
138
  indices = np.where(mask)[0]
 
142
  return np.array(all_keep, dtype=np.intp)
143
 
144
  @staticmethod
145
def _cross_class_dedup(boxes, scores, cls_ids, iou_thresh):
    """Remove boxes that overlap an earlier-ranked box, regardless of class.

    Ranking comes from ``np.lexsort((-scores, -areas))``; lexsort treats its
    last key as primary, so boxes are visited largest area first with score
    only breaking area ties. A visited, not-yet-suppressed box is kept and
    suppresses every other box whose IoU with it exceeds *iou_thresh*.

    Returns:
        ``(boxes, scores, cls_ids)`` restricted to the kept rows, in visiting
        order. Inputs with zero or one box are returned unchanged.
    """
    total = len(boxes)
    if total <= 1:
        return boxes, scores, cls_ids

    widths = np.maximum(0.0, boxes[:, 2] - boxes[:, 0])
    heights = np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
    areas = widths * heights
    ranking = np.lexsort((-scores, -areas))

    removed = np.zeros(total, dtype=bool)
    winners = []
    for cur in ranking:
        if removed[cur]:
            continue
        winners.append(int(cur))
        ref = boxes[cur]
        left = np.maximum(ref[0], boxes[:, 0])
        top = np.maximum(ref[1], boxes[:, 1])
        right = np.minimum(ref[2], boxes[:, 2])
        bottom = np.minimum(ref[3], boxes[:, 3])
        inter = np.maximum(0.0, right - left) * np.maximum(0.0, bottom - top)
        ref_area = max(1e-7, float((ref[2] - ref[0]) * (ref[3] - ref[1])))
        iou = inter / (ref_area + areas - inter + 1e-7)
        matches = iou > iou_thresh
        matches[cur] = False  # a box never suppresses itself
        removed |= matches

    chosen = np.array(winners, dtype=np.intp)
    return boxes[chosen], scores[chosen], cls_ids[chosen]
164
 
165
  @staticmethod
166
def _max_score_per_cluster(coords, scores, keep_idx, iou_thresh, cls_ids=None):
    """Return, for each kept box, the highest score among the boxes it overlaps.

    Args:
        coords: ``(N, 4)`` array of xyxy boxes.
        scores: ``(N,)`` array of scores aligned with *coords*.
        keep_idx: Indices into *coords* of the boxes to report on.
        iou_thresh: A box belongs to a kept box's cluster when their IoU is
            ``>= iou_thresh``.
        cls_ids: Optional ``(N,)`` class ids. When given, a cluster only
            contains boxes of the same class as the kept box. When omitted,
            clustering ignores class, as before.

    Returns:
        float32 array with one entry per element of *keep_idx*.
    """
    if len(keep_idx) == 0:
        return np.array([], dtype=np.float32)

    # Per-box areas do not depend on the kept box, so compute them once.
    areas = (coords[:, 2] - coords[:, 0]) * (coords[:, 3] - coords[:, 1])
    classes = None if cls_ids is None else np.asarray(cls_ids)

    out = np.empty(len(keep_idx), dtype=np.float32)
    for j, idx in enumerate(keep_idx):
        bi = coords[idx]
        xx1 = np.maximum(bi[0], coords[:, 0])
        yy1 = np.maximum(bi[1], coords[:, 1])
        xx2 = np.minimum(bi[2], coords[:, 2])
        yy2 = np.minimum(bi[3], coords[:, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[idx] + areas - inter + 1e-7)
        members = iou >= iou_thresh
        if classes is not None:
            members &= classes == classes[idx]
        # A zero-area box has IoU 0 with itself; force self-membership so the
        # selection is never empty (np.max would raise on an empty array).
        members[idx] = True
        out[j] = float(np.max(scores[members]))
    return out
179
 
180
def _infer_single(self, image_bgr):
    """Run one forward pass and return filtered detections for *image_bgr*.

    Steps: ``_preprocess`` -> ``session.run`` -> confidence filter -> undo the
    letterbox transform -> clip to the image -> per-class NMS -> cap at
    ``max_det`` -> cross-class dedup -> ``_to_boundingboxes``.

    Args:
        image_bgr: Image array. Only ``shape[:2]`` (height, width) is read
            here; the pixels are handed to ``_preprocess``.

    Returns:
        The list produced by ``_to_boundingboxes``, or ``[]`` when no output
        row reaches ``conf_threshold``.
    """
    inp, ratio, (dx, dy) = self._preprocess(image_bgr)
    out = self.session.run(self.output_names, {self.input_name: inp})[0]
    if out.ndim == 3:
        out = out[0]  # drop the leading batch axis

    # Each row is read as [x1, y1, x2, y2, conf, cls].
    confs = out[:, 4].astype(np.float32)
    keep = confs >= self.conf_threshold
    if not keep.any():
        return []
    out = out[keep]

    boxes = out[:, :4].astype(np.float32).copy()
    confs = out[:, 4].astype(np.float32)
    cls_ids = self.cls_remap[out[:, 5].astype(np.int32)]

    # Undo the letterbox: remove the padding offset, then the resize ratio.
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dx) / ratio
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dy) / ratio
    oh, ow = image_bgr.shape[:2]
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, ow - 1)
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, oh - 1)

    if len(boxes) > 1:
        keep_idx = self._per_class_hard_nms(boxes, confs, cls_ids, self.iou_thresh)
        if len(keep_idx) > self.max_det:
            # The NMS result is not ordered by confidence (it appears to be
            # grouped by class), so a plain slice could drop strong boxes of
            # later classes. Rank by confidence before applying the cap.
            ranked = np.argsort(-confs[keep_idx], kind="stable")
            keep_idx = keep_idx[ranked[: self.max_det]]
        boxes, confs, cls_ids = boxes[keep_idx], confs[keep_idx], cls_ids[keep_idx]
        boxes, confs, cls_ids = self._cross_class_dedup(
            boxes, confs, cls_ids, self.cross_iou_thresh
        )
    return self._to_boundingboxes(boxes, confs, cls_ids, ow, oh)
 
 
 
 
 
202
 
203
def _infer_tta(self, image_bgr):
    """Horizontal-flip test-time augmentation on top of ``_infer_single``.

    Runs ``_infer_single`` on the image and on its horizontal mirror, maps the
    mirrored boxes back with ``x -> width - x``, pools both sets, applies
    per-class NMS, caps the result at ``max_det`` and replaces each kept box's
    confidence with the value from ``_max_score_per_cluster`` (clamped to
    [0, 1]).

    Returns:
        List of ``BoundingBox``; empty when neither pass produced a box.
    """
    boxes_orig = self._infer_single(image_bgr)

    w = image_bgr.shape[1]
    flipped = cv2.flip(image_bgr, 1)
    boxes_flip_raw = self._infer_single(flipped)
    # Mirror back into the original frame; x1/x2 swap roles under the flip.
    boxes_flip = [
        BoundingBox(x1=w - b.x2, y1=b.y1, x2=w - b.x1, y2=b.y2,
                    cls_id=b.cls_id, conf=b.conf)
        for b in boxes_flip_raw
    ]

    all_boxes = boxes_orig + boxes_flip
    if not all_boxes:
        return []

    coords = np.array([[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32)
    scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
    cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)

    keep_idx = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thresh)
    if len(keep_idx) == 0:
        return []
    if len(keep_idx) > self.max_det:
        # The NMS result is not ordered by confidence (it appears to be grouped
        # by class); rank by score so the cap drops the weakest boxes.
        ranked = np.argsort(-scores[keep_idx], kind="stable")
        keep_idx = keep_idx[ranked[: self.max_det]]

    # NOTE(review): the boost looks at every overlapping box regardless of
    # class, so an overlapping box of another class can raise the score.
    boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)

    out = []
    for j, idx in enumerate(keep_idx):
        b = all_boxes[idx]
        out.append(BoundingBox(
            x1=b.x1, y1=b.y1, x2=b.x2, y2=b.y2, cls_id=b.cls_id,
            conf=max(0.0, min(1.0, float(boosted[j]))),
        ))
    return out
 
 
 
225
 
226
def _to_boundingboxes(self, boxes, confs, cls_ids, orig_w, orig_h):
    """Convert float xyxy rows to integer ``BoundingBox`` objects.

    Each box is expanded outward to whole pixels (floor on the top-left
    corner, ceil on the bottom-right), clamped to ``[0, orig_w]`` /
    ``[0, orig_h]``, and dropped when it is empty, smaller than
    ``min_box_area``, thinner than ``min_side`` or more elongated than
    ``max_aspect_ratio``. Confidence is clamped to [0, 1].
    """
    def clamp(value, upper):
        return max(0, min(upper, value))

    results = []
    for i, (x1, y1, x2, y2) in enumerate(boxes):
        left = clamp(math.floor(x1), orig_w)
        top = clamp(math.floor(y1), orig_h)
        right = clamp(math.ceil(x2), orig_w)
        bottom = clamp(math.ceil(y2), orig_h)
        if right <= left or bottom <= top:
            continue

        width, height = right - left, bottom - top
        too_small = width * height < self.min_box_area or min(width, height) < self.min_side
        if too_small:
            continue
        elongation = max(width / max(height, 1), height / max(width, 1))
        if elongation > self.max_aspect_ratio:
            continue

        score = max(0.0, min(1.0, float(confs[i])))
        results.append(
            BoundingBox(x1=left, y1=top, x2=right, y2=bottom,
                        cls_id=int(cls_ids[i]), conf=score)
        )
    return results
243
 
244
def predict_one(self, image_bgr):
    """Detect on one image, using flip-TTA when ``use_tta`` is set."""
    if self.use_tta:
        return self._infer_tta(image_bgr)
    return self._infer_single(image_bgr)
246
+
247
+
248
+ # ============================================================ RFDETR PATH
249
class _RFDETRMiner:
    """ONNX detector that reads ``weights_rfdetr.onnx``.

    The frame is stretched to ``input_w x input_h`` (no letterbox), normalised
    with the module-level ImageNet mean/std, and run through the session once.
    Rows below ``conf_threshold`` and boxes failing the size / aspect-ratio
    limits are dropped. This class applies neither NMS nor TTA.
    """

    def __init__(self, path_hf_repo: Path):
        """Open the ONNX session and set thresholds.

        Args:
            path_hf_repo: Directory containing ``weights_rfdetr.onnx``; coerced
                with ``pathlib.Path`` so a plain string also works.
        """
        weights_path = str(Path(path_hf_repo) / "weights_rfdetr.onnx")
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        try:
            self.session = ort.InferenceSession(
                weights_path,
                sess_options=sess_options,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
            )
        except Exception:
            # Deliberate best-effort: retry on CPU only if the first attempt fails.
            self.session = ort.InferenceSession(
                weights_path,
                sess_options=sess_options,
                providers=["CPUExecutionProvider"],
            )
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [o.name for o in self.session.get_outputs()]
        self.input_h = 1288
        self.input_w = 1288
        self.conf_threshold = 0.52
        # Geometry limits, in integer pixels of the original image.
        self.min_box_area = 196
        self.min_side = 8
        self.max_aspect_ratio = 8.0

    def _preprocess(self, image_bgr):
        """Return the contiguous float32 channel-first batch tensor for one frame."""
        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        # Plain stretch to the network size; aspect ratio is not preserved.
        resized = cv2.resize(rgb, (self.input_w, self.input_h), interpolation=cv2.INTER_LINEAR)
        x = resized.astype(np.float32) / 255.0
        x = (x - _IMAGENET_MEAN) / _IMAGENET_STD
        return np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None, ...].astype(np.float32))

    def _passes_geometry(self, bw, bh):
        """Return True when a ``bw x bh`` box satisfies area, side and aspect limits."""
        if bw * bh < self.min_box_area:
            return False
        if min(bw, bh) < self.min_side:
            return False
        ar = max(bw / max(bh, 1), bh / max(bw, 1))
        return not ar > self.max_aspect_ratio

    def predict_one(self, image_bgr):
        """Detect on one BGR image.

        Returns:
            List of ``BoundingBox`` in original-image integer pixels; empty
            when no output row reaches ``conf_threshold``.
        """
        oh, ow = image_bgr.shape[:2]
        x = self._preprocess(image_bgr)
        out = self.session.run(self.output_names, {self.input_name: x})[0]
        if out.ndim == 3:
            out = out[0]  # drop the leading batch axis

        # Each row is read as [x1, y1, x2, y2, conf, cls].
        confs = out[:, 4].astype(np.float32)
        keep = confs >= self.conf_threshold
        if not keep.any():
            return []
        out = out[keep]
        boxes = out[:, :4].astype(np.float32).copy()
        confs = out[:, 4].astype(np.float32)
        cls_ids = out[:, 5].astype(np.int32)

        # Undo the stretch: independent scale factors per axis.
        sx = ow / float(self.input_w)
        sy = oh / float(self.input_h)
        boxes[:, [0, 2]] *= sx
        boxes[:, [1, 3]] *= sy
        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, ow - 1)
        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, oh - 1)

        out_boxes = []
        for i, (x1, y1, x2, y2) in enumerate(boxes):
            # Expand outward to whole pixels, then clamp to the image.
            ix1 = max(0, min(ow, math.floor(x1)))
            iy1 = max(0, min(oh, math.floor(y1)))
            ix2 = max(0, min(ow, math.ceil(x2)))
            iy2 = max(0, min(oh, math.ceil(y2)))
            if ix2 <= ix1 or iy2 <= iy1:
                continue
            if not self._passes_geometry(ix2 - ix1, iy2 - iy1):
                continue
            out_boxes.append(BoundingBox(
                x1=ix1, y1=iy1, x2=ix2, y2=iy2,
                cls_id=int(cls_ids[i]),
                conf=max(0.0, min(1.0, float(confs[i]))),
            ))
        return out_boxes
309
+
310
+
311
+ # ============================================================ ENSEMBLE PUBLIC
312
class Miner:
    """Public ensemble miner; the chute calls ``predict_batch``.

    Holds one ``_AlfredMiner`` and one ``_RFDETRMiner``, keeps only the classes
    assigned to each, and merges the two box lists with alfred taking priority.
    """

    def __init__(self, path_hf_repo) -> None:
        """Build both detectors from *path_hf_repo* and run a short warm-up."""
        self.path_hf_repo = Path(path_hf_repo)
        self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
        try:
            ort.preload_dlls()
        except Exception:
            pass

        self.alfred = _AlfredMiner(self.path_hf_repo)
        self.rfdetr = _RFDETRMiner(self.path_hf_repo)
        # Class ids each detector is allowed to contribute.
        self.alfred_classes = {0, 1, 5}
        self.rfdetr_classes = {0, 1, 2, 3, 4}
        self.merge_iou = 0.5

        # Warm-up: up to two passes per detector on a black frame; a failure
        # stops that detector's warm-up without aborting construction.
        blank = np.zeros((1280, 1280, 3), dtype=np.uint8)
        for detector in (self.alfred, self.rfdetr):
            for _ in range(2):
                try:
                    detector.predict_one(blank)
                except Exception:
                    break

    def __repr__(self):
        head = "CrimeEnsembleMiner v2 alfred(yolo26n@1280, TTA) + "
        tail = f"rfdetr(base@1288) conf>=0.52 merge_iou={self.merge_iou}"
        return head + tail

    @staticmethod
    def _box_iou(a: BoundingBox, b: BoundingBox) -> float:
        """Intersection-over-union of two boxes exposing ``x1, y1, x2, y2``."""
        overlap_w = max(0, min(a.x2, b.x2) - max(a.x1, b.x1))
        overlap_h = max(0, min(a.y2, b.y2) - max(a.y1, b.y1))
        inter = overlap_w * overlap_h
        area_a = (a.x2 - a.x1) * (a.y2 - a.y1)
        area_b = (b.x2 - b.x1) * (b.y2 - b.y1)
        return inter / (area_a + area_b - inter + 1e-7)

    def _merge(self, alfred_boxes: list, rfdetr_boxes: list) -> list:
        """Union of both lists with alfred priority.

        Every alfred box is kept. An rfdetr box is kept only when no alfred
        box of the same class overlaps it at IoU >= ``merge_iou``.
        """
        def shadowed(candidate):
            return any(
                ab.cls_id == candidate.cls_id
                and self._box_iou(ab, candidate) >= self.merge_iou
                for ab in alfred_boxes
            )

        merged = list(alfred_boxes)
        merged.extend(rb for rb in rfdetr_boxes if not shadowed(rb))
        return merged

    def _detect(self, image):
        """Run both detectors on *image*, apply class routing, and merge."""
        from_alfred = self.alfred.predict_one(image)
        from_rfdetr = self.rfdetr.predict_one(image)
        alfred_kept = [b for b in from_alfred if b.cls_id in self.alfred_classes]
        rfdetr_kept = [b for b in from_rfdetr if b.cls_id in self.rfdetr_classes]
        return self._merge(alfred_kept, rfdetr_kept)

    def predict_batch(self, batch_images, offset, n_keypoints):
        """Return one ``TVFrameResult`` per image.

        ``frame_id`` is ``offset`` plus the image's position in the batch;
        ``keypoints`` is ``n_keypoints`` placeholder ``(0, 0)`` pairs (none
        when ``n_keypoints`` is not positive).
        """
        frames = []
        for position, image in enumerate(batch_images):
            detections = self._detect(image)
            placeholders = [(0, 0)] * max(0, int(n_keypoints))
            frames.append(TVFrameResult(
                frame_id=offset + position,
                boxes=detections,
                keypoints=placeholders,
            ))
        return frames
weights_rfdetr.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee45832e55a37b358946f22213c7c129e085d23302400e039a1f256c53482062
3
+ size 108130685