SuperBitDev committed on
Commit
b2ceac9
·
verified ·
1 Parent(s): 5ff9e11

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. miner.py +90 -15
miner.py CHANGED
@@ -18,7 +18,7 @@ class BoundingBox(BaseModel):
18
 
19
 
20
  class TVFrameResult(BaseModel):
21
- frame_id: int
22
  boxes: list[BoundingBox]
23
  keypoints: list[tuple[int, int]]
24
 
@@ -71,11 +71,18 @@ class Miner:
71
  self.input_height = self._safe_dim(self.input_shape[2], default=1280)
72
  self.input_width = self._safe_dim(self.input_shape[3], default=1280)
73
 
74
- self.conf_thres = 0.0
75
- self.iou_thres = 0.6
76
- self.max_det = 300
 
 
77
  self.use_tta = True
78
 
 
 
 
 
 
79
  print(f"✅ ONNX model loaded from: {model_path}")
80
  print(f"✅ ONNX providers: {self.session.get_providers()}")
81
  print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
@@ -264,6 +271,45 @@ class Miner:
264
  suppressed[jdx] = True
265
  return np.array(keep)
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  @staticmethod
268
  def _max_score_per_cluster(
269
  coords: np.ndarray,
@@ -336,10 +382,25 @@ class Miner:
336
  boxes /= ratio
337
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
338
 
339
- if apply_optional_dedup and len(boxes) > 1:
340
- keep_idx, scores = self._soft_nms(boxes, scores)
341
- boxes = boxes[keep_idx]
342
- cls_ids = cls_ids[keep_idx]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
  results: list[BoundingBox] = []
345
  for box, conf, cls_id in zip(boxes, scores, cls_ids):
@@ -408,11 +469,11 @@ class Miner:
408
  return []
409
 
410
  boxes = self._xywh_to_xyxy(boxes_xywh)
411
- keep_idx, scores = self._soft_nms(boxes, scores)
412
- keep_idx = keep_idx[: self.max_det]
413
- scores = scores[: self.max_det]
414
 
 
 
415
  boxes = boxes[keep_idx]
 
416
  cls_ids = cls_ids[keep_idx]
417
 
418
  pad_w, pad_h = pad
@@ -423,6 +484,12 @@ class Miner:
423
  boxes /= ratio
424
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
425
 
 
 
 
 
 
 
426
  results: list[BoundingBox] = []
427
  for box, conf, cls_id in zip(boxes, scores, cls_ids):
428
  x1, y1, x2, y2 = box.tolist()
@@ -493,7 +560,11 @@ class Miner:
493
  return self._postprocess(det_output, ratio, pad, orig_size)
494
 
495
  def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
496
- """Horizontal-flip TTA: merge original + flipped via hard NMS."""
 
 
 
 
497
  boxes_orig = self._predict_single(image)
498
 
499
  flipped = cv2.flip(image, 1)
@@ -521,9 +592,13 @@ class Miner:
521
  if len(hard_keep) == 0:
522
  return []
523
 
524
- # _hard_nms already orders kept indices by descending score.
525
  hard_keep = hard_keep[: self.max_det]
526
 
 
 
 
 
 
527
  return [
528
  BoundingBox(
529
  x1=all_boxes[i].x1,
@@ -531,9 +606,9 @@ class Miner:
531
  x2=all_boxes[i].x2,
532
  y2=all_boxes[i].y2,
533
  cls_id=all_boxes[i].cls_id,
534
- conf=float(scores[i]),
535
  )
536
- for i in hard_keep
537
  ]
538
 
539
  def predict_batch(
 
18
 
19
 
20
  class TVFrameResult(BaseModel):
21
+ frame_id: int12
22
  boxes: list[BoundingBox]
23
  keypoints: list[tuple[int, int]]
24
 
 
71
  self.input_height = self._safe_dim(self.input_shape[2], default=1280)
72
  self.input_width = self._safe_dim(self.input_shape[3], default=1280)
73
 
74
+ # Tuned for validator scoring: reduce FP (FALSE_POSITIVE pillar),
75
+ # preserve recall (MAP50, RECALL), improve precision.
76
+ self.conf_thres = 0.2 # Higher = fewer FP, slightly lower recall
77
+ self.iou_thres = 0.5 # Lower = suppress duplicate detections (FP)
78
+ self.max_det = 100 # Cap detections; sports ~20-30 persons
79
  self.use_tta = True
80
 
81
+ # Box sanity: filter tiny/spurious detections (common FP source)
82
+ self.min_box_area = 12 * 12 # ~144 px²
83
+ self.min_side = 8
84
+ self.max_aspect_ratio = 8.0
85
+
86
  print(f"✅ ONNX model loaded from: {model_path}")
87
  print(f"✅ ONNX providers: {self.session.get_providers()}")
88
  print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
 
271
  suppressed[jdx] = True
272
  return np.array(keep)
273
 
274
+ def _filter_sane_boxes(
275
+ self,
276
+ boxes: np.ndarray,
277
+ scores: np.ndarray,
278
+ cls_ids: np.ndarray,
279
+ orig_size: tuple[int, int],
280
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
281
+ """Filter out tiny, degenerate, or implausible boxes (common FP)."""
282
+ if len(boxes) == 0:
283
+ return boxes, scores, cls_ids
284
+ orig_w, orig_h = orig_size
285
+ image_area = float(orig_w * orig_h)
286
+ keep = []
287
+ for i, box in enumerate(boxes):
288
+ x1, y1, x2, y2 = box.tolist()
289
+ bw = x2 - x1
290
+ bh = y2 - y1
291
+ if bw <= 0 or bh <= 0:
292
+ continue
293
+ if bw < self.min_side or bh < self.min_side:
294
+ continue
295
+ area = bw * bh
296
+ if area < self.min_box_area:
297
+ continue
298
+ if area > 0.95 * image_area:
299
+ continue
300
+ ar = max(bw / max(bh, 1e-6), bh / max(bw, 1e-6))
301
+ if ar > self.max_aspect_ratio:
302
+ continue
303
+ keep.append(i)
304
+ if not keep:
305
+ return (
306
+ np.empty((0, 4), dtype=np.float32),
307
+ np.empty((0,), dtype=np.float32),
308
+ np.empty((0,), dtype=np.int32),
309
+ )
310
+ k = np.array(keep, dtype=np.intp)
311
+ return boxes[k], scores[k], cls_ids[k]
312
+
313
  @staticmethod
314
  def _max_score_per_cluster(
315
  coords: np.ndarray,
 
382
  boxes /= ratio
383
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
384
 
385
+ # Box sanity filter (reduces FP)
386
+ boxes, scores, cls_ids = self._filter_sane_boxes(
387
+ boxes, scores, cls_ids, orig_size
388
+ )
389
+ if len(boxes) == 0:
390
+ return []
391
+
392
+ # NMS to remove duplicates (model may output overlapping boxes)
393
+ if len(boxes) > 1:
394
+ if apply_optional_dedup:
395
+ keep_idx, scores = self._soft_nms(boxes, scores)
396
+ boxes = boxes[keep_idx]
397
+ cls_ids = cls_ids[keep_idx]
398
+ else:
399
+ keep_idx = self._hard_nms(boxes, scores, self.iou_thres)
400
+ keep_idx = keep_idx[: self.max_det]
401
+ boxes = boxes[keep_idx]
402
+ scores = scores[keep_idx]
403
+ cls_ids = cls_ids[keep_idx]
404
 
405
  results: list[BoundingBox] = []
406
  for box, conf, cls_id in zip(boxes, scores, cls_ids):
 
469
  return []
470
 
471
  boxes = self._xywh_to_xyxy(boxes_xywh)
 
 
 
472
 
473
+ keep_idx = self._hard_nms(boxes, scores, self.iou_thres)
474
+ keep_idx = keep_idx[: self.max_det]
475
  boxes = boxes[keep_idx]
476
+ scores = scores[keep_idx]
477
  cls_ids = cls_ids[keep_idx]
478
 
479
  pad_w, pad_h = pad
 
484
  boxes /= ratio
485
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
486
 
487
+ boxes, scores, cls_ids = self._filter_sane_boxes(
488
+ boxes, scores, cls_ids, (orig_w, orig_h)
489
+ )
490
+ if len(boxes) == 0:
491
+ return []
492
+
493
  results: list[BoundingBox] = []
494
  for box, conf, cls_id in zip(boxes, scores, cls_ids):
495
  x1, y1, x2, y2 = box.tolist()
 
560
  return self._postprocess(det_output, ratio, pad, orig_size)
561
 
562
  def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
563
+ """
564
+ Horizontal-flip TTA: merge original + flipped via hard NMS.
565
+ Boost confidence for consensus detections (both views agree) to improve
566
+ mAP: validator sorts by confidence, so higher conf for TP helps PR curve.
567
+ """
568
  boxes_orig = self._predict_single(image)
569
 
570
  flipped = cv2.flip(image, 1)
 
592
  if len(hard_keep) == 0:
593
  return []
594
 
 
595
  hard_keep = hard_keep[: self.max_det]
596
 
597
+ # Boost confidence when both views agree (overlapping detections)
598
+ boosted = self._max_score_per_cluster(
599
+ coords, scores, hard_keep, self.iou_thres
600
+ )
601
+
602
  return [
603
  BoundingBox(
604
  x1=all_boxes[i].x1,
 
606
  x2=all_boxes[i].x2,
607
  y2=all_boxes[i].y2,
608
  cls_id=all_boxes[i].cls_id,
609
+ conf=float(boosted[j]),
610
  )
611
+ for j, i in enumerate(hard_keep)
612
  ]
613
 
614
  def predict_batch(