meaculpitt committed on
Commit
c0918f0
·
verified ·
1 Parent(s): 2461028

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +24 -39
miner.py CHANGED
@@ -189,7 +189,7 @@ VEH_NMS_IOU = 0.50
189
  VEH_CLASS_CONF: dict[int, float] = {
190
  1: 0.45, # car — most FP-prone class (75% of training data, overconfident)
191
  2: 0.45, # truck — moderate raise
192
- 3: 0.35, # motorcycle — keep lower (small targets, easy to miss)
193
  4: 1.0, # bus — effectively suppressed (not scored anyway)
194
  }
195
 
@@ -215,7 +215,7 @@ VEH_MIN_WH = 8
215
  VEH_MIN_AREA = 100
216
  VEH_MAX_ASPECT = 8.0
217
  VEH_MAX_AREA_RATIO = 0.95
218
- VEH_MAX_DET = 150
219
 
220
  # ── Vehicle parts confirmation config ────────────────────────────────────
221
  # Cross-validates vehicle detections using person detections, OpenCV analysis,
@@ -245,7 +245,7 @@ VEH_PARTS_PLATE_MIN_PX = 120 # only check plates on medium+ vehicles # Min
245
  VEH_PARTS_PLATE_CONF = 0.35 # Min plate detection confidence
246
 
247
  # ── Person config (TTA consensus) ───────────────────────────────────────────
248
- PER_CONF_LOW = 0.45
249
  PER_CONF_HIGH = 0.58
250
  PER_CONSENSUS_IOU = 0.50
251
  PER_RTF_BUDGET = 8.0
@@ -259,8 +259,9 @@ PER_MAX_AREA_RATIO = 0.80
259
  # ── Person tiling config (SAHI-inspired) ────────────────────────────────────
260
  PER_TILE_OVERLAP = 0.20 # 20% overlap between tiles
261
  PER_TILE_MIN_DIM_RATIO = 1.15 # tile when image dim > model_dim * this (~1104px for 960 model)
262
- PER_TILE_CONF = 0.40 # lower threshold for tile passes (NMS handles FP)
263
  PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
 
264
 
265
  # ── Pose FP filter + box refinement config ──────────────────────────────────
266
  POSE_CONF_THRESH = 0.25 # Minimum confidence for pose detection
@@ -1703,13 +1704,12 @@ class Miner:
1703
  def _infer_person(self, image_bgr):
1704
  """Person detection with SAHI-inspired tiled inference + dynamic NMS.
1705
 
1706
- Pipeline:
1707
  1. Full-image pass at native 960px
1708
- 2. 2 tiled passes (higher effective resolution for small/distant people)
1709
- 3. Flip TTA pass if time budget allows
1710
- 4. Dynamic NMS merge (adapts IoU threshold to scene density)
1711
- 5. Sanity filters
1712
- 6. Pose FP filter + box refinement
1713
  """
1714
  oh, ow = image_bgr.shape[:2]
1715
  t_start = time.monotonic()
@@ -1726,35 +1726,14 @@ class Miner:
1726
 
1727
  elapsed_pass1 = time.monotonic() - t_start
1728
 
1729
- # Pass 2-3: tiled passes
1730
- tiles = self._generate_tiles(oh, ow)
1731
- if len(tiles) > 1 and elapsed_pass1 < PER_RTF_BUDGET / 4:
1732
- for tile_region in tiles[1:]:
1733
- if time.monotonic() - t_start > PER_RTF_BUDGET * 0.6:
1734
- break
1735
- boxes_t, confs_t = self._per_run_tile(
1736
- image_bgr, tile_region, PER_TILE_CONF)
1737
- if len(boxes_t) > 0:
1738
- all_boxes.append(boxes_t)
1739
- all_confs.append(confs_t)
1740
-
1741
- # Pass 4: flip TTA if time allows
1742
- if time.monotonic() - t_start < PER_RTF_BUDGET / 4:
1743
- flipped = cv2.flip(image_bgr, 1)
1744
- boxes_flip, confs_flip = self._per_run_pass(flipped, PER_CONF_LOW)
1745
- if len(boxes_flip) > 0:
1746
- boxes_flip[:, 0], boxes_flip[:, 2] = (
1747
- ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
1748
- all_boxes.append(boxes_flip)
1749
- all_confs.append(confs_flip)
1750
-
1751
- # Pass 5: CLAHE enhanced preprocessing pass
1752
- if time.monotonic() - t_start < PER_RTF_BUDGET / 4:
1753
- enhanced = self._per_enhance(image_bgr)
1754
- boxes_enh, confs_enh = self._per_run_pass(enhanced, PER_CONF_LOW)
1755
- if len(boxes_enh) > 0:
1756
- all_boxes.append(boxes_enh)
1757
- all_confs.append(confs_enh)
1758
 
1759
  if not all_boxes:
1760
  return []
@@ -1766,6 +1745,12 @@ class Miner:
1766
  nms_iou = 0.60 if n_raw > 30 else (0.40 if n_raw < 10 else PER_NMS_IOU)
1767
  merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, nms_iou)
1768
 
 
 
 
 
 
 
1769
  if len(merged_b) == 0:
1770
  return []
1771
 
 
189
  VEH_CLASS_CONF: dict[int, float] = {
190
  1: 0.45, # car — most FP-prone class (75% of training data, overconfident)
191
  2: 0.45, # truck — moderate raise
192
+ 3: 0.45, # motorcycle — raised from 0.35 to reduce FP (small targets, easy to miss)
193
  4: 1.0, # bus — effectively suppressed (not scored anyway)
194
  }
195
 
 
215
  VEH_MIN_AREA = 100
216
  VEH_MAX_ASPECT = 8.0
217
  VEH_MAX_AREA_RATIO = 0.95
218
+ VEH_MAX_DET = 40
219
 
220
  # ── Vehicle parts confirmation config ────────────────────────────────────
221
  # Cross-validates vehicle detections using person detections, OpenCV analysis,
 
245
  VEH_PARTS_PLATE_CONF = 0.35 # Min plate detection confidence
246
 
247
  # ── Person config (TTA consensus) ───────────────────────────────────────────
248
+ PER_CONF_LOW = 0.55
249
  PER_CONF_HIGH = 0.58
250
  PER_CONSENSUS_IOU = 0.50
251
  PER_RTF_BUDGET = 8.0
 
259
  # ── Person tiling config (SAHI-inspired) ────────────────────────────────────
260
  PER_TILE_OVERLAP = 0.20 # 20% overlap between tiles
261
  PER_TILE_MIN_DIM_RATIO = 1.15 # tile when image dim > model_dim * this (~1104px for 960 model)
262
+ PER_TILE_CONF = 0.55 # raised from 0.40 to match PER_CONF_LOW
263
  PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
264
+ PER_MAX_DET = 15 # hard cap on person detections per image
265
 
266
  # ── Pose FP filter + box refinement config ──────────────────────────────────
267
  POSE_CONF_THRESH = 0.25 # Minimum confidence for pose detection
 
1704
  def _infer_person(self, image_bgr):
1705
  """Person detection with SAHI-inspired tiled inference + dynamic NMS.
1706
 
1707
+ Pipeline (2-pass, optimized for RTF):
1708
  1. Full-image pass at native 960px
1709
+ 2. Flip TTA pass
1710
+ 3. Dynamic NMS merge (adapts IoU threshold to scene density)
1711
+ 4. Sanity filters + PER_MAX_DET cap
1712
+ 5. Pose FP filter + box refinement (if time allows)
 
1713
  """
1714
  oh, ow = image_bgr.shape[:2]
1715
  t_start = time.monotonic()
 
1726
 
1727
  elapsed_pass1 = time.monotonic() - t_start
1728
 
1729
+ # Pass 2: flip TTA (always run — only 2 passes total for RTF safety)
1730
+ flipped = cv2.flip(image_bgr, 1)
1731
+ boxes_flip, confs_flip = self._per_run_pass(flipped, PER_CONF_LOW)
1732
+ if len(boxes_flip) > 0:
1733
+ boxes_flip[:, 0], boxes_flip[:, 2] = (
1734
+ ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
1735
+ all_boxes.append(boxes_flip)
1736
+ all_confs.append(confs_flip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1737
 
1738
  if not all_boxes:
1739
  return []
 
1745
  nms_iou = 0.60 if n_raw > 30 else (0.40 if n_raw < 10 else PER_NMS_IOU)
1746
  merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, nms_iou)
1747
 
1748
+ # Hard cap on max detections (FP protection)
1749
+ if len(merged_s) > PER_MAX_DET:
1750
+ top_idx = np.argsort(merged_s)[-PER_MAX_DET:]
1751
+ merged_b = merged_b[top_idx]
1752
+ merged_s = merged_s[top_idx]
1753
+
1754
  if len(merged_b) == 0:
1755
  return []
1756