meaculpitt
/

ScoreVision

@@ -1,5 +1,5 @@
 """
-Score Vision SN44 — Unified miner v3.22.1 (2026-04-06). Hotfix: post-merge confidence floor for person pipeline. Soft-NMS + blur/perspective penalties could decay box conf below PER_CONF_LOW=0.60, causing FP blowout on crowd scenes (block 7907400: 8 FPs, FP_score=0.20). Fix: re-apply PER_CONF_LOW after all penalties.
 Tri-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT) + petrol (end2end 640).
 Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
 Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
@@ -211,7 +211,7 @@ VEH_NMS_IOU = 0.50
 # ── Per-class vehicle confidence thresholds (output cls_id) ────────────────
 # Raising from uniform 0.35: reduces FP (avg 4.1 FFPI → target <2.0)
 VEH_CLASS_CONF: dict[int, float] = {
-    1: 0.50,    # car — raised from 0.45, most FP-prone class (75% of training data)
     2: 0.45,    # truck — keep
     3: 0.50,    # motorcycle — raised from 0.45, small targets prone to FP
     0: 0.45,    # bus — keep
@@ -235,7 +235,7 @@ VEH_CLASS_MIN_AREA: dict[int, int] = {
 }
 # ── Vehicle box sanity filters (global fallbacks) ─────────────────────────
-VEH_MIN_WH = 8
 VEH_MIN_AREA = 100
 VEH_MAX_ASPECT = 8.0
 VEH_MAX_AREA_RATIO = 0.95
@@ -265,7 +265,7 @@ VEH_PARTS_WINDOW_MIN_PEAKS = 3   # Min periodic edge peaks for window confirmati
 # Motorcycle rider pose
 VEH_PARTS_RIDER_LEAN_DEG = 15.0  # Min torso lean from vertical (degrees) for rider pose
 # Plate detection thresholds
-VEH_PARTS_PLATE_MIN_PX = 120  # only check plates on medium+ vehicles      # Min vehicle width for plate detection
 VEH_PARTS_PLATE_CONF = 0.35      # Min plate detection confidence
 # ── Person config (TTA consensus) ───────────────────────────────────────────
@@ -298,6 +298,15 @@ PER_MAX_DET = 100               # Loose safety ceiling ONLY — not a count cap.
                                 # FP cases where NMS has already failed. Previous values (10 spec'd, 50 first
                                 # fix) were too tight. See FAILURE_ANALYSIS.md (2026-04-05).
 # ── Frame quality gating (Laplacian variance) ───────────────────────────────
 PER_BLUR_THRESHOLD = 50.0       # Laplacian variance below this = severely blurry
 PER_BLUR_CONF_PENALTY = 0.85    # multiply confs by this for blurry frames (reduce FP)
@@ -593,6 +602,21 @@ class Miner:
         self.veh_h = int(veh_shape[2])
         self.veh_w = int(veh_shape[3])
         # Person model — CUDA immediately, TRT engine builds in background
         per_onnx = str(path_hf_repo / "person_weights.onnx")
         self.per_session = ort.InferenceSession(
@@ -800,32 +824,25 @@ class Miner:
         y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
         return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
-    def _veh_run_pass(self, image_bgr, conf_thresh):
         oh, ow = image_bgr.shape[:2]
         inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
-        raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
         return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
-    def _infer_vehicle(self, image_bgr):
-        """Vehicle detection: 1280px with optional flip TTA, per-class NMS + conf + aspect filter.
-        Pipeline (v3.22 — flip TTA re-enabled, RTF budget allows it):
-        1. Primary pass at VEH_CONF_THRES
-        2. Optional flip TTA pass at VEH_TTA_CONF (if ENABLE_TTA)
-        3. Remap classes, per-class NMS
-        4. Per-class confidence filter (higher thresholds reduce FP)
-        5. Per-class aspect ratio filter
-        6. All 4 classes scored (v3.20: bus unsuppressed, cls_id=0)
-        """
         oh, ow = image_bgr.shape[:2]
         # Primary pass
-        boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
         # Flip TTA pass — horizontal flip, mirror boxes back
         if ENABLE_TTA:
             flipped = cv2.flip(image_bgr, 1)
-            f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF)
             if len(f_boxes) > 0:
                 # Mirror x-coords: x1'=ow-x2, x2'=ow-x1
                 f_boxes[:, 0], f_boxes[:, 2] = ow - f_boxes[:, 2], ow - f_boxes[:, 0]
@@ -912,6 +929,26 @@ class Miner:
             ))
         return out
     # ── Vehicle parts confirmation ───────────────────────────────────────
     @staticmethod
@@ -1871,65 +1908,125 @@ class Miner:
     # ── Person inference with SAHI tiling ────────────────────────────────
     def _infer_person(self, image_bgr):
-        """Person detection with SAHI-inspired tiled inference + dynamic NMS.
-        Pipeline (2-pass, optimized for RTF):
-        1. Full-image pass at native 960px
         2. Flip TTA pass
-        3. Dynamic NMS merge (adapts IoU threshold to scene density)
-        4. Sanity filters + PER_MAX_DET safety ceiling (conf-floor is the real filter)
-        5. Pose FP filter + box refinement (if time allows)
         """
         oh, ow = image_bgr.shape[:2]
         t_start = time.monotonic()
-        # Frame quality gating — detect severely blurry frames
         blur_score = self._frame_blur_score(image_bgr)
         is_blurry = blur_score < PER_BLUR_THRESHOLD
-        # Collect all boxes in original pixel coords
-        all_boxes = []   # list of [N, 4] arrays
-        all_confs = []   # list of [N] arrays
-        # Pass 1: full image at native 960px
-        boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
-        if len(boxes_full) > 0:
-            all_boxes.append(boxes_full)
-            all_confs.append(confs_full)
-        elapsed_pass1 = time.monotonic() - t_start
-        # Pass 2: flip TTA (always run — only 2 passes total for RTF safety)
         flipped = cv2.flip(image_bgr, 1)
-        boxes_flip, confs_flip = self._per_run_pass(flipped, PER_CONF_LOW)
         if len(boxes_flip) > 0:
             boxes_flip[:, 0], boxes_flip[:, 2] = (
                 ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
-            all_boxes.append(boxes_flip)
-            all_confs.append(confs_flip)
-        # Pass 3: CLAHE enhanced pass (low-contrast frames only, time-gated)
-        if time.monotonic() - t_start < PER_RTF_BUDGET * 0.5:
-            enhanced = self._per_enhance(image_bgr)
-            if enhanced is not image_bgr:  # CLAHE was applied (low contrast)
-                boxes_enh, confs_enh = self._per_run_pass(enhanced, PER_CONF_LOW)
-                if len(boxes_enh) > 0:
-                    all_boxes.append(boxes_enh)
-                    all_confs.append(confs_enh)
-        if not all_boxes:
             return []
-        # Dynamic NMS: adapt IoU threshold to scene density
-        merged_b = np.concatenate(all_boxes)
-        merged_s = np.concatenate(all_confs)
-        n_raw = len(merged_s)
-        nms_iou = 0.60 if n_raw > 30 else (0.40 if n_raw < 10 else PER_NMS_IOU)
-        merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, nms_iou)
-        # Safety ceiling (not a count cap). PER_CONF_LOW=0.60 is the real filter.
-        # This only activates on pathological runaway-FP cases (>50 boxes post-NMS).
         if len(merged_s) > PER_MAX_DET:
             top_idx = np.argsort(merged_s)[-PER_MAX_DET:]
             merged_b = merged_b[top_idx]
@@ -1938,17 +2035,17 @@ class Miner:
         if len(merged_b) == 0:
             return []
-        # Blur confidence penalty — reduce FP on severely blurry frames
         if is_blurry:
             merged_s = merged_s * PER_BLUR_CONF_PENALTY
-        # Perspective scaling penalty — reduce conf for size-anomalous detections
         merged_s = self._perspective_penalty(merged_b, merged_s, oh)
-        # Post-merge confidence floor — re-apply after soft-NMS decay + penalties
-        keep = merged_s >= PER_CONF_LOW
-        merged_b = merged_b[keep]
-        merged_s = merged_s[keep]
         # Sanity filters
         img_area = float(oh * ow)
@@ -2021,10 +2118,11 @@ class Miner:
             return self._infer_person(image_bgr)
         if element_hint == 'vehicle':
-            # Skip _vehicle_parts_confirm — it needs person_boxes for driver/rider
-            # confirmation which aren't available in vehicle-only mode. Without person
-            # detections, large vehicles with conf < 0.55 get falsely suppressed.
-            return self._infer_vehicle(image_bgr)
         if element_hint == 'petrol' and self.petrol_session:
             return self._infer_petrol(image_bgr)

 """
+Score Vision SN44 — Unified miner v3.23 (2026-04-06). TTA consensus (person), FP32 fallback (vehicle), parts_confirm on vehicle challenges, car conf 0.60, VEH_MIN_WH=20. Person: DMSC19-inspired graduated consensus replaces soft-NMS (both=0.50, orig=0.60, flip=0.75). Vehicle: FP32 retry on ≤1 box, parts_confirm with empty person_boxes, plate 80px, car 0.60.
 Tri-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT) + petrol (end2end 640).
 Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
 Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
 # ── Per-class vehicle confidence thresholds (output cls_id) ────────────────
 # Raising from uniform 0.35: reduces FP (avg 4.1 FFPI → target <2.0)
 VEH_CLASS_CONF: dict[int, float] = {
+    1: 0.60,    # car — raised from 0.50, most FP-prone class (75% of training data)
     2: 0.45,    # truck — keep
     3: 0.50,    # motorcycle — raised from 0.45, small targets prone to FP
     0: 0.45,    # bus — keep
 }
 # ── Vehicle box sanity filters (global fallbacks) ─────────────────────────
+VEH_MIN_WH = 20  # was 8. Kills tiny horizon artifacts (confirmed: h<25 extras on block 7900800)
 VEH_MIN_AREA = 100
 VEH_MAX_ASPECT = 8.0
 VEH_MAX_AREA_RATIO = 0.95
 # Motorcycle rider pose
 VEH_PARTS_RIDER_LEAN_DEG = 15.0  # Min torso lean from vertical (degrees) for rider pose
 # Plate detection thresholds
+VEH_PARTS_PLATE_MIN_PX = 80   # plates visible at ~80px vehicle width (was 120)
 VEH_PARTS_PLATE_CONF = 0.35      # Min plate detection confidence
 # ── Person config (TTA consensus) ───────────────────────────────────────────
                                 # FP cases where NMS has already failed. Previous values (10 spec'd, 50 first
                                 # fix) were too tight. See FAILURE_ANALYSIS.md (2026-04-05).
+# ── TTA consensus thresholds (DMSC19-inspired graduated approach) ────────────
+# Cross-view confirmation eliminates the soft-NMS confidence decay bug.
+# Instead of concatenate+soft-NMS (which decayed confs below floor), we match
+# boxes across original+flip views and apply graduated confidence thresholds.
+# The floors are ordered BOTH < ORIG < FLIP: the less corroboration a box has,
+# the more confidence it needs to be kept.
+# PER_TTA_CONF_BOTH is also the decode threshold passed to both raw passes and
+# the final post-penalty floor in _infer_person, so it is the lowest confidence
+# any person box can carry at any stage.
+PER_TTA_MATCH_IOU = 0.50        # Min IoU for an original box and a mirrored-back flip box to count as one detection
+PER_TTA_CONF_BOTH = 0.50        # Confirmed by both views: relaxed floor (cross-view agreement is the extra evidence)
+PER_TTA_CONF_ORIG = 0.60        # Original-only: standard threshold (PER_CONF_LOW)
+PER_TTA_CONF_FLIP = 0.75        # Flip-only: strict (flip-only detections are likely FP)
 # ── Frame quality gating (Laplacian variance) ───────────────────────────────
 PER_BLUR_THRESHOLD = 50.0       # Laplacian variance below this = severely blurry
 PER_BLUR_CONF_PENALTY = 0.85    # multiply confs by this for blurry frames (reduce FP)
         self.veh_h = int(veh_shape[2])
         self.veh_w = int(veh_shape[3])
+        # FP32 fallback session for INT8 degradation recovery (block 7905900: 1-box failure)
+        self.veh_session_fp32 = None
+        try:
+            veh_fp32 = str(veh_path / "vehicle_weights_fp32.onnx") if veh_path else None
+            if veh_fp32 and Path(veh_fp32).exists():
+                self.veh_session_fp32 = ort.InferenceSession(
+                    veh_fp32,
+                    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+                )
+                logger.info("[init] Vehicle FP32 fallback model loaded")
+            else:
+                logger.info("[init] Vehicle FP32 fallback not available")
+        except Exception as e:
+            logger.warning(f"[init] Vehicle FP32 fallback failed: {e}")
         # Person model — CUDA immediately, TRT engine builds in background
         per_onnx = str(path_hf_repo / "person_weights.onnx")
         self.per_session = ort.InferenceSession(
         y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
         return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
+    def _veh_run_pass(self, image_bgr, conf_thresh, session=None):
+        """Run one vehicle-detector forward pass and decode it to image-space boxes.
+        Args:
+            image_bgr: Input frame. Only ``shape[:2]`` is read here; the tensor
+                is built by ``_veh_preprocess``.
+            conf_thresh: Confidence threshold forwarded to ``_veh_decode``.
+            session: Optional inference session to run instead of
+                ``self.veh_session`` (used for the FP32 fallback).
+        Returns:
+            The ``(boxes, confs, cls_ids)`` triple produced by ``_veh_decode``.
+        """
+        if session is None:
+            session = self.veh_session
+        # self.veh_input_name is presumably read from the primary session at
+        # init (TODO confirm). A different export (e.g. the FP32 fallback) may
+        # name its input tensor differently, so ask the session actually used.
+        if session is self.veh_session:
+            input_name = self.veh_input_name
+        else:
+            input_name = session.get_inputs()[0].name
         oh, ow = image_bgr.shape[:2]
         inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
+        raw = session.run(None, {input_name: inp})[0]
         return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
+    def _infer_vehicle_core(self, image_bgr, session=None):
+        """Core vehicle detection pipeline. session param allows FP32 fallback."""
         oh, ow = image_bgr.shape[:2]
         # Primary pass
+        boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
         # Flip TTA pass — horizontal flip, mirror boxes back
         if ENABLE_TTA:
             flipped = cv2.flip(image_bgr, 1)
+            f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
             if len(f_boxes) > 0:
                 # Mirror x-coords: x1'=ow-x2, x2'=ow-x1
                 f_boxes[:, 0], f_boxes[:, 2] = ow - f_boxes[:, 2], ow - f_boxes[:, 0]
             ))
         return out
+    def _infer_vehicle(self, image_bgr):
+        """Detect vehicles, retrying on the FP32 model when INT8 looks degraded.
+        The INT8 session always runs first. When it yields at most one box and
+        an FP32 session was loaded, the same pipeline is re-run on FP32 and the
+        FP32 result is returned only if it contains strictly more detections
+        (see block 7905900). Otherwise the INT8 result is returned unchanged.
+        """
+        primary = self._infer_vehicle_core(image_bgr, self.veh_session)
+        fallback_session = self.veh_session_fp32
+        # Two or more boxes, or no FP32 model available: trust INT8 as-is.
+        # NOTE(review): the retry also fires on frames that genuinely contain
+        # 0-1 vehicles, costing a second full pipeline run — confirm RTF budget.
+        if len(primary) > 1 or not fallback_session:
+            return primary
+        retry = self._infer_vehicle_core(image_bgr, fallback_session)
+        if len(retry) <= len(primary):
+            return primary
+        logger.warning(
+            f"[vehicle] INT8 degraded ({len(primary)} boxes), "
+            f"FP32 fallback recovered ({len(retry)} boxes)"
+        )
+        return retry
     # ── Vehicle parts confirmation ───────────────────────────────────────
     @staticmethod
+    # ── Person inference (TTA consensus: original + flip views) ──────────
+    @staticmethod
+    def _match_boxes_iou(boxes_a, boxes_b, iou_thr):
+        """Greedily pair boxes from two sets by IoU.
+        Boxes in ``boxes_a`` are visited in index order; each one claims the
+        not-yet-claimed box in ``boxes_b`` with the highest IoU (first index
+        wins ties), provided that IoU is positive and >= ``iou_thr``.
+        Args:
+            boxes_a: 2-D array of boxes, columns indexed as x1, y1, x2, y2.
+            boxes_b: 2-D array of boxes in the same layout.
+            iou_thr: Minimum IoU for a pair to count as a match.
+        Returns:
+            (matched_pairs, unmatched_a, unmatched_b) where
+            matched_pairs is a list of (idx_a, idx_b, iou) tuples,
+            unmatched_a lists indices in boxes_a with no match, and
+            unmatched_b lists indices in boxes_b with no match.
+        """
+        n_a, n_b = len(boxes_a), len(boxes_b)
+        if n_a == 0:
+            return [], [], list(range(n_b))
+        if n_b == 0:
+            return [], list(range(n_a)), []
+        boxes_a = np.asarray(boxes_a)
+        boxes_b = np.asarray(boxes_b)
+        # Areas of the B boxes are loop-invariant; compute them once.
+        area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
+        free_b = np.ones(n_b, dtype=bool)
+        matched_pairs = []
+        unmatched_a = []
+        for i, box in enumerate(boxes_a):
+            inter_w = np.maximum(
+                0.0, np.minimum(box[2], boxes_b[:, 2]) - np.maximum(box[0], boxes_b[:, 0]))
+            inter_h = np.maximum(
+                0.0, np.minimum(box[3], boxes_b[:, 3]) - np.maximum(box[1], boxes_b[:, 1]))
+            inter = inter_w * inter_h
+            area_a = (box[2] - box[0]) * (box[3] - box[1])
+            iou = inter / (area_a + area_b - inter + 1e-9)
+            # Already-claimed B boxes must never win the argmax.
+            iou[~free_b] = -1.0
+            j = int(np.argmax(iou))
+            best = float(iou[j])
+            # Require a real overlap: with iou_thr <= 0 the old code emitted a
+            # bogus (i, -1, 0) pair when no candidate overlapped at all.
+            if best > 0.0 and best >= iou_thr:
+                matched_pairs.append((i, j, best))
+                free_b[j] = False
+            else:
+                unmatched_a.append(i)
+        unmatched_b = np.flatnonzero(free_b).tolist()
+        return matched_pairs, unmatched_a, unmatched_b
     def _infer_person(self, image_bgr):
+        """Person detection with TTA consensus merging.
+        Pipeline (v3.23 — replaces concatenate+soft-NMS with consensus merging):
+        1. Original pass at native 960px
         2. Flip TTA pass
+        3. Match boxes across views (IoU >= PER_TTA_MATCH_IOU)
+        4. Graduated confidence thresholds:
+           - Confirmed by both views: keep at PER_TTA_CONF_BOTH (0.50)
+           - Original-only: keep at PER_TTA_CONF_ORIG (0.60)
+           - Flip-only: keep at PER_TTA_CONF_FLIP (0.75)
+        5. Hard NMS on merged result
+        6. Sanity filters + safety ceiling
+        7. Pose FP filter + box refinement (if time allows)
         """
         oh, ow = image_bgr.shape[:2]
         t_start = time.monotonic()
+        # Frame quality gating
         blur_score = self._frame_blur_score(image_bgr)
         is_blurry = blur_score < PER_BLUR_THRESHOLD
+        # Pass 1: original image
+        boxes_orig, confs_orig = self._per_run_pass(image_bgr, PER_TTA_CONF_BOTH)
+        # Pass 2: horizontal flip
         flipped = cv2.flip(image_bgr, 1)
+        boxes_flip, confs_flip = self._per_run_pass(flipped, PER_TTA_CONF_BOTH)
         if len(boxes_flip) > 0:
             boxes_flip[:, 0], boxes_flip[:, 2] = (
                 ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
+        if len(boxes_orig) == 0 and len(boxes_flip) == 0:
             return []
+        # TTA consensus: match boxes across views
+        matched, unmatched_o, unmatched_f = self._match_boxes_iou(
+            boxes_orig, boxes_flip, PER_TTA_MATCH_IOU)
+        # Build merged result with graduated thresholds
+        merged_b = []
+        merged_s = []
+        # Confirmed by both views: keep original box, use max confidence, threshold=0.50
+        for i_o, i_f, iou in matched:
+            conf = max(float(confs_orig[i_o]), float(confs_flip[i_f]))
+            if conf >= PER_TTA_CONF_BOTH:
+                merged_b.append(boxes_orig[i_o])
+                merged_s.append(conf)
+        # Original-only: need higher confidence (0.60)
+        for i_o in unmatched_o:
+            if confs_orig[i_o] >= PER_TTA_CONF_ORIG:
+                merged_b.append(boxes_orig[i_o])
+                merged_s.append(float(confs_orig[i_o]))
+        # Flip-only: strict threshold (0.75) — flip-only detections are likely FP
+        for i_f in unmatched_f:
+            if confs_flip[i_f] >= PER_TTA_CONF_FLIP:
+                merged_b.append(boxes_flip[i_f])
+                merged_s.append(float(confs_flip[i_f]))
+        if not merged_b:
+            return []
+        merged_b = np.array(merged_b)
+        merged_s = np.array(merged_s)
+        # Hard NMS on merged result (no soft-NMS — no confidence decay)
+        keep = _nms_per_class_boost(
+            merged_b, merged_s,
+            np.zeros(len(merged_s), dtype=int),  # single class
+            iou_thr=PER_NMS_IOU)
+        merged_b, merged_s = keep[0], keep[1]
+        # Safety ceiling
         if len(merged_s) > PER_MAX_DET:
             top_idx = np.argsort(merged_s)[-PER_MAX_DET:]
             merged_b = merged_b[top_idx]
         if len(merged_b) == 0:
             return []
+        # Blur confidence penalty
         if is_blurry:
             merged_s = merged_s * PER_BLUR_CONF_PENALTY
+        # Perspective scaling penalty
         merged_s = self._perspective_penalty(merged_b, merged_s, oh)
+        # Final confidence floor (catches blur/perspective decay edge cases)
+        keep_mask = merged_s >= PER_TTA_CONF_BOTH
+        merged_b = merged_b[keep_mask]
+        merged_s = merged_s[keep_mask]
         # Sanity filters
         img_area = float(oh * ow)
             return self._infer_person(image_bgr)
         if element_hint == 'vehicle':
+            # Run vehicle detection + parts confirmation with empty person_boxes.
+            # Plate/headlight/window checks fire normally; driver/rider overlap
+            # check finds no matches (boost=0) but doesn't suppress.
+            vehicle_boxes = self._infer_vehicle(image_bgr)
+            return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr)
         if element_hint == 'petrol' and self.petrol_session:
             return self._infer_petrol(image_bgr)