scorevision: push artifact
Browse files
miner.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
Score Vision SN44 — Unified miner v3.
|
| 3 |
Tri-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT) + petrol (end2end 640).
|
| 4 |
Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
|
| 5 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
|
@@ -211,7 +211,7 @@ VEH_NMS_IOU = 0.50
|
|
| 211 |
# ── Per-class vehicle confidence thresholds (output cls_id) ────────────────
|
| 212 |
# Raising from uniform 0.35: reduces FP (avg 4.1 FFPI → target <2.0)
|
| 213 |
VEH_CLASS_CONF: dict[int, float] = {
|
| 214 |
-
1: 0.
|
| 215 |
2: 0.45, # truck β keep
|
| 216 |
3: 0.50, # motorcycle β raised from 0.45, small targets prone to FP
|
| 217 |
0: 0.45, # bus β keep
|
|
@@ -235,7 +235,7 @@ VEH_CLASS_MIN_AREA: dict[int, int] = {
|
|
| 235 |
}
|
| 236 |
|
| 237 |
# ── Vehicle box sanity filters (global fallbacks) ─────────────────────────
|
| 238 |
-
VEH_MIN_WH = 8
|
| 239 |
VEH_MIN_AREA = 100
|
| 240 |
VEH_MAX_ASPECT = 8.0
|
| 241 |
VEH_MAX_AREA_RATIO = 0.95
|
|
@@ -265,7 +265,7 @@ VEH_PARTS_WINDOW_MIN_PEAKS = 3 # Min periodic edge peaks for window confirmati
|
|
| 265 |
# Motorcycle rider pose
|
| 266 |
VEH_PARTS_RIDER_LEAN_DEG = 15.0 # Min torso lean from vertical (degrees) for rider pose
|
| 267 |
# Plate detection thresholds
|
| 268 |
-
VEH_PARTS_PLATE_MIN_PX =
|
| 269 |
VEH_PARTS_PLATE_CONF = 0.35 # Min plate detection confidence
|
| 270 |
|
| 271 |
# ── Person config (TTA consensus) ───────────────────────────────────────────
|
|
@@ -298,6 +298,15 @@ PER_MAX_DET = 100 # Loose safety ceiling ONLY β not a count cap.
|
|
| 298 |
# FP cases where NMS has already failed. Previous values (10 spec'd, 50 first
|
| 299 |
# fix) were too tight. See FAILURE_ANALYSIS.md (2026-04-05).
|
| 300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
# ── Frame quality gating (Laplacian variance) ───────────────────────────────
|
| 302 |
PER_BLUR_THRESHOLD = 50.0 # Laplacian variance below this = severely blurry
|
| 303 |
PER_BLUR_CONF_PENALTY = 0.85 # multiply confs by this for blurry frames (reduce FP)
|
|
@@ -593,6 +602,21 @@ class Miner:
|
|
| 593 |
self.veh_h = int(veh_shape[2])
|
| 594 |
self.veh_w = int(veh_shape[3])
|
| 595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
# Person model — CUDA immediately, TRT engine builds in background
|
| 597 |
per_onnx = str(path_hf_repo / "person_weights.onnx")
|
| 598 |
self.per_session = ort.InferenceSession(
|
|
@@ -800,32 +824,25 @@ class Miner:
|
|
| 800 |
y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
|
| 801 |
return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
|
| 802 |
|
| 803 |
-
def _veh_run_pass(self, image_bgr, conf_thresh):
|
|
|
|
|
|
|
| 804 |
oh, ow = image_bgr.shape[:2]
|
| 805 |
inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
|
| 806 |
-
raw =
|
| 807 |
return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
|
| 808 |
|
| 809 |
-
def
|
| 810 |
-
"""
|
| 811 |
-
|
| 812 |
-
Pipeline (v3.22 — flip TTA re-enabled, RTF budget allows it):
|
| 813 |
-
1. Primary pass at VEH_CONF_THRES
|
| 814 |
-
2. Optional flip TTA pass at VEH_TTA_CONF (if ENABLE_TTA)
|
| 815 |
-
3. Remap classes, per-class NMS
|
| 816 |
-
4. Per-class confidence filter (higher thresholds reduce FP)
|
| 817 |
-
5. Per-class aspect ratio filter
|
| 818 |
-
6. All 4 classes scored (v3.20: bus unsuppressed, cls_id=0)
|
| 819 |
-
"""
|
| 820 |
oh, ow = image_bgr.shape[:2]
|
| 821 |
|
| 822 |
# Primary pass
|
| 823 |
-
boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
|
| 824 |
|
| 825 |
# Flip TTA pass — horizontal flip, mirror boxes back
|
| 826 |
if ENABLE_TTA:
|
| 827 |
flipped = cv2.flip(image_bgr, 1)
|
| 828 |
-
f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF)
|
| 829 |
if len(f_boxes) > 0:
|
| 830 |
# Mirror x-coords: x1'=ow-x2, x2'=ow-x1
|
| 831 |
f_boxes[:, 0], f_boxes[:, 2] = ow - f_boxes[:, 2], ow - f_boxes[:, 0]
|
|
@@ -912,6 +929,26 @@ class Miner:
|
|
| 912 |
))
|
| 913 |
return out
|
| 914 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 915 |
# ── Vehicle parts confirmation ───────────────────────────────────────
|
| 916 |
|
| 917 |
@staticmethod
|
|
@@ -1871,65 +1908,125 @@ class Miner:
|
|
| 1871 |
|
| 1872 |
# ── Person inference with SAHI tiling ────────────────────────────────
|
| 1873 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1874 |
def _infer_person(self, image_bgr):
|
| 1875 |
-
"""Person detection with
|
| 1876 |
|
| 1877 |
-
Pipeline (
|
| 1878 |
-
1.
|
| 1879 |
2. Flip TTA pass
|
| 1880 |
-
3.
|
| 1881 |
-
4.
|
| 1882 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1883 |
"""
|
| 1884 |
oh, ow = image_bgr.shape[:2]
|
| 1885 |
t_start = time.monotonic()
|
| 1886 |
|
| 1887 |
-
# Frame quality gating
|
| 1888 |
blur_score = self._frame_blur_score(image_bgr)
|
| 1889 |
is_blurry = blur_score < PER_BLUR_THRESHOLD
|
| 1890 |
|
| 1891 |
-
#
|
| 1892 |
-
|
| 1893 |
-
all_confs = [] # list of [N] arrays
|
| 1894 |
-
|
| 1895 |
-
# Pass 1: full image at native 960px
|
| 1896 |
-
boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
|
| 1897 |
-
if len(boxes_full) > 0:
|
| 1898 |
-
all_boxes.append(boxes_full)
|
| 1899 |
-
all_confs.append(confs_full)
|
| 1900 |
|
| 1901 |
-
|
| 1902 |
-
|
| 1903 |
-
# Pass 2: flip TTA (always run — only 2 passes total for RTF safety)
|
| 1904 |
flipped = cv2.flip(image_bgr, 1)
|
| 1905 |
-
boxes_flip, confs_flip = self._per_run_pass(flipped,
|
| 1906 |
if len(boxes_flip) > 0:
|
| 1907 |
boxes_flip[:, 0], boxes_flip[:, 2] = (
|
| 1908 |
ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
|
| 1909 |
-
|
| 1910 |
-
|
| 1911 |
-
|
| 1912 |
-
# Pass 3: CLAHE enhanced pass (low-contrast frames only, time-gated)
|
| 1913 |
-
if time.monotonic() - t_start < PER_RTF_BUDGET * 0.5:
|
| 1914 |
-
enhanced = self._per_enhance(image_bgr)
|
| 1915 |
-
if enhanced is not image_bgr: # CLAHE was applied (low contrast)
|
| 1916 |
-
boxes_enh, confs_enh = self._per_run_pass(enhanced, PER_CONF_LOW)
|
| 1917 |
-
if len(boxes_enh) > 0:
|
| 1918 |
-
all_boxes.append(boxes_enh)
|
| 1919 |
-
all_confs.append(confs_enh)
|
| 1920 |
-
|
| 1921 |
-
if not all_boxes:
|
| 1922 |
return []
|
| 1923 |
|
| 1924 |
-
#
|
| 1925 |
-
|
| 1926 |
-
|
| 1927 |
-
|
| 1928 |
-
|
| 1929 |
-
merged_b
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1930 |
|
| 1931 |
-
#
|
| 1932 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1933 |
if len(merged_s) > PER_MAX_DET:
|
| 1934 |
top_idx = np.argsort(merged_s)[-PER_MAX_DET:]
|
| 1935 |
merged_b = merged_b[top_idx]
|
|
@@ -1938,17 +2035,17 @@ class Miner:
|
|
| 1938 |
if len(merged_b) == 0:
|
| 1939 |
return []
|
| 1940 |
|
| 1941 |
-
# Blur confidence penalty
|
| 1942 |
if is_blurry:
|
| 1943 |
merged_s = merged_s * PER_BLUR_CONF_PENALTY
|
| 1944 |
|
| 1945 |
-
# Perspective scaling penalty
|
| 1946 |
merged_s = self._perspective_penalty(merged_b, merged_s, oh)
|
| 1947 |
|
| 1948 |
-
#
|
| 1949 |
-
|
| 1950 |
-
merged_b = merged_b[
|
| 1951 |
-
merged_s = merged_s[
|
| 1952 |
|
| 1953 |
# Sanity filters
|
| 1954 |
img_area = float(oh * ow)
|
|
@@ -2021,10 +2118,11 @@ class Miner:
|
|
| 2021 |
return self._infer_person(image_bgr)
|
| 2022 |
|
| 2023 |
if element_hint == 'vehicle':
|
| 2024 |
-
#
|
| 2025 |
-
#
|
| 2026 |
-
#
|
| 2027 |
-
|
|
|
|
| 2028 |
|
| 2029 |
if element_hint == 'petrol' and self.petrol_session:
|
| 2030 |
return self._infer_petrol(image_bgr)
|
|
|
|
| 1 |
"""
|
| 2 |
+
Score Vision SN44 — Unified miner v3.23 (2026-04-06). TTA consensus (person), FP32 fallback (vehicle), parts_confirm on vehicle challenges, car conf 0.60, VEH_MIN_WH=20. Person: DMSC19-inspired graduated consensus replaces soft-NMS (both=0.50, orig=0.60, flip=0.75). Vehicle: FP32 retry on ≤1 box, parts_confirm with empty person_boxes, plate 80px, car 0.60.
|
| 3 |
Tri-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT) + petrol (end2end 640).
|
| 4 |
Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
|
| 5 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
|
|
|
| 211 |
# ── Per-class vehicle confidence thresholds (output cls_id) ────────────────
|
| 212 |
# Raising from uniform 0.35: reduces FP (avg 4.1 FFPI → target <2.0)
|
| 213 |
VEH_CLASS_CONF: dict[int, float] = {
|
| 214 |
+
1: 0.60, # car β raised from 0.50, most FP-prone class (75% of training data)
|
| 215 |
2: 0.45, # truck β keep
|
| 216 |
3: 0.50, # motorcycle β raised from 0.45, small targets prone to FP
|
| 217 |
0: 0.45, # bus β keep
|
|
|
|
| 235 |
}
|
| 236 |
|
| 237 |
# ── Vehicle box sanity filters (global fallbacks) ─────────────────────────
|
| 238 |
+
VEH_MIN_WH = 20 # was 8. Kills tiny horizon artifacts (confirmed: h<25 extras on block 7900800)
|
| 239 |
VEH_MIN_AREA = 100
|
| 240 |
VEH_MAX_ASPECT = 8.0
|
| 241 |
VEH_MAX_AREA_RATIO = 0.95
|
|
|
|
| 265 |
# Motorcycle rider pose
|
| 266 |
VEH_PARTS_RIDER_LEAN_DEG = 15.0 # Min torso lean from vertical (degrees) for rider pose
|
| 267 |
# Plate detection thresholds
|
| 268 |
+
VEH_PARTS_PLATE_MIN_PX = 80 # plates visible at ~80px vehicle width (was 120)
|
| 269 |
VEH_PARTS_PLATE_CONF = 0.35 # Min plate detection confidence
|
| 270 |
|
| 271 |
# ── Person config (TTA consensus) ───────────────────────────────────────────
|
|
|
|
| 298 |
# FP cases where NMS has already failed. Previous values (10 spec'd, 50 first
|
| 299 |
# fix) were too tight. See FAILURE_ANALYSIS.md (2026-04-05).
|
| 300 |
|
| 301 |
+
# ── TTA consensus thresholds (DMSC19-inspired graduated approach) ────────────
|
| 302 |
+
# Cross-view confirmation eliminates the soft-NMS confidence decay bug.
|
| 303 |
+
# Instead of concatenate+soft-NMS (which decayed confs below floor), we match
|
| 304 |
+
# boxes across original+flip views and apply graduated confidence thresholds.
|
| 305 |
+
PER_TTA_MATCH_IOU = 0.50 # IoU threshold for cross-view box matching
|
| 306 |
+
PER_TTA_CONF_BOTH = 0.50 # Confirmed by both views: lower threshold (high confidence)
|
| 307 |
+
PER_TTA_CONF_ORIG = 0.60 # Original-only: standard threshold (PER_CONF_LOW)
|
| 308 |
+
PER_TTA_CONF_FLIP = 0.75 # Flip-only: strict (flip-only detections are likely FP)
|
| 309 |
+
|
| 310 |
# ── Frame quality gating (Laplacian variance) ───────────────────────────────
|
| 311 |
PER_BLUR_THRESHOLD = 50.0 # Laplacian variance below this = severely blurry
|
| 312 |
PER_BLUR_CONF_PENALTY = 0.85 # multiply confs by this for blurry frames (reduce FP)
|
|
|
|
| 602 |
self.veh_h = int(veh_shape[2])
|
| 603 |
self.veh_w = int(veh_shape[3])
|
| 604 |
|
| 605 |
+
# FP32 fallback session for INT8 degradation recovery (block 7905900: 1-box failure)
|
| 606 |
+
self.veh_session_fp32 = None
|
| 607 |
+
try:
|
| 608 |
+
veh_fp32 = str(veh_path / "vehicle_weights_fp32.onnx") if veh_path else None
|
| 609 |
+
if veh_fp32 and Path(veh_fp32).exists():
|
| 610 |
+
self.veh_session_fp32 = ort.InferenceSession(
|
| 611 |
+
veh_fp32,
|
| 612 |
+
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 613 |
+
)
|
| 614 |
+
logger.info("[init] Vehicle FP32 fallback model loaded")
|
| 615 |
+
else:
|
| 616 |
+
logger.info("[init] Vehicle FP32 fallback not available")
|
| 617 |
+
except Exception as e:
|
| 618 |
+
logger.warning(f"[init] Vehicle FP32 fallback failed: {e}")
|
| 619 |
+
|
| 620 |
# Person model — CUDA immediately, TRT engine builds in background
|
| 621 |
per_onnx = str(path_hf_repo / "person_weights.onnx")
|
| 622 |
self.per_session = ort.InferenceSession(
|
|
|
|
| 824 |
y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
|
| 825 |
return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
|
| 826 |
|
| 827 |
+
def _veh_run_pass(self, image_bgr, conf_thresh, session=None):
    """Run one vehicle-detector forward pass and decode it to image coordinates.

    Args:
        image_bgr: Input image; only ``shape[:2]`` (height, width) is read
            here before the image is handed to ``self._veh_preprocess``.
        conf_thresh: Confidence threshold forwarded to ``self._veh_decode``.
        session: Optional inference session to run instead of
            ``self.veh_session`` (for example the FP32 fallback model).
            ``None`` selects the default session.

    Returns:
        The value produced by ``self._veh_decode``; callers in this file
        unpack it as ``(boxes, confs, cls_ids)``.
    """
    if session is None:
        session = self.veh_session

    if session is self.veh_session:
        input_name = self.veh_input_name
    else:
        # ``self.veh_input_name`` belongs to the default model. A separately
        # exported fallback model may name its input tensor differently, so
        # ask the session that is actually being run.
        input_name = session.get_inputs()[0].name

    oh, ow = image_bgr.shape[:2]
    # NOTE(review): preprocessing is shared by every session, so an alternate
    # model presumably has to accept the same input geometry — confirm.
    inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
    raw = session.run(None, {input_name: inp})[0]
    return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
|
| 834 |
|
| 835 |
+
def _infer_vehicle_core(self, image_bgr, session=None):
|
| 836 |
+
"""Core vehicle detection pipeline. session param allows FP32 fallback."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 837 |
oh, ow = image_bgr.shape[:2]
|
| 838 |
|
| 839 |
# Primary pass
|
| 840 |
+
boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
|
| 841 |
|
| 842 |
# Flip TTA pass — horizontal flip, mirror boxes back
|
| 843 |
if ENABLE_TTA:
|
| 844 |
flipped = cv2.flip(image_bgr, 1)
|
| 845 |
+
f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
|
| 846 |
if len(f_boxes) > 0:
|
| 847 |
# Mirror x-coords: x1'=ow-x2, x2'=ow-x1
|
| 848 |
f_boxes[:, 0], f_boxes[:, 2] = ow - f_boxes[:, 2], ow - f_boxes[:, 0]
|
|
|
|
| 929 |
))
|
| 930 |
return out
|
| 931 |
|
| 932 |
+
def _infer_vehicle(self, image_bgr):
    """Detect vehicles, retrying with the FP32 model when INT8 looks degraded.

    The default (INT8) session runs first. If it yields ≤1 detection and an
    FP32 fallback session is loaded, the same pipeline is run again on the
    FP32 session; the FP32 result is used only when it contains strictly
    more detections than the INT8 result.

    Args:
        image_bgr: Image passed unchanged to ``self._infer_vehicle_core``.

    Returns:
        The detections from whichever of the two runs was selected.
    """
    boxes = self._infer_vehicle_core(image_bgr, self.veh_session)

    # Healthy INT8 output, or no fallback model to consult: nothing to retry.
    if len(boxes) > 1 or not self.veh_session_fp32:
        return boxes

    boxes_fp32 = self._infer_vehicle_core(image_bgr, self.veh_session_fp32)

    # Keep the INT8 answer unless FP32 found strictly more detections.
    if len(boxes_fp32) <= len(boxes):
        return boxes

    logger.warning(
        f"[vehicle] INT8 degraded ({len(boxes)} boxes), "
        f"FP32 fallback recovered ({len(boxes_fp32)} boxes)"
    )
    return boxes_fp32
|
| 951 |
+
|
| 952 |
# ── Vehicle parts confirmation ───────────────────────────────────────
|
| 953 |
|
| 954 |
@staticmethod
|
|
|
|
| 1908 |
|
| 1909 |
# ── Person inference with SAHI tiling ────────────────────────────────
|
| 1910 |
|
| 1911 |
+
@staticmethod
def _match_boxes_iou(boxes_a, boxes_b, iou_thr):
    """Greedily pair boxes from two sets by IoU, one-to-one.

    Boxes in ``boxes_a`` are visited in index order. Each one claims the
    still-unclaimed box in ``boxes_b`` with the highest IoU (the lowest
    index wins ties), provided that IoU is positive and at least
    ``iou_thr``. The assignment is greedy, not globally optimal.

    Args:
        boxes_a: Array-like of rows indexed as ``[x1, y1, x2, y2]``.
        boxes_b: Array-like of rows indexed as ``[x1, y1, x2, y2]``.
        iou_thr: Minimum IoU for two boxes to count as the same object.

    Returns:
        ``(matched_pairs, unmatched_a, unmatched_b)`` where
        ``matched_pairs`` is a list of ``(idx_a, idx_b, iou)`` tuples and the
        other two are lists of the indices left without a partner.
    """
    boxes_a = np.asarray(boxes_a)
    boxes_b = np.asarray(boxes_b)
    n_a, n_b = len(boxes_a), len(boxes_b)

    if n_a == 0:
        return [], [], list(range(n_b))
    if n_b == 0:
        return [], list(range(n_a)), []

    # Loop-invariant: the area of every candidate in boxes_b.
    areas_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    free_b = np.ones(n_b, dtype=bool)

    matched_pairs = []
    unmatched_a = []

    for i, box in enumerate(boxes_a):
        best_j, best_iou = -1, 0.0
        if free_b.any():
            inter_w = np.maximum(
                0.0,
                np.minimum(box[2], boxes_b[:, 2]) - np.maximum(box[0], boxes_b[:, 0]),
            )
            inter_h = np.maximum(
                0.0,
                np.minimum(box[3], boxes_b[:, 3]) - np.maximum(box[1], boxes_b[:, 1]),
            )
            inter = inter_w * inter_h
            area_a = (box[2] - box[0]) * (box[3] - box[1])
            ious = inter / (area_a + areas_b - inter + 1e-9)
            # Already-claimed (or NaN) candidates must never win the argmax.
            ious = np.where(free_b, np.nan_to_num(ious, nan=-1.0), -1.0)
            best_j = int(np.argmax(ious))
            best_iou = float(ious[best_j])

        # Require real overlap: without the ``> 0`` guard a threshold of 0
        # would record a "match" against a box that does not overlap at all
        # (or against no box when every candidate is already claimed).
        if best_iou > 0.0 and best_iou >= iou_thr:
            matched_pairs.append((i, best_j, best_iou))
            free_b[best_j] = False
        else:
            unmatched_a.append(i)

    unmatched_b = np.flatnonzero(free_b).tolist()
    return matched_pairs, unmatched_a, unmatched_b
|
| 1953 |
+
|
| 1954 |
def _infer_person(self, image_bgr):
|
| 1955 |
+
"""Person detection with TTA consensus merging.
|
| 1956 |
|
| 1957 |
+
Pipeline (v3.23 — replaces concatenate+soft-NMS with consensus merging):
|
| 1958 |
+
1. Original pass at native 960px
|
| 1959 |
2. Flip TTA pass
|
| 1960 |
+
3. Match boxes across views (IoU >= PER_TTA_MATCH_IOU)
|
| 1961 |
+
4. Graduated confidence thresholds:
|
| 1962 |
+
- Confirmed by both views: keep at PER_TTA_CONF_BOTH (0.50)
|
| 1963 |
+
- Original-only: keep at PER_TTA_CONF_ORIG (0.60)
|
| 1964 |
+
- Flip-only: keep at PER_TTA_CONF_FLIP (0.75)
|
| 1965 |
+
5. Hard NMS on merged result
|
| 1966 |
+
6. Sanity filters + safety ceiling
|
| 1967 |
+
7. Pose FP filter + box refinement (if time allows)
|
| 1968 |
"""
|
| 1969 |
oh, ow = image_bgr.shape[:2]
|
| 1970 |
t_start = time.monotonic()
|
| 1971 |
|
| 1972 |
+
# Frame quality gating
|
| 1973 |
blur_score = self._frame_blur_score(image_bgr)
|
| 1974 |
is_blurry = blur_score < PER_BLUR_THRESHOLD
|
| 1975 |
|
| 1976 |
+
# Pass 1: original image
|
| 1977 |
+
boxes_orig, confs_orig = self._per_run_pass(image_bgr, PER_TTA_CONF_BOTH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1978 |
|
| 1979 |
+
# Pass 2: horizontal flip
|
|
|
|
|
|
|
| 1980 |
flipped = cv2.flip(image_bgr, 1)
|
| 1981 |
+
boxes_flip, confs_flip = self._per_run_pass(flipped, PER_TTA_CONF_BOTH)
|
| 1982 |
if len(boxes_flip) > 0:
|
| 1983 |
boxes_flip[:, 0], boxes_flip[:, 2] = (
|
| 1984 |
ow - boxes_flip[:, 2], ow - boxes_flip[:, 0])
|
| 1985 |
+
|
| 1986 |
+
if len(boxes_orig) == 0 and len(boxes_flip) == 0:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1987 |
return []
|
| 1988 |
|
| 1989 |
+
# TTA consensus: match boxes across views
|
| 1990 |
+
matched, unmatched_o, unmatched_f = self._match_boxes_iou(
|
| 1991 |
+
boxes_orig, boxes_flip, PER_TTA_MATCH_IOU)
|
| 1992 |
+
|
| 1993 |
+
# Build merged result with graduated thresholds
|
| 1994 |
+
merged_b = []
|
| 1995 |
+
merged_s = []
|
| 1996 |
+
|
| 1997 |
+
# Confirmed by both views: keep original box, use max confidence, threshold=0.50
|
| 1998 |
+
for i_o, i_f, iou in matched:
|
| 1999 |
+
conf = max(float(confs_orig[i_o]), float(confs_flip[i_f]))
|
| 2000 |
+
if conf >= PER_TTA_CONF_BOTH:
|
| 2001 |
+
merged_b.append(boxes_orig[i_o])
|
| 2002 |
+
merged_s.append(conf)
|
| 2003 |
+
|
| 2004 |
+
# Original-only: need higher confidence (0.60)
|
| 2005 |
+
for i_o in unmatched_o:
|
| 2006 |
+
if confs_orig[i_o] >= PER_TTA_CONF_ORIG:
|
| 2007 |
+
merged_b.append(boxes_orig[i_o])
|
| 2008 |
+
merged_s.append(float(confs_orig[i_o]))
|
| 2009 |
+
|
| 2010 |
+
# Flip-only: strict threshold (0.75) — flip-only detections are likely FP
|
| 2011 |
+
for i_f in unmatched_f:
|
| 2012 |
+
if confs_flip[i_f] >= PER_TTA_CONF_FLIP:
|
| 2013 |
+
merged_b.append(boxes_flip[i_f])
|
| 2014 |
+
merged_s.append(float(confs_flip[i_f]))
|
| 2015 |
+
|
| 2016 |
+
if not merged_b:
|
| 2017 |
+
return []
|
| 2018 |
+
|
| 2019 |
+
merged_b = np.array(merged_b)
|
| 2020 |
+
merged_s = np.array(merged_s)
|
| 2021 |
|
| 2022 |
+
# Hard NMS on merged result (no soft-NMS — no confidence decay)
|
| 2023 |
+
keep = _nms_per_class_boost(
|
| 2024 |
+
merged_b, merged_s,
|
| 2025 |
+
np.zeros(len(merged_s), dtype=int), # single class
|
| 2026 |
+
iou_thr=PER_NMS_IOU)
|
| 2027 |
+
merged_b, merged_s = keep[0], keep[1]
|
| 2028 |
+
|
| 2029 |
+
# Safety ceiling
|
| 2030 |
if len(merged_s) > PER_MAX_DET:
|
| 2031 |
top_idx = np.argsort(merged_s)[-PER_MAX_DET:]
|
| 2032 |
merged_b = merged_b[top_idx]
|
|
|
|
| 2035 |
if len(merged_b) == 0:
|
| 2036 |
return []
|
| 2037 |
|
| 2038 |
+
# Blur confidence penalty
|
| 2039 |
if is_blurry:
|
| 2040 |
merged_s = merged_s * PER_BLUR_CONF_PENALTY
|
| 2041 |
|
| 2042 |
+
# Perspective scaling penalty
|
| 2043 |
merged_s = self._perspective_penalty(merged_b, merged_s, oh)
|
| 2044 |
|
| 2045 |
+
# Final confidence floor (catches blur/perspective decay edge cases)
|
| 2046 |
+
keep_mask = merged_s >= PER_TTA_CONF_BOTH
|
| 2047 |
+
merged_b = merged_b[keep_mask]
|
| 2048 |
+
merged_s = merged_s[keep_mask]
|
| 2049 |
|
| 2050 |
# Sanity filters
|
| 2051 |
img_area = float(oh * ow)
|
|
|
|
| 2118 |
return self._infer_person(image_bgr)
|
| 2119 |
|
| 2120 |
if element_hint == 'vehicle':
|
| 2121 |
+
# Run vehicle detection + parts confirmation with empty person_boxes.
|
| 2122 |
+
# Plate/headlight/window checks fire normally; driver/rider overlap
|
| 2123 |
+
# check finds no matches (boost=0) but doesn't suppress.
|
| 2124 |
+
vehicle_boxes = self._infer_vehicle(image_bgr)
|
| 2125 |
+
return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr)
|
| 2126 |
|
| 2127 |
if element_hint == 'petrol' and self.petrol_session:
|
| 2128 |
return self._infer_petrol(image_bgr)
|