meaculpitt
/

ScoreVision-Crime

@@ -1,25 +1,17 @@
-# build-marker: v3-ensemble-alfred-priority-all
-"""SN44 crime detection miner — ENSEMBLE of alfred yolo26n + RF-DETR base.
-Composes two internal miners with different preprocess/inference pipelines:
-  _AlfredMiner: yolo26n e2e ONNX, letterbox 1280 + /255, TTA (h-flip + conf boost)
-  _RFDETRMiner: rfdetr base e2e ONNX, stretch 1288 + ImageNet normalize, no TTA
-Class routing (final union after per-class NMS@IoU=0.5, alfred wins conflicts):
-  cls0 balaclava  : BOTH (alfred priority on conflicts)
-  cls1 hoodie     : BOTH (alfred priority on conflicts)
-  cls2 glove      : BOTH (alfred priority on conflicts)
-  cls3 bat        : BOTH (alfred priority on conflicts)
-  cls4 spray paint: BOTH (alfred priority on conflicts)
-  cls5 graffiti   : alfred only (RF-DETR can't read static walls)
-Conf threshold 0.52 is applied INSIDE each internal miner; the union is the
-already-thresholded boxes from each. This matches alfred's existing per-class
-calibration (TTA conf-boost happens against the 0.52 threshold).
-ONNX file names expected in path_hf_repo:
-  weights.onnx        - alfred yolo26n e2e [1,300,6] in input-pixel coords (1280)
-  weights_rfdetr.onnx - RF-DETR base e2e [1,300,6] in input-pixel coords (1288)
 """
 import math
 from pathlib import Path
@@ -46,46 +38,57 @@ class TVFrameResult(BaseModel):
     keypoints: list[tuple[int, int]]
-_IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
-_IMAGENET_STD  = np.array([0.229, 0.224, 0.225], dtype=np.float32)
-# ============================================================ ALFRED PATH
-# Verbatim alfred-style pipeline (letterbox + TTA). Returns list[BoundingBox]
-# already conf-filtered at 0.52, geometry-filtered, NMS'd, cross-class deduped.
-class _AlfredMiner:
-    def __init__(self, path_hf_repo: Path):
-        # Open weights.onnx from path_hf_repo in an ONNX Runtime session and
-        # set the fixed input size, thresholds and box-geometry limits.
-        self.path_hf_repo = path_hf_repo
         self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
-        self.cls_remap = np.arange(6, dtype=np.int32)
         sess_options = ort.SessionOptions()
         sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-        # Try CUDA first (CPU listed as second provider); if session creation
-        # raises for any reason, rebuild it with the CPU provider only.
         try:
             self.session = ort.InferenceSession(
-                str(path_hf_repo / "weights.onnx"),
                 sess_options=sess_options,
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
         except Exception:
             self.session = ort.InferenceSession(
-                str(path_hf_repo / "weights.onnx"),
                 sess_options=sess_options,
                 providers=["CPUExecutionProvider"],
             )
         self.input_name = self.session.get_inputs()[0].name
         self.output_names = [o.name for o in self.session.get_outputs()]
         self.input_h = 1280
         self.input_w = 1280
         self.conf_threshold = 0.52
         self.iou_thresh = 0.4
         self.cross_iou_thresh = 0.7
         self.max_det = 150
-        # predict_one() dispatches to _infer_tta when this flag is set.
-        self.use_tta = True
         self.min_box_area = 196
         self.min_side = 8
         self.max_aspect_ratio = 8.0
     def _letterbox(self, image):
         h, w = image.shape[:2]
         ratio = min(self.input_w / w, self.input_h / h)
@@ -162,21 +165,6 @@ class _AlfredMiner:
         kept = np.array(keep, dtype=np.intp)
         return boxes[kept], scores[kept], cls_ids[kept]
-    @staticmethod
-    def _max_score_per_cluster(coords, scores, keep_idx, iou_thresh):
-        """Return, for each kept box, the best score among the boxes overlapping it.
-
-        For every index in keep_idx, the IoU of coords[idx] against every row
-        of coords is computed, and the maximum of scores over rows with
-        IoU >= iou_thresh is stored. The result is a float32 array with one
-        entry per kept index (an empty float32 array when keep_idx is empty).
-        """
-        # NOTE(review): class ids are not an input here, so a kept box can take
-        # the score of an overlapping box of a different class — confirm intended.
-        if len(keep_idx) == 0: return np.array([], dtype=np.float32)
-        out = np.empty(len(keep_idx), dtype=np.float32)
-        for j, idx in enumerate(keep_idx):
-            bi = coords[idx]
-            # Intersection rectangle of box idx with every box (vectorised).
-            xx1 = np.maximum(bi[0], coords[:, 0]); yy1 = np.maximum(bi[1], coords[:, 1])
-            xx2 = np.minimum(bi[2], coords[:, 2]); yy2 = np.minimum(bi[3], coords[:, 3])
-            inter = np.maximum(0.0, xx2-xx1) * np.maximum(0.0, yy2-yy1)
-            ai = (bi[2]-bi[0])*(bi[3]-bi[1])
-            aj = (coords[:, 2]-coords[:, 0]) * (coords[:, 3]-coords[:, 1])
-            # 1e-7 keeps the division defined when the union area is zero.
-            iou = inter / (ai + aj - inter + 1e-7)
-            # NOTE(review): np.max raises on an empty selection; that would need
-            # a box whose IoU with itself is below iou_thresh (e.g. zero area) —
-            # presumably filtered out upstream, verify.
-            out[j] = float(np.max(scores[iou >= iou_thresh]))
-        return out
     def _infer_single(self, image_bgr):
         inp, ratio, (dx, dy) = self._preprocess(image_bgr)
         out = self.session.run(self.output_names, {self.input_name: inp})[0]
@@ -200,29 +188,6 @@ class _AlfredMiner:
             boxes, confs, cls_ids = self._cross_class_dedup(boxes, confs, cls_ids, self.cross_iou_thresh)
         return self._to_boundingboxes(boxes, confs, cls_ids, ow, oh)
-    def _infer_tta(self, image_bgr):
-        """Detect on the image and on its horizontal mirror, then merge the results.
-
-        Both passes go through _infer_single. Mirrored boxes are mapped back to
-        the original frame, the two sets are concatenated, reduced with
-        _per_class_hard_nms at self.iou_thresh, truncated to self.max_det, and
-        each survivor's confidence is replaced by the cluster maximum from
-        _max_score_per_cluster (clamped to [0, 1]). Returns a list of
-        BoundingBox; empty when neither pass produced a box.
-        """
-        boxes_orig = self._infer_single(image_bgr)
-        h, w = image_bgr.shape[:2]
-        # flipCode 1 mirrors around the vertical axis (left <-> right).
-        flipped = cv2.flip(image_bgr, 1)
-        boxes_flip_raw = self._infer_single(flipped)
-        # Undo the mirror: x' = w - x, swapping x1/x2 so that x1 <= x2 still holds.
-        boxes_flip = [BoundingBox(x1=w-b.x2, y1=b.y1, x2=w-b.x1, y2=b.y2, cls_id=b.cls_id, conf=b.conf)
-                      for b in boxes_flip_raw]
-        all_boxes = boxes_orig + boxes_flip
-        if not all_boxes: return []
-        coords = np.array([[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32)
-        scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
-        cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)
-        # _per_class_hard_nms is defined outside this view; its result is used
-        # below as a sequence of indices into all_boxes.
-        keep_idx = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thresh)
-        if len(keep_idx) == 0: return []
-        # NOTE(review): this keeps the first max_det indices — presumably
-        # ordered by score; confirm against _per_class_hard_nms.
-        keep_idx = keep_idx[: self.max_det]
-        boosted = self._max_score_per_cluster(coords, scores, keep_idx, self.iou_thresh)
-        out = []
-        for j, idx in enumerate(keep_idx):
-            b = all_boxes[idx]
-            out.append(BoundingBox(x1=b.x1, y1=b.y1, x2=b.x2, y2=b.y2, cls_id=b.cls_id,
-                                   conf=max(0.0, min(1.0, float(boosted[j])))))
-        return out
     def _to_boundingboxes(self, boxes, confs, cls_ids, orig_w, orig_h):
         out = []
         for i in range(len(boxes)):
@@ -241,143 +206,13 @@ class _AlfredMiner:
                                    conf=max(0.0, min(1.0, float(confs[i])))))
         return out
-    def predict_one(self, image_bgr):
-        """Return detections for one image: _infer_tta when self.use_tta is set, else _infer_single."""
-        return self._infer_tta(image_bgr) if self.use_tta else self._infer_single(image_bgr)
-# ============================================================ RFDETR PATH
-class _RFDETRMiner:
-    """Detector backed by weights_rfdetr.onnx with resize + ImageNet-normalise preprocessing."""
-    def __init__(self, path_hf_repo: Path):
-        # Open weights_rfdetr.onnx from path_hf_repo and set the fixed input
-        # size, confidence threshold and box-geometry limits.
-        sess_options = ort.SessionOptions()
-        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-        # Try CUDA first; if session creation raises, rebuild with CPU only.
-        try:
-            self.session = ort.InferenceSession(
-                str(path_hf_repo / "weights_rfdetr.onnx"),
-                sess_options=sess_options,
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
-            )
-        except Exception:
-            self.session = ort.InferenceSession(
-                str(path_hf_repo / "weights_rfdetr.onnx"),
-                sess_options=sess_options,
-                providers=["CPUExecutionProvider"],
-            )
-        self.input_name = self.session.get_inputs()[0].name
-        self.output_names = [o.name for o in self.session.get_outputs()]
-        self.input_h = 1288
-        self.input_w = 1288
-        self.conf_threshold = 0.52
-        self.min_box_area = 196
-        self.min_side = 8
-        self.max_aspect_ratio = 8.0
-    def predict_one(self, image_bgr):
-        """Run the model on one BGR image and return the boxes that pass all filters.
-
-        Returns a list of BoundingBox with integer corners in original-image
-        pixels; the list is empty when no row reaches self.conf_threshold.
-        """
-        oh, ow = image_bgr.shape[:2]
-        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-        # Direct resize to the network size; aspect ratio is not preserved.
-        resized = cv2.resize(rgb, (self.input_w, self.input_h), interpolation=cv2.INTER_LINEAR)
-        x = resized.astype(np.float32) / 255.0
-        x = (x - _IMAGENET_MEAN) / _IMAGENET_STD
-        # HWC -> CHW, then add a leading batch axis of size 1.
-        x = np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None, ...].astype(np.float32))
-        out = self.session.run(self.output_names, {self.input_name: x})[0]
-        # Drop the batch axis if the first output is 3-D.
-        if out.ndim == 3: out = out[0]
-        # Columns as read below: 0..3 box corners, 4 confidence, 5 class id.
-        confs = out[:, 4].astype(np.float32)
-        keep = confs >= self.conf_threshold
-        if not keep.any(): return []
-        out = out[keep]
-        boxes = out[:, :4].astype(np.float32).copy()
-        confs = out[:, 4].astype(np.float32)
-        cls_ids = out[:, 5].astype(np.int32)
-        # Rescale from network-input pixels to original-image pixels, per axis.
-        sx = ow / float(self.input_w); sy = oh / float(self.input_h)
-        boxes[:, [0, 2]] *= sx; boxes[:, [1, 3]] *= sy
-        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, ow - 1)
-        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, oh - 1)
-        out_boxes = []
-        for i in range(len(boxes)):
-            x1, y1, x2, y2 = boxes[i]
-            # Round outwards to integers (floor the min corner, ceil the max
-            # corner) and clamp to [0, ow] / [0, oh].
-            ix1 = max(0, min(ow, math.floor(x1))); iy1 = max(0, min(oh, math.floor(y1)))
-            ix2 = max(0, min(ow, math.ceil(x2))); iy2 = max(0, min(oh, math.ceil(y2)))
-            # Geometry filters: empty box, too small an area, too short a
-            # side, or too elongated.
-            if ix2 <= ix1 or iy2 <= iy1: continue
-            bw, bh = ix2 - ix1, iy2 - iy1
-            if bw * bh < self.min_box_area: continue
-            if min(bw, bh) < self.min_side: continue
-            ar = max(bw / max(bh, 1), bh / max(bw, 1))
-            if ar > self.max_aspect_ratio: continue
-            out_boxes.append(BoundingBox(x1=ix1, y1=iy1, x2=ix2, y2=iy2,
-                                         cls_id=int(cls_ids[i]),
-                                         conf=max(0.0, min(1.0, float(confs[i])))))
-        return out_boxes
-# ============================================================ ENSEMBLE PUBLIC
-class Miner:
-    """Public ensemble miner — chute calls predict_batch(...)."""
-    def __init__(self, path_hf_repo) -> None:
-        # Build both internal miners from path_hf_repo, set the per-model
-        # class routing and merge threshold, then run a short warmup.
-        self.path_hf_repo = Path(path_hf_repo)
-        self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
-        # Best-effort call: any exception from preload_dlls() is ignored.
-        try:
-            ort.preload_dlls()
-        except Exception:
-            pass
-        self.alfred = _AlfredMiner(self.path_hf_repo)
-        self.rfdetr = _RFDETRMiner(self.path_hf_repo)
-        # v3 (2026-05-04): all classes go through alfred (was {0,1,5}).
-        # cid 61709 post-mortem showed alfred returning correct gloves at
-        # conf 0.79/0.89 that the prior {0,1,5} filter dropped, costing the
-        # full validator score on that frame. RF-DETR remains additive on
-        # cls 0..4; cls 5 (graffiti) stays alfred-only since RF-DETR can't
-        # read static walls.
-        self.alfred_classes = {0, 1, 2, 3, 4, 5}
-        self.rfdetr_classes = {0, 1, 2, 3, 4}
-        self.merge_iou = 0.5
-        # Warmup
-        # Up to two passes per model on an all-zero frame; the first exception
-        # ends that model's warmup and is otherwise ignored.
-        warm = np.zeros((1280, 1280, 3), dtype=np.uint8)
-        for _ in range(2):
-            try: self.alfred.predict_one(warm)
-            except Exception: break
-        for _ in range(2):
-            try: self.rfdetr.predict_one(warm)
-            except Exception: break
-    def __repr__(self):
-        # Fixed description text; only merge_iou is read from the instance.
-        return (f"CrimeEnsembleMiner v3  alfred(yolo26n@1280, TTA) + "
-                f"rfdetr(base@1288)  conf>=0.52  merge_iou={self.merge_iou}  "
-                f"alfred_priority_all_classes")
-    @staticmethod
-    def _box_iou(a: BoundingBox, b: BoundingBox) -> float:
-        """Return the intersection-over-union of two boxes given by x1/y1/x2/y2."""
-        xx1 = max(a.x1, b.x1); yy1 = max(a.y1, b.y1)
-        xx2 = min(a.x2, b.x2); yy2 = min(a.y2, b.y2)
-        inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
-        ai = (a.x2 - a.x1) * (a.y2 - a.y1)
-        bi = (b.x2 - b.x1) * (b.y2 - b.y1)
-        # 1e-7 keeps the division defined when both boxes have zero area.
-        return inter / (ai + bi - inter + 1e-7)
-    def _merge(self, alfred_boxes: list, rfdetr_boxes: list) -> list:
-        """Per-class union: alfred always kept; rfdetr kept ONLY if not overlapping
-        an alfred same-class box at IoU >= merge_iou. cls 0..4 see both — alfred
-        priority on conflicts; cls 5 is alfred-only (no rfdetr boxes there)."""
-        kept = list(alfred_boxes)
-        for rb in rfdetr_boxes:
-            collide = False
-            # Compared against alfred boxes only, never against rfdetr boxes
-            # already appended to kept.
-            for ab in alfred_boxes:
-                if ab.cls_id == rb.cls_id and self._box_iou(ab, rb) >= self.merge_iou:
-                    collide = True; break
-            if not collide:
-                kept.append(rb)
-        return kept
     def predict_batch(self, batch_images, offset, n_keypoints):
-        # For each image: run both miners, keep each model's routed classes,
-        # merge, and wrap the result in a TVFrameResult with
-        # frame_id = offset + position in the batch.
         results = []
         for idx, image in enumerate(batch_images):
-            a_all = self.alfred.predict_one(image)
-            r_all = self.rfdetr.predict_one(image)
-            a_keep = [b for b in a_all if b.cls_id in self.alfred_classes]
-            r_keep = [b for b in r_all if b.cls_id in self.rfdetr_classes]
-            merged = self._merge(a_keep, r_keep)
             results.append(TVFrameResult(
                 frame_id=offset + idx,
-                boxes=merged,
                 keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
             ))
         return results

+# build-marker: v5-alfred-only-no-tta
+"""SN44 crime detection miner — ALFRED ONLY, no TTA, no RF-DETR.
+v5 (2026-05-04): drops the RF-DETR branch entirely. Component benchmarks showed
+RF-DETR was ~10× slower than alfred (8.2s vs 0.8s on CPU) and contributed zero
+observed scoring credit on cid 61709 (alfred alone returned the same 3 correct
+boxes that the alfred-competitor used to earn 0.8). Goal: get under the 5s
+validator gate with comfortable margin (target p95 < 2000ms e2e).
+Single ONNX file expected in path_hf_repo:
+  weights.onnx — alfred yolo26n e2e [1,300,6] in input-pixel coords (1280)
+Conf threshold 0.52, NMS IoU 0.4, min_box_area 196 — unchanged from v3/v4.
+All 6 classes routed through alfred (identity remap).
 """
 import math
 from pathlib import Path
     keypoints: list[tuple[int, int]]
+class Miner:
+    """Public miner — chute calls predict_batch(...). v5 is alfred-only,
+    single forward pass, no TTA, no RF-DETR."""
+    def __init__(self, path_hf_repo) -> None:
+        """Open ``weights.onnx`` from *path_hf_repo* and prepare the detector.
+
+        Creates the ONNX Runtime session (CUDA first, CPU-only on failure),
+        records the session's input/output names, sets the fixed input size,
+        thresholds and box-geometry limits, and runs a short warmup.
+
+        Args:
+            path_hf_repo: directory containing ``weights.onnx``; anything
+                accepted by ``pathlib.Path``.
+        """
+        self.path_hf_repo = Path(path_hf_repo)
         self.class_names = ["balaclava", "hoodie", "glove", "bat", "spray paint", "graffiti"]
+        self.cls_remap = np.arange(6, dtype=np.int32)  # identity remap, all classes
+        # Best-effort call: any exception from preload_dlls() is ignored so
+        # that start-up continues.
+        try:
+            ort.preload_dlls()
+        except Exception:
+            pass
         sess_options = ort.SessionOptions()
         sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        # Resolve the model path once; both session attempts load the same file.
+        model_path = str(self.path_hf_repo / "weights.onnx")
         try:
             self.session = ort.InferenceSession(
+                model_path,
                 sess_options=sess_options,
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
         except Exception:
             self.session = ort.InferenceSession(
+                model_path,
                 sess_options=sess_options,
                 providers=["CPUExecutionProvider"],
             )
         self.input_name = self.session.get_inputs()[0].name
         self.output_names = [o.name for o in self.session.get_outputs()]
         self.input_h = 1280
         self.input_w = 1280
         self.conf_threshold = 0.52
         self.iou_thresh = 0.4
         self.cross_iou_thresh = 0.7
         self.max_det = 150
         self.min_box_area = 196
         self.min_side = 8
         self.max_aspect_ratio = 8.0
+        # Warmup
+        # The dummy frame follows the configured input size, so it stays in
+        # step if input_h / input_w are ever changed above.
+        warm = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
+        for _ in range(2):
+            try:
+                self._infer_single(warm)
+            except Exception:
+                # Warmup is best-effort: stop at the first failure.
+                break
+    def __repr__(self):
+        """Return a one-line summary built from the instance's current settings.
+
+        Input size and confidence threshold are read from the instance, like
+        the IoU and minimum-area fields, so the text cannot drift from the
+        values actually in use.
+        """
+        return (f"CrimeMiner v5  alfred-only(yolo26n@{self.input_w}, NO TTA)  "
+                f"conf>={self.conf_threshold}  iou={self.iou_thresh}  min_area={self.min_box_area}")
     def _letterbox(self, image):
         h, w = image.shape[:2]
         ratio = min(self.input_w / w, self.input_h / h)
         kept = np.array(keep, dtype=np.intp)
         return boxes[kept], scores[kept], cls_ids[kept]
     def _infer_single(self, image_bgr):
         inp, ratio, (dx, dy) = self._preprocess(image_bgr)
         out = self.session.run(self.output_names, {self.input_name: inp})[0]
             boxes, confs, cls_ids = self._cross_class_dedup(boxes, confs, cls_ids, self.cross_iou_thresh)
         return self._to_boundingboxes(boxes, confs, cls_ids, ow, oh)
     def _to_boundingboxes(self, boxes, confs, cls_ids, orig_w, orig_h):
         out = []
         for i in range(len(boxes)):
                                    conf=max(0.0, min(1.0, float(confs[i])))))
         return out
     def predict_batch(self, batch_images, offset, n_keypoints):
+        """Run one forward pass per image and wrap each result in a TVFrameResult.
+
+        Args:
+            batch_images: iterable of images, each passed to _infer_single.
+            offset: added to the image's position in the batch to form frame_id.
+            n_keypoints: number of (0, 0) placeholder keypoints per frame;
+                converted with int() and floored at zero.
+
+        Returns:
+            A list with one TVFrameResult per input image, in input order.
+        """
         results = []
         for idx, image in enumerate(batch_images):
+            boxes = self._infer_single(image)
             results.append(TVFrameResult(
                 frame_id=offset + idx,
+                boxes=boxes,
                 keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
             ))
         return results