licensy
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

licensy committed 23 days ago

Commit

40bc7a8

verified ·

1 Parent(s): ae788f0

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +119 -176

miner.py CHANGED Viewed

@@ -1,21 +1,14 @@
-"""Plate-detection miner — v3 "plate_v3 + tight softnms".
-Base weights: plate_v3 (YOLO26s fine-tuned on Roboflow-filtered + 10x live pseudo-GT,
-resumed from plate_v2). fp16 end2end ONNX, static 1x3x1280x1280, ~19.4 MB.
-Weights: plate_v4 (resumed from plate_v3 + heavier CCTV aug + live×15 + 10× epochs).
-Bench on 221-shard pool: gated 0.436, mAP 0.980 (highest of all tested models).
-Beats plate_v3 (0.431), smile0123/m4 (0.406), 5GRAm (0.401), hermestech (0.418).
-Inference pipeline (bench-winner preset):
-  - Single full-image pass + hflip TTA + soft-NMS + hard-NMS
-  - softnms(conf=0.30, iou=0.45, sigma=0.5, max_det=16)
-  - Bench: gated 0.436, fp/img 0.51, ms_p95 ~160 locally (A4000)
-  - On pro_6000 + TEE: expect ~2-3s p95 including network/attest overhead
-Compared to:
-  plate_v2 best:     gated=0.424
-  hermestech best:   gated=0.422
-  5GRAm best:        gated=0.401
 """
 from pathlib import Path
 import math
@@ -52,8 +45,7 @@ class Miner:
         if cn_path.is_file():
             lines = cn_path.read_text(encoding="utf-8").splitlines()
             self.class_names = [
-                ln.strip()
-                for ln in lines
                 if ln.strip() and not ln.strip().startswith("#")
             ]
         else:
@@ -66,15 +58,11 @@ class Miner:
         except Exception as e:
             print(f"preload_dlls failed: {e}")
-        print("ORT available providers BEFORE session:", ort.get_available_providers())
         try:
             import torch
             if torch.cuda.is_available():
                 print(f"GPU: {torch.cuda.get_device_name(0)}")
                 print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
-            else:
-                print("GPU: CUDA not available via torch")
         except Exception as e:
             print(f"GPU detection failed: {e}")
@@ -83,21 +71,17 @@ class Miner:
         try:
             self.session = ort.InferenceSession(
-                str(model_path),
-                sess_options=sess_options,
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
-            print("Created ORT session with preferred CUDA provider list")
         except Exception as e:
             print(f"CUDA session creation failed, falling back to CPU: {e}")
             self.session = ort.InferenceSession(
-                str(model_path),
-                sess_options=sess_options,
                 providers=["CPUExecutionProvider"],
             )
-        print("ORT session providers:", self.session.get_providers())
         for inp in self.session.get_inputs():
             print("INPUT:", inp.name, inp.shape, inp.type)
         for out in self.session.get_outputs():
@@ -106,80 +90,55 @@ class Miner:
         self.input_name = self.session.get_inputs()[0].name
         self.output_names = [o.name for o in self.session.get_outputs()]
         self.input_shape = self.session.get_inputs()[0].shape
-        # plate_v3 export is fp16 static [1,3,1280,1280]
         self.input_dtype = (
-            np.float16
-            if "float16" in self.session.get_inputs()[0].type
             else np.float32
         )
         self.input_height = self._safe_dim(self.input_shape[2], default=SIZE)
         self.input_width = self._safe_dim(self.input_shape[3], default=SIZE)
-        # Tuned preset for plate_v3 — recall-biased variant.
-        # Bench softnms(c22,i.41,s.685) on 184-shard pool:
-        #   gated=0.440  mAP=0.978 (highest)  fp/img=0.38  ms_p95=157
-        # Switched from conf=0.30 after live data showed the tighter threshold
-        # missed borderline plates on shards where competitors scored 0.318.
-        # Trade: slightly higher fp/img on easy shards (capped by max_det), but
-        # recovers recall on hard shards where it matters most.
-        # plate_v4 bench winner: softnms(c30,md16) at gated=0.436, mAP=0.980
-        self.conf_thres = 0.30
-        self.iou_thres = 0.45
-        self.sigma = 0.5
-        self.max_det = 16
-        self.use_tta = True  # hflip TTA — bench-verified for mAP gain
         print(f"ONNX model loaded from: {model_path}")
-        print(f"ONNX providers: {self.session.get_providers()}")
-        print(f"ONNX input: name={self.input_name}, shape={self.input_shape}, dtype={self.input_dtype}")
-        print(f"Preset: conf={self.conf_thres} iou={self.iou_thres} sigma={self.sigma} max_det={self.max_det}")
     def __repr__(self) -> str:
-        return (
-            f"ONNXRuntime(session={type(self.session).__name__}, "
-            f"providers={self.session.get_providers()})"
-        )
     @staticmethod
     def _safe_dim(value, default: int) -> int:
         return value if isinstance(value, int) and value > 0 else default
-    # ---------- image preprocessing ----------
-    def _letterbox(
-        self,
-        image: ndarray,
-        new_shape: tuple[int, int],
-        color=(114, 114, 114),
-    ) -> tuple[ndarray, float, tuple[float, float]]:
         h, w = image.shape[:2]
         new_w, new_h = new_shape
         ratio = min(new_w / w, new_h / h)
-        resized_w = int(round(w * ratio))
-        resized_h = int(round(h * ratio))
-        if (resized_w, resized_h) != (w, h):
             interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
-            image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
-        dw = (new_w - resized_w) / 2.0
-        dh = (new_h - resized_h) / 2.0
-        left = int(round(dw - 0.1))
-        right = int(round(dw + 0.1))
-        top = int(round(dh - 0.1))
-        bottom = int(round(dh + 0.1))
-        padded = cv2.copyMakeBorder(
-            image, top, bottom, left, right,
-            borderType=cv2.BORDER_CONSTANT, value=color,
-        )
         return padded, ratio, (dw, dh)
-    def _preprocess(self, image: ndarray):
         img, ratio, pad = self._letterbox(image, (self.input_width, self.input_height))
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
         img = np.transpose(img, (2, 0, 1))[None, ...]
         return np.ascontiguousarray(img, dtype=self.input_dtype), ratio, pad
     @staticmethod
-    def _clip_boxes(boxes: np.ndarray, image_size: tuple[int, int]) -> np.ndarray:
         w, h = image_size
         boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)
         boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)
@@ -187,20 +146,32 @@ class Miner:
         boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)
         return boxes
-    # ---------- NMS primitives ----------
     @staticmethod
-    def _hard_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
         N = len(boxes)
         if N == 0:
             return np.array([], dtype=np.intp)
-        boxes = np.asarray(boxes, dtype=np.float32)
-        scores = np.asarray(scores, dtype=np.float32)
         order = np.argsort(-scores)
-        keep: list[int] = []
         while len(order):
             i = int(order[0])
             keep.append(i)
-            if len(order) == 1:
                 break
             rest = order[1:]
             xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
@@ -208,52 +179,13 @@ class Miner:
             xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
             yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
             inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
-            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
-            area_r = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
-            iou = inter / (area_i + area_r - inter + 1e-7)
             order = rest[iou <= iou_thresh]
         return np.array(keep, dtype=np.intp)
-    def _soft_nms(
-        self,
-        boxes: np.ndarray,
-        scores: np.ndarray,
-        sigma: float,
-        score_thresh: float = 0.01,
-    ) -> tuple[np.ndarray, np.ndarray]:
-        N = len(boxes)
-        if N == 0:
-            return np.array([], dtype=np.intp), np.array([], dtype=np.float32)
-        boxes = boxes.astype(np.float32, copy=True)
-        scores = scores.astype(np.float32, copy=True)
-        order = np.arange(N)
-        for i in range(N):
-            max_pos = i + int(np.argmax(scores[i:]))
-            boxes[[i, max_pos]] = boxes[[max_pos, i]]
-            scores[[i, max_pos]] = scores[[max_pos, i]]
-            order[[i, max_pos]] = order[[max_pos, i]]
-            if i + 1 >= N:
-                break
-            xx1 = np.maximum(boxes[i, 0], boxes[i + 1:, 0])
-            yy1 = np.maximum(boxes[i, 1], boxes[i + 1:, 1])
-            xx2 = np.minimum(boxes[i, 2], boxes[i + 1:, 2])
-            yy2 = np.minimum(boxes[i, 3], boxes[i + 1:, 3])
-            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
-            area_i = float(
-                (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
-            )
-            areas_j = (
-                np.maximum(0.0, boxes[i + 1:, 2] - boxes[i + 1:, 0])
-                * np.maximum(0.0, boxes[i + 1:, 3] - boxes[i + 1:, 1])
-            )
-            iou = inter / (area_i + areas_j - inter + 1e-7)
-            scores[i + 1:] *= np.exp(-(iou ** 2) / sigma)
-        mask = scores > score_thresh
-        return order[mask], scores[mask]
-    # ---------- raw-dets helper ----------
-    def _raw_dets(self, image: ndarray, conf: float) -> np.ndarray:
-        """Run a single forward pass and return [N, 5] dets in ORIGINAL image coords."""
         x, ratio, (dw, dh) = self._preprocess(image)
         out = self.session.run(self.output_names, {self.input_name: x})[0]
         if out.ndim == 3:
@@ -273,32 +205,51 @@ class Miner:
         boxes = self._clip_boxes(boxes, (ow, oh))
         return np.concatenate([boxes, scores[:, None]], axis=1)
-    # ---------- primary pass: soft-NMS + hflip TTA ----------
-    def _primary(self, image: ndarray) -> np.ndarray:
-        d1 = self._raw_dets(image, self.conf_thres)
-        if self.use_tta:
-            flipped = cv2.flip(image, 1)
-            d2 = self._raw_dets(flipped, self.conf_thres)
-            if len(d2):
-                w = image.shape[1]
-                x1 = w - d2[:, 2]
-                x2 = w - d2[:, 0]
-                d2 = np.stack([x1, d2[:, 1], x2, d2[:, 3], d2[:, 4]], axis=1)
-            all_d = np.concatenate([d1, d2], axis=0) if len(d2) else d1
-        else:
-            all_d = d1
-        if len(all_d) == 0:
-            return np.zeros((0, 5), dtype=np.float32)
-        # soft-NMS, then hard-NMS
-        keep_idx, scores = self._soft_nms(all_d[:, :4].copy(), all_d[:, 4].copy(), sigma=self.sigma)
-        if len(keep_idx) == 0:
             return np.zeros((0, 5), dtype=np.float32)
-        merged = np.concatenate([all_d[keep_idx, :4], scores[:, None]], axis=1)
-        keep = self._hard_nms(merged[:, :4], merged[:, 4], self.iou_thres)
-        merged = merged[keep]
-        if len(merged) > self.max_det:
-            merged = merged[np.argsort(-merged[:, 4])[: self.max_det]]
-        return merged
     # ---------- single-image predict ----------
     def _predict_single(self, image: ndarray) -> list[BoundingBox]:
@@ -309,32 +260,26 @@ class Miner:
         if image.dtype != np.uint8:
             image = image.astype(np.uint8)
-        dets = self._primary(image)
         results: list[BoundingBox] = []
         for row in dets:
             x1, y1, x2, y2, conf = row.tolist()
             if x2 <= x1 or y2 <= y1:
                 continue
-            results.append(
-                BoundingBox(
-                    x1=int(math.floor(x1)),
-                    y1=int(math.floor(y1)),
-                    x2=int(math.ceil(x2)),
-                    y2=int(math.ceil(y2)),
-                    cls_id=0,
-                    conf=float(conf),
-                )
-            )
         return results
     # ---------- chute entrypoint ----------
-    def predict_batch(
-        self,
-        batch_images: list[ndarray],
-        offset: int,
-        n_keypoints: int,
-    ) -> list[TVFrameResult]:
         results: list[TVFrameResult] = []
         for frame_number_in_batch, image in enumerate(batch_images):
             try:
@@ -342,11 +287,9 @@ class Miner:
             except Exception as e:
                 print(f"Inference failed for frame {offset + frame_number_in_batch}: {e}")
                 boxes = []
-            results.append(
-                TVFrameResult(
-                    frame_id=offset + frame_number_in_batch,
-                    boxes=boxes,
-                    keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
-                )
-            )
         return results

+"""Plate-detection miner — plate_v6 + consensus-TTA inference.
+Weights: plate_v6 (resumed plate_v5 + difficulty-weighted scraped real challenges
++ synth CCTV; 18 epochs, peak val mAP50 0.930).
+Inference (smile0123-style consensus-TTA, our bench winner at gated 0.443):
+  - low conf (0.15) for high recall, super-high-conf (>=0.90) passes directly
+  - hflip cross-view consensus: low-conf boxes must match a flipped-view box at IoU>=0.01
+  - final hard-NMS at iou=0.32, max_det=150
+Bench on 221-shard live pseudo-GT pool: gated 0.443  mAP 0.975  fp/img 0.25  ms_p95 ~150 (A4000)
+On pro_6000 + TEE expect ~2-3s p95 including network/attest overhead.
 """
 from pathlib import Path
 import math
         if cn_path.is_file():
             lines = cn_path.read_text(encoding="utf-8").splitlines()
             self.class_names = [
+                ln.strip() for ln in lines
                 if ln.strip() and not ln.strip().startswith("#")
             ]
         else:
         except Exception as e:
             print(f"preload_dlls failed: {e}")
         try:
             import torch
             if torch.cuda.is_available():
                 print(f"GPU: {torch.cuda.get_device_name(0)}")
                 print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
         except Exception as e:
             print(f"GPU detection failed: {e}")
         try:
             self.session = ort.InferenceSession(
+                str(model_path), sess_options=sess_options,
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
+            print("Created ORT session with CUDA provider")
         except Exception as e:
             print(f"CUDA session creation failed, falling back to CPU: {e}")
             self.session = ort.InferenceSession(
+                str(model_path), sess_options=sess_options,
                 providers=["CPUExecutionProvider"],
             )
         for inp in self.session.get_inputs():
             print("INPUT:", inp.name, inp.shape, inp.type)
         for out in self.session.get_outputs():
         self.input_name = self.session.get_inputs()[0].name
         self.output_names = [o.name for o in self.session.get_outputs()]
         self.input_shape = self.session.get_inputs()[0].shape
         self.input_dtype = (
+            np.float16 if "float16" in self.session.get_inputs()[0].type
             else np.float32
         )
         self.input_height = self._safe_dim(self.input_shape[2], default=SIZE)
         self.input_width = self._safe_dim(self.input_shape[3], default=SIZE)
+        # Consensus-TTA preset (bench winner — gated 0.443)
+        self.conf_thres = 0.15        # low — collect MANY candidates
+        self.conf_high = 0.90         # >= this → pass through without TTA match
+        self.tta_match_iou = 0.01     # very permissive cross-view match
+        self.iou_thres = 0.32         # final hard-NMS
+        self.max_det = 150
+        print(f"Preset: conf={self.conf_thres} conf_high={self.conf_high} "
+              f"tta_match_iou={self.tta_match_iou} iou={self.iou_thres} max_det={self.max_det}")
         print(f"ONNX model loaded from: {model_path}")
     def __repr__(self) -> str:
+        """Return a short debug string listing the ORT session's providers."""
+        return f"ONNXRuntime(providers={self.session.get_providers()})"
     @staticmethod
     def _safe_dim(value, default: int) -> int:
+        """Return ``value`` when it is a positive ``int``; otherwise ``default``."""
         return value if isinstance(value, int) and value > 0 else default
+    # ---------- preprocessing ----------
+    def _letterbox(self, image, new_shape, color=(114, 114, 114)):
+        """Resize ``image`` to fit inside ``new_shape`` and pad it to that size.
+
+        The aspect ratio is preserved: a single scale factor is applied to
+        both axes, and the remaining space is filled with ``color`` on
+        both sides of each axis.
+
+        Args:
+            image: array whose first two dimensions are (height, width).
+            new_shape: target size as ``(width, height)``.
+            color: constant border value passed to ``cv2.copyMakeBorder``.
+
+        Returns:
+            ``(padded, ratio, (dw, dh))`` where ``ratio`` is the scale factor
+            and ``dw`` / ``dh`` are the (possibly fractional) per-side
+            paddings along x and y.
+        """
         h, w = image.shape[:2]
         new_w, new_h = new_shape
+        # Largest uniform scale at which the whole image still fits the target.
         ratio = min(new_w / w, new_h / h)
+        rw, rh = int(round(w * ratio)), int(round(h * ratio))
+        if (rw, rh) != (w, h):
+            # Cubic interpolation when enlarging, linear when shrinking.
             interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
+            image = cv2.resize(image, (rw, rh), interpolation=interp)
+        dw, dh = (new_w - rw) / 2.0, (new_h - rh) / 2.0
+        # The -0.1 / +0.1 nudges make an odd total padding split as
+        # floor/ceil (e.g. dw=1.5 -> 1 and 2) so both sides still sum to
+        # the full padding; an even total splits equally.
+        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+        padded = cv2.copyMakeBorder(image, top, bottom, left, right,
+                                    borderType=cv2.BORDER_CONSTANT, value=color)
         return padded, ratio, (dw, dh)
+    def _preprocess(self, image):
+        """Turn an image into the input tensor fed to the ONNX session.
+
+        Returns:
+            ``(tensor, ratio, pad)``: the contiguous batch-of-one tensor in
+            ``self.input_dtype``, plus the scale factor and ``(dw, dh)``
+            padding reported by ``_letterbox``.
+        """
+        # Letterbox to the model's (width, height) input size.
         img, ratio, pad = self._letterbox(image, (self.input_width, self.input_height))
+        # Channel order is converted BGR -> RGB, so the input is treated as BGR.
+        # Dividing by 255 presumably maps uint8 pixels to [0, 1] -- confirm
+        # the caller always supplies 8-bit data.
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+        # HWC -> CHW, then prepend a batch axis of size 1.
         img = np.transpose(img, (2, 0, 1))[None, ...]
         return np.ascontiguousarray(img, dtype=self.input_dtype), ratio, pad
     @staticmethod
+    def _clip_boxes(boxes, image_size):
         w, h = image_size
         boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)
         boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)
         boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)
         return boxes
+    # ---------- detection helpers ----------
+    @staticmethod
+    def _iou_one_to_many(box, boxes):
+        """Compute the IoU between one box and every row of ``boxes``.
+
+        Boxes are indexed as ``[x1, y1, x2, y2]`` (elements 0-3).
+
+        Args:
+            box: a single box, indexable at positions 0-3.
+            boxes: 2-D array with one box per row.
+
+        Returns:
+            Array with one IoU value per row of ``boxes``; an empty float32
+            array when ``boxes`` has no rows.
+        """
+        if len(boxes) == 0:
+            return np.zeros(0, dtype=np.float32)
+        # Corners of the intersection rectangle with each candidate.
+        xx1 = np.maximum(box[0], boxes[:, 0])
+        yy1 = np.maximum(box[1], boxes[:, 1])
+        xx2 = np.minimum(box[2], boxes[:, 2])
+        yy2 = np.minimum(box[3], boxes[:, 3])
+        # Non-overlapping pairs get a zero intersection instead of a negative one.
+        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
+        # NOTE(review): this clamps the width*height product, whereas ``aj``
+        # below clamps each side separately.
+        ai = max(0.0, (box[2] - box[0]) * (box[3] - box[1]))
+        aj = (np.maximum(0.0, boxes[:, 2] - boxes[:, 0])
+              * np.maximum(0.0, boxes[:, 3] - boxes[:, 1]))
+        # The 1e-7 term keeps the division defined when the union area is zero.
+        return inter / (ai + aj - inter + 1e-7)
     @staticmethod
+    def _hard_nms(boxes, scores, iou_thresh, max_det):
         N = len(boxes)
         if N == 0:
             return np.array([], dtype=np.intp)
         order = np.argsort(-scores)
+        keep = []
         while len(order):
             i = int(order[0])
             keep.append(i)
+            if len(order) == 1 or len(keep) >= max_det:
                 break
             rest = order[1:]
             xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
             xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
             yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
             inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
+            ai = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
+            ar = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
+            iou = inter / (ai + ar - inter + 1e-7)
             order = rest[iou <= iou_thresh]
         return np.array(keep, dtype=np.intp)
+    def _raw_dets(self, image, conf):
         x, ratio, (dw, dh) = self._preprocess(image)
         out = self.session.run(self.output_names, {self.input_name: x})[0]
         if out.ndim == 3:
         boxes = self._clip_boxes(boxes, (ow, oh))
         return np.concatenate([boxes, scores[:, None]], axis=1)
+    # ---------- consensus-TTA prediction ----------
+    def _predict_consensus_tta(self, image):
+        """Detect on the image and its horizontal flip, keeping cross-view-consistent boxes.
+
+        Acceptance rules, as implemented below:
+          * an original-view box with score >= ``self.conf_high`` is kept as is;
+          * any other original-view box is kept only if its best-overlapping
+            flipped-view box has IoU >= ``self.tta_match_iou``; it keeps its
+            own coordinates and takes the higher of the two scores;
+          * a flipped-view box with score >= ``self.conf_high`` is kept only
+            if there are no original-view boxes, or none of them overlaps it
+            with IoU >= ``self.tta_match_iou``.
+        The accepted set then goes through ``_hard_nms`` with
+        ``self.iou_thres`` and ``self.max_det``.
+
+        Returns:
+            float32 array with one row ``[x1, y1, x2, y2, score]`` per kept
+            box; shape ``(0, 5)`` when nothing is accepted.
+        """
+        d_o = self._raw_dets(image, self.conf_thres)
+        # flipCode 1 flips around the vertical axis (left-right mirror).
+        flipped = cv2.flip(image, 1)
+        d_f = self._raw_dets(flipped, self.conf_thres)
+        if len(d_f):
+            w = image.shape[1]
+            # Mirror x back into the original frame: new x1 = w - old x2 and
+            # new x2 = w - old x1, which keeps x1 <= x2.
+            d_f = np.stack([w - d_f[:, 2], d_f[:, 1], w - d_f[:, 0],
+                            d_f[:, 3], d_f[:, 4]], axis=1)
+        accepted_boxes = []
+        accepted_scores = []
+        # Original-view candidates
+        for i in range(len(d_o)):
+            s = float(d_o[i, 4])
+            if s >= self.conf_high:
+                accepted_boxes.append(d_o[i, :4])
+                accepted_scores.append(s)
+            # Below conf_high a box needs flipped-view support; with no
+            # flipped detections at all it is dropped.
+            elif len(d_f) > 0:
+                ious = self._iou_one_to_many(d_o[i, :4], d_f[:, :4])
+                j = int(np.argmax(ious))
+                if ious[j] >= self.tta_match_iou:
+                    fused = max(s, float(d_f[j, 4]))
+                    accepted_boxes.append(d_o[i, :4])
+                    accepted_scores.append(fused)
+        # Flip-view high-conf boxes that original missed
+        for i in range(len(d_f)):
+            s = float(d_f[i, 4])
+            if s < self.conf_high:
+                continue
+            if len(d_o) == 0:
+                accepted_boxes.append(d_f[i, :4])
+                accepted_scores.append(s)
+                continue
+            ious = self._iou_one_to_many(d_f[i, :4], d_o[:, :4])
+            # Only add the flipped box when no original-view box overlaps it
+            # at the matching threshold.
+            if np.max(ious) < self.tta_match_iou:
+                accepted_boxes.append(d_f[i, :4])
+                accepted_scores.append(s)
+        if not accepted_boxes:
             return np.zeros((0, 5), dtype=np.float32)
+        boxes = np.array(accepted_boxes, dtype=np.float32)
+        scores = np.array(accepted_scores, dtype=np.float32)
+        keep = self._hard_nms(boxes, scores, self.iou_thres, self.max_det)
+        return np.concatenate([boxes[keep], scores[keep][:, None]], axis=1)
     # ---------- single-image predict ----------
     def _predict_single(self, image: ndarray) -> list[BoundingBox]:
         if image.dtype != np.uint8:
             image = image.astype(np.uint8)
+        dets = self._predict_consensus_tta(image)
         results: list[BoundingBox] = []
         for row in dets:
             x1, y1, x2, y2, conf = row.tolist()
             if x2 <= x1 or y2 <= y1:
                 continue
+            results.append(BoundingBox(
+                x1=int(math.floor(x1)),
+                y1=int(math.floor(y1)),
+                x2=int(math.ceil(x2)),
+                y2=int(math.ceil(y2)),
+                cls_id=0,
+                conf=float(conf),
+            ))
         return results
     # ---------- chute entrypoint ----------
+    def predict_batch(self, batch_images: list[ndarray], offset: int,
+                      n_keypoints: int) -> list[TVFrameResult]:
         results: list[TVFrameResult] = []
         for frame_number_in_batch, image in enumerate(batch_images):
             try:
             except Exception as e:
                 print(f"Inference failed for frame {offset + frame_number_in_batch}: {e}")
                 boxes = []
+            results.append(TVFrameResult(
+                frame_id=offset + frame_number_in_batch,
+                boxes=boxes,
+                keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
+            ))
         return results