scorevision: push artifact

Browse files

Files changed (7) hide show

README.md +8 -40
__pycache__/miner.cpython-312.pyc +0 -0
class_names.txt +4 -0
miner.py +296 -127
model_type.json +1 -1
person_weights.onnx +3 -0
vehicle_weights.onnx +3 -0

README.md CHANGED Viewed

@@ -1,42 +1,10 @@
----
-tags:
-- element_type:detect
-- model:yolov11-nano
-- object:person
-manako:
-  description: >
-    YOLOv11-nano fine-tuned for ground-level CCTV person detection on SN44.
-    Trained on CrowdHuman (15k, dense crowds) + BDD100K street pedestrians.
-    Conf threshold raised to 0.35 to minimise false positives.
-  source: meaculpitt/Detect-Person
-  prompt_hints: null
-  input_payload:
-  - name: frame
-    type: image
-    description: RGB frame (ground-level CCTV)
-  output_payload:
-  - name: detections
-    type: detections
-    description: Bounding boxes for detected persons
-  evaluation_score: 0.5563
-  last_benchmark:
-    type: coco_val2017
-    ran_at: '2026-03-25T02:58:57+00:00'
-    result_path: null
----
-# Detect-Person — SN44
-YOLOv11-nano fine-tuned for ground-level CCTV person detection.
-| Metric | Value |
-|--------|-------|
-| mAP@50 (COCO val2017) | 55.63% |
-| Precision (conf=0.35) | 56.86% |
-| Recall | 50.67% |
-| Baseline to beat | 37.55% |
-| Model size | 5.6 MB |
-| Input size | 1280×1280 |
-**Training data**: CrowdHuman (15k) + BDD100K (3.2k pedestrians)
-**Validation**: COCO val2017 persons (2,693 images)

+# ScoreVision SN44 Unified Miner
+Dual-model approach: vehicle (YOLO11s) + person (YOLO11s).
+Runs both models on every image and merges all detections.
+## Classes
+- cls_id 0: bus (vehicle eval) / person (person eval)
+- cls_id 1: car
+- cls_id 2: truck
+- cls_id 3: motorcycle

__pycache__/miner.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/miner.cpython-312.pyc and b/__pycache__/miner.cpython-312.pyc differ

class_names.txt CHANGED Viewed

	@@ -1 +1,5 @@




1	person

+bus
+car
+truck
+motorcycle
 person

miner.py CHANGED Viewed

@@ -1,7 +1,17 @@
 """
-Score Vision SN44 — DetectPerson miner v6 (2026-03-27).
-TTA (2-pass: original + hflip) + inline WBF. Stretch resize preprocessing.
-Single class: person (cls_id=0).
 """
 from pathlib import Path
@@ -13,75 +23,139 @@ import onnxruntime as ort
 from numpy import ndarray
 from pydantic import BaseModel
-CONF_THRESH = 0.35
-TTA_CONF_THRESH = 0.25
-IOU_THRESH = 0.45
-WBF_IOU_THR = 0.45
 WBF_SKIP_THR = 0.0001
-def _wbf(boxes_list: list[np.ndarray], scores_list: list[np.ndarray],
-         iou_thr: float = 0.45, skip_box_thr: float = 0.0001
-         ) -> tuple[np.ndarray, np.ndarray]:
-    """Weighted Boxes Fusion for single-class detection. Boxes in [0,1] normalized coords."""
     if not boxes_list:
         return np.empty((0, 4)), np.empty(0)
-    all_boxes, all_scores = [], []
     for bx, sc in zip(boxes_list, scores_list):
         for i in range(len(bx)):
-            if sc[i] < skip_box_thr:
                 continue
-            all_boxes.append(bx[i])
-            all_scores.append(sc[i])
-    if not all_boxes:
         return np.empty((0, 4)), np.empty(0)
-    all_boxes = np.array(all_boxes)
-    all_scores = np.array(all_scores)
-    order = all_scores.argsort()[::-1]
-    all_boxes = all_boxes[order]
-    all_scores = all_scores[order]
-    clusters: list[list[int]] = []
-    cluster_boxes: list[np.ndarray] = []
-    for i in range(len(all_boxes)):
-        matched = -1
-        best_iou = iou_thr
-        for c_idx, c_box in enumerate(cluster_boxes):
-            xx1 = max(all_boxes[i, 0], c_box[0])
-            yy1 = max(all_boxes[i, 1], c_box[1])
-            xx2 = min(all_boxes[i, 2], c_box[2])
-            yy2 = min(all_boxes[i, 3], c_box[3])
             inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
-            a1 = (all_boxes[i, 2] - all_boxes[i, 0]) * (all_boxes[i, 3] - all_boxes[i, 1])
-            a2 = (c_box[2] - c_box[0]) * (c_box[3] - c_box[1])
             iou = inter / (a1 + a2 - inter + 1e-9)
             if iou > best_iou:
                 best_iou = iou
-                matched = c_idx
         if matched >= 0:
             clusters[matched].append(i)
             idxs = clusters[matched]
-            weights = all_scores[idxs]
-            w_sum = weights.sum()
-            cluster_boxes[matched] = (all_boxes[idxs] * weights[:, None]).sum(0) / w_sum
         else:
             clusters.append([i])
-            cluster_boxes.append(all_boxes[i].copy())
-    fused_boxes, fused_scores = [], []
-    for c_idx, idxs in enumerate(clusters):
-        weights = all_scores[idxs]
-        fused_boxes.append(cluster_boxes[c_idx])
-        fused_scores.append(weights.mean())
-    if not fused_boxes:
         return np.empty((0, 4)), np.empty(0)
-    return np.array(fused_boxes), np.array(fused_scores)
 class BoundingBox(BaseModel):
@@ -102,127 +176,222 @@ class TVFrameResult(BaseModel):
 class Miner:
     def __init__(self, path_hf_repo: Path) -> None:
         self.path_hf_repo = path_hf_repo
-        self.class_names = ['person']
-        self.session = ort.InferenceSession(
-            str(path_hf_repo / "weights.onnx"),
             providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
         )
-        self.input_name = self.session.get_inputs()[0].name
-        input_shape = self.session.get_inputs()[0].shape
-        self.input_h = int(input_shape[2])
-        self.input_w = int(input_shape[3])
-        self.conf_threshold = CONF_THRESH
-        self.tta_conf_threshold = TTA_CONF_THRESH
-        self.iou_threshold = IOU_THRESH
     def __repr__(self) -> str:
-        return f"DetectPerson Miner v6 2-pass TTA + WBF iou={WBF_IOU_THR}"
-    def _preprocess(self, image_bgr: ndarray) -> tuple[np.ndarray, tuple[int, int]]:
-        h, w = image_bgr.shape[:2]
         rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-        resized = cv2.resize(rgb, (self.input_w, self.input_h))
         x = resized.astype(np.float32) / 255.0
         x = np.transpose(x, (2, 0, 1))[None, ...]
-        return x, (h, w)
-    def _decode_raw(self, raw: np.ndarray, orig_h: int, orig_w: int,
-                    conf_thresh: float | None = None) -> tuple[np.ndarray, np.ndarray]:
         pred = raw[0]
         if pred.ndim != 2:
             return np.empty((0, 4)), np.empty(0)
         if pred.shape[0] < pred.shape[1]:
-            pred = pred.transpose(1, 0)
         if pred.shape[1] < 5:
             return np.empty((0, 4)), np.empty(0)
-        boxes = pred[:, :4]
         cls_scores = pred[:, 4:]
-        if cls_scores.shape[1] == 0:
-            return np.empty((0, 4)), np.empty(0)
         confs = np.max(cls_scores, axis=1)
-        thresh = conf_thresh if conf_thresh is not None else self.conf_threshold
-        keep = confs >= thresh
-        boxes, confs = boxes[keep], confs[keep]
-        if boxes.shape[0] == 0:
             return np.empty((0, 4)), np.empty(0)
-        sx = orig_w / float(self.input_w)
-        sy = orig_h / float(self.input_h)
         cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
-        x1 = np.clip((cx - bw / 2) * sx, 0, orig_w)
-        y1 = np.clip((cy - bh / 2) * sy, 0, orig_h)
-        x2 = np.clip((cx + bw / 2) * sx, 0, orig_w)
-        y2 = np.clip((cy + bh / 2) * sy, 0, orig_h)
         return np.stack([x1, y1, x2, y2], axis=1), confs
-    def _run_single_pass(self, image_bgr: ndarray, conf_thresh: float | None = None
-                         ) -> tuple[np.ndarray, np.ndarray]:
-        orig_h, orig_w = image_bgr.shape[:2]
-        inp, _ = self._preprocess(image_bgr)
-        raw = self.session.run(None, {self.input_name: inp})[0]
-        return self._decode_raw(raw, orig_h, orig_w, conf_thresh)
-    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
-        orig_h, orig_w = image_bgr.shape[:2]
-        all_boxes, all_scores = [], []
         def _collect(boxes, confs):
             if len(boxes) == 0:
                 return
             norm = boxes.copy()
-            norm[:, [0, 2]] /= orig_w
-            norm[:, [1, 3]] /= orig_h
             norm = np.clip(norm, 0, 1)
-            all_boxes.append(norm)
-            all_scores.append(confs)
-        # Pass 1: original (low threshold for TTA)
-        _collect(*self._run_single_pass(image_bgr, self.tta_conf_threshold))
-        # Pass 2: horizontal flip
         flipped = cv2.flip(image_bgr, 1)
-        boxes_f, confs_f = self._run_single_pass(flipped, self.tta_conf_threshold)
-        if len(boxes_f):
-            boxes_f[:, 0], boxes_f[:, 2] = orig_w - boxes_f[:, 2], orig_w - boxes_f[:, 0]
-        _collect(boxes_f, confs_f)
-        # (1.2x crop pass REMOVED — adds more FPs than TPs)
-        if not all_boxes:
             return []
-        fused_boxes, fused_scores = _wbf(
-            all_boxes, all_scores,
-            iou_thr=WBF_IOU_THR, skip_box_thr=WBF_SKIP_THR,
-        )
-        if len(fused_boxes) == 0:
             return []
-        # Denormalize
-        fused_boxes[:, [0, 2]] *= orig_w
-        fused_boxes[:, [1, 3]] *= orig_h
-        # Apply final conf threshold after WBF
-        keep = fused_scores >= self.conf_threshold
-        fused_boxes = fused_boxes[keep]
-        fused_scores = fused_scores[keep]
-        out: list[BoundingBox] = []
-        for i in range(len(fused_boxes)):
-            b = fused_boxes[i]
             out.append(BoundingBox(
-                x1=max(0, min(orig_w, math.floor(b[0]))),
-                y1=max(0, min(orig_h, math.floor(b[1]))),
-                x2=max(0, min(orig_w, math.ceil(b[2]))),
-                y2=max(0, min(orig_h, math.ceil(b[3]))),
                 cls_id=0,
-                conf=max(0.0, min(1.0, float(fused_scores[i]))),
             ))
         return out
     def predict_batch(
         self,
         batch_images: list[ndarray],

 """
+Score Vision SN44 — Unified miner v1 (2026-03-27).
+Dual-model: vehicle (YOLO11s) + person (YOLO11s).
+Vehicle model (vehicle_weights.onnx):
+  Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
+  Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
+Person model (person_weights.onnx):
+  Single class: 0=person
+Both models run on every image. All detections merged.
+cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
+Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
 """
 from pathlib import Path
 from numpy import ndarray
 from pydantic import BaseModel
# ── Vehicle config ──────────────────────────────────────────────────────────
# Maps the vehicle model's own class index (key) to the cls_id that is emitted
# (value); indices 0 and 1 are swapped, 2 and 3 pass through unchanged.
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
# NOTE(review): not referenced by any code visible in this view.
VEH_NUM_CLASSES = 4
# Side length, in pixels, of the square canvas the vehicle letterbox pads to.
VEH_IMG_SIZE = 1280
# Score thresholds applied after fusion. They are looked up with the remapped
# (output) cls_id, not with the model's own class index.
VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
# Fallback threshold for a cls_id that has no entry in VEH_CONF_PER_CLASS.
VEH_CONF_DEFAULT = 0.35
# Lower threshold used when decoding each individual pass, before fusion.
VEH_TTA_CONF = 0.25
# IoU a box must exceed to join an existing vehicle fusion cluster.
VEH_WBF_IOU = 0.55
# ── Person config ───────────────────────────────────────────────────────────
# Score threshold applied to person detections after fusion.
PER_CONF = 0.35
# Lower threshold used when decoding each individual pass, before fusion.
PER_TTA_CONF = 0.25
# IoU a box must exceed to join an existing person fusion cluster.
PER_WBF_IOU = 0.45
# ── Shared ──────────────────────────────────────────────────────────────────
# Boxes scoring below this are discarded before any fusion is attempted.
WBF_SKIP_THR = 0.0001
+def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
+    """Weighted Boxes Fusion (multi-class). Boxes in [0,1] normalized coords."""
+    if not boxes_list:
+        return np.empty((0, 4)), np.empty(0), np.empty(0)
+    all_b, all_s, all_l = [], [], []
+    for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
+        for i in range(len(bx)):
+            if sc[i] < skip_thr:
+                continue
+            all_b.append(bx[i])
+            all_s.append(sc[i])
+            all_l.append(int(lb[i]))
+    if not all_b:
+        return np.empty((0, 4)), np.empty(0), np.empty(0)
+    all_b = np.array(all_b)
+    all_s = np.array(all_s)
+    all_l = np.array(all_l, dtype=int)
+    fused_b, fused_s, fused_l = [], [], []
+    for cls in np.unique(all_l):
+        m = all_l == cls
+        cb, cs = all_b[m], all_s[m]
+        order = cs.argsort()[::-1]
+        cb, cs = cb[order], cs[order]
+        clusters, cboxes = [], []
+        for i in range(len(cb)):
+            matched, best_iou = -1, iou_thr
+            for ci, cbox in enumerate(cboxes):
+                xx1 = max(cb[i, 0], cbox[0])
+                yy1 = max(cb[i, 1], cbox[1])
+                xx2 = min(cb[i, 2], cbox[2])
+                yy2 = min(cb[i, 3], cbox[3])
+                inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
+                a1 = (cb[i, 2] - cb[i, 0]) * (cb[i, 3] - cb[i, 1])
+                a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
+                iou = inter / (a1 + a2 - inter + 1e-9)
+                if iou > best_iou:
+                    best_iou = iou
+                    matched = ci
+            if matched >= 0:
+                clusters[matched].append(i)
+                idxs = clusters[matched]
+                w = cs[idxs]
+                cboxes[matched] = (cb[idxs] * w[:, None]).sum(0) / w.sum()
+            else:
+                clusters.append([i])
+                cboxes.append(cb[i].copy())
+        for ci, idxs in enumerate(clusters):
+            fused_b.append(cboxes[ci])
+            fused_s.append(cs[idxs].mean())
+            fused_l.append(cls)
+    if not fused_b:
+        return np.empty((0, 4)), np.empty(0), np.empty(0)
+    return np.array(fused_b), np.array(fused_s), np.array(fused_l)
+def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
+    """Weighted Boxes Fusion (single-class). Boxes in [0,1] normalized coords."""
     if not boxes_list:
         return np.empty((0, 4)), np.empty(0)
+    all_b, all_s = [], []
     for bx, sc in zip(boxes_list, scores_list):
         for i in range(len(bx)):
+            if sc[i] < skip_thr:
                 continue
+            all_b.append(bx[i])
+            all_s.append(sc[i])
+    if not all_b:
         return np.empty((0, 4)), np.empty(0)
+    all_b = np.array(all_b)
+    all_s = np.array(all_s)
+    order = all_s.argsort()[::-1]
+    all_b, all_s = all_b[order], all_s[order]
+    clusters, cboxes = [], []
+    for i in range(len(all_b)):
+        matched, best_iou = -1, iou_thr
+        for ci, cbox in enumerate(cboxes):
+            xx1 = max(all_b[i, 0], cbox[0])
+            yy1 = max(all_b[i, 1], cbox[1])
+            xx2 = min(all_b[i, 2], cbox[2])
+            yy2 = min(all_b[i, 3], cbox[3])
             inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
+            a1 = (all_b[i, 2] - all_b[i, 0]) * (all_b[i, 3] - all_b[i, 1])
+            a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
             iou = inter / (a1 + a2 - inter + 1e-9)
             if iou > best_iou:
                 best_iou = iou
+                matched = ci
         if matched >= 0:
             clusters[matched].append(i)
             idxs = clusters[matched]
+            w = all_s[idxs]
+            cboxes[matched] = (all_b[idxs] * w[:, None]).sum(0) / w.sum()
         else:
             clusters.append([i])
+            cboxes.append(all_b[i].copy())
+    fused_b, fused_s = [], []
+    for ci, idxs in enumerate(clusters):
+        fused_b.append(cboxes[ci])
+        fused_s.append(all_s[idxs].mean())
+    if not fused_b:
         return np.empty((0, 4)), np.empty(0)
+    return np.array(fused_b), np.array(fused_s)
 class BoundingBox(BaseModel):
 class Miner:
     def __init__(self, path_hf_repo: Path) -> None:
         self.path_hf_repo = path_hf_repo
+        # Vehicle model (YOLO11s, 4 classes)
+        self.veh_session = ort.InferenceSession(
+            str(path_hf_repo / "vehicle_weights.onnx"),
+            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+        )
+        self.veh_input_name = self.veh_session.get_inputs()[0].name
+        # Person model (YOLO11s, 1 class)
+        self.per_session = ort.InferenceSession(
+            str(path_hf_repo / "person_weights.onnx"),
             providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
         )
+        self.per_input_name = self.per_session.get_inputs()[0].name
+        per_shape = self.per_session.get_inputs()[0].shape
+        self.per_h = int(per_shape[2])
+        self.per_w = int(per_shape[3])
     def __repr__(self) -> str:
+        return "Unified Miner v1 — dual-model vehicle+person"
+    # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
+    def _veh_letterbox(self, img):
+        h, w = img.shape[:2]
+        r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
+        nw, nh = int(round(w * r)), int(round(h * r))
+        img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
+        dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
+        pl, pt = dw // 2, dh // 2
+        img_p = cv2.copyMakeBorder(
+            img_r, pt, dh - pt, pl, dw - pl,
+            cv2.BORDER_CONSTANT, value=(114, 114, 114),
+        )
+        return img_p, r, pl, pt
+    def _veh_preprocess(self, image_bgr):
+        img_p, ratio, pl, pt = self._veh_letterbox(image_bgr)
+        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
+        inp = rgb.astype(np.float32) / 255.0
+        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
+        return inp, ratio, pl, pt
+    def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
+        pred = raw[0]
+        if pred.shape[0] < pred.shape[1]:
+            pred = pred.T
+        cls_scores = pred[:, 4:]
+        cls_ids = np.argmax(cls_scores, axis=1)
+        confs = np.max(cls_scores, axis=1)
+        mask = confs >= conf_thresh
+        if not mask.any():
+            return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
+        bx, confs, cls_ids = pred[mask, :4], confs[mask], cls_ids[mask]
+        cx, cy, bw, bh = bx[:, 0], bx[:, 1], bx[:, 2], bx[:, 3]
+        x1 = np.clip((cx - bw / 2 - pl) / ratio, 0, ow)
+        y1 = np.clip((cy - bh / 2 - pt) / ratio, 0, oh)
+        x2 = np.clip((cx + bw / 2 - pl) / ratio, 0, ow)
+        y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
+        return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
+    def _veh_run_pass(self, image_bgr, conf_thresh):
+        oh, ow = image_bgr.shape[:2]
+        inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
+        raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
+        return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
+    def _infer_vehicle(self, image_bgr):
+        oh, ow = image_bgr.shape[:2]
+        all_b, all_s, all_l = [], [], []
+        def _collect(boxes, confs, cls_ids):
+            if len(boxes) == 0:
+                return
+            out_cls = np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids])
+            norm = boxes.copy()
+            norm[:, [0, 2]] /= ow
+            norm[:, [1, 3]] /= oh
+            norm = np.clip(norm, 0, 1)
+            all_b.append(norm)
+            all_s.append(confs)
+            all_l.append(out_cls)
+        # Pass 1: original
+        _collect(*self._veh_run_pass(image_bgr, VEH_TTA_CONF))
+        # Pass 2: hflip
+        flipped = cv2.flip(image_bgr, 1)
+        bx, sc, cl = self._veh_run_pass(flipped, VEH_TTA_CONF)
+        if len(bx):
+            bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
+        _collect(bx, sc, cl)
+        if not all_b:
+            return []
+        fb, fs, fl = _wbf_multi(all_b, all_s, all_l, iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR)
+        if len(fb) == 0:
+            return []
+        fb[:, [0, 2]] *= ow
+        fb[:, [1, 3]] *= oh
+        keep = np.array([
+            fs[i] >= VEH_CONF_PER_CLASS.get(int(fl[i]), VEH_CONF_DEFAULT)
+            for i in range(len(fs))
+        ])
+        if not keep.any():
+            return []
+        fb, fs, fl = fb[keep], fs[keep], fl[keep]
+        out = []
+        for i in range(len(fb)):
+            b = fb[i]
+            out.append(BoundingBox(
+                x1=max(0, min(ow, math.floor(b[0]))),
+                y1=max(0, min(oh, math.floor(b[1]))),
+                x2=max(0, min(ow, math.ceil(b[2]))),
+                y2=max(0, min(oh, math.ceil(b[3]))),
+                cls_id=int(fl[i]),
+                conf=max(0.0, min(1.0, float(fs[i]))),
+            ))
+        return out
+    # ── Person preprocessing (stretch resize) ──────────────────────────────
+    def _per_preprocess(self, image_bgr):
         rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
+        resized = cv2.resize(rgb, (self.per_w, self.per_h))
         x = resized.astype(np.float32) / 255.0
         x = np.transpose(x, (2, 0, 1))[None, ...]
+        return x
+    def _per_decode(self, raw, oh, ow, conf_thresh):
         pred = raw[0]
         if pred.ndim != 2:
             return np.empty((0, 4)), np.empty(0)
         if pred.shape[0] < pred.shape[1]:
+            pred = pred.T
         if pred.shape[1] < 5:
             return np.empty((0, 4)), np.empty(0)
         cls_scores = pred[:, 4:]
         confs = np.max(cls_scores, axis=1)
+        keep = confs >= conf_thresh
+        boxes, confs = pred[keep, :4], confs[keep]
+        if len(boxes) == 0:
             return np.empty((0, 4)), np.empty(0)
+        sx, sy = ow / float(self.per_w), oh / float(self.per_h)
         cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
+        x1 = np.clip((cx - bw / 2) * sx, 0, ow)
+        y1 = np.clip((cy - bh / 2) * sy, 0, oh)
+        x2 = np.clip((cx + bw / 2) * sx, 0, ow)
+        y2 = np.clip((cy + bh / 2) * sy, 0, oh)
         return np.stack([x1, y1, x2, y2], axis=1), confs
+    def _per_run_pass(self, image_bgr, conf_thresh):
+        oh, ow = image_bgr.shape[:2]
+        inp = self._per_preprocess(image_bgr)
+        raw = self.per_session.run(None, {self.per_input_name: inp})[0]
+        return self._per_decode(raw, oh, ow, conf_thresh)
+    def _infer_person(self, image_bgr):
+        oh, ow = image_bgr.shape[:2]
+        all_b, all_s = [], []
         def _collect(boxes, confs):
             if len(boxes) == 0:
                 return
             norm = boxes.copy()
+            norm[:, [0, 2]] /= ow
+            norm[:, [1, 3]] /= oh
             norm = np.clip(norm, 0, 1)
+            all_b.append(norm)
+            all_s.append(confs)
+        # Pass 1: original
+        _collect(*self._per_run_pass(image_bgr, PER_TTA_CONF))
+        # Pass 2: hflip
         flipped = cv2.flip(image_bgr, 1)
+        bx, sc = self._per_run_pass(flipped, PER_TTA_CONF)
+        if len(bx):
+            bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
+        _collect(bx, sc)
+        if not all_b:
             return []
+        fb, fs = _wbf_single(all_b, all_s, iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR)
+        if len(fb) == 0:
             return []
+        fb[:, [0, 2]] *= ow
+        fb[:, [1, 3]] *= oh
+        keep = fs >= PER_CONF
+        fb, fs = fb[keep], fs[keep]
+        out = []
+        for i in range(len(fb)):
+            b = fb[i]
             out.append(BoundingBox(
+                x1=max(0, min(ow, math.floor(b[0]))),
+                y1=max(0, min(oh, math.floor(b[1]))),
+                x2=max(0, min(ow, math.ceil(b[2]))),
+                y2=max(0, min(oh, math.ceil(b[3]))),
                 cls_id=0,
+                conf=max(0.0, min(1.0, float(fs[i]))),
             ))
         return out
+    # ── Unified inference ───────────────────────────────────────────────────
+    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
+        vehicle_boxes = self._infer_vehicle(image_bgr)
+        person_boxes = self._infer_person(image_bgr)
+        return vehicle_boxes + person_boxes
     def predict_batch(
         self,
         batch_images: list[ndarray],

model_type.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"task_type": "object-detection", "model_type": "yolov11-~~nano~~", "deploy": "2026-03-~~26T07~~:~~46Z~~"}


1	+ {"task_type": "object-detection", "model_type": "yolov11-small-dual", "deploy": "2026-03-27T09:00Z"}

person_weights.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f32ed65b9024a69693f675d494c7fc813a964766c54b241464a463377342da60
+size 5607862

vehicle_weights.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
+size 19272252