meaculpitt
/

ScoreVision

@@ -1,408 +0,0 @@
-"""
-Score Vision SN44 — Unified miner v1 (2026-03-27).
-Dual-model: vehicle (YOLO11s) + person (YOLO11s).
-Vehicle model (vehicle_weights.onnx):
-  Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
-  Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
-Person model (person_weights.onnx):
-  Single class: 0=person
-Both models run on every image. All detections merged.
-cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
-Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
-"""
-from pathlib import Path
-import math
-import cv2
-import numpy as np
-import onnxruntime as ort
-from numpy import ndarray
-from pydantic import BaseModel
-# ── Vehicle config ──────────────────────────────────────────────────────────
-# Maps the vehicle model's own class index to the cls_id that is emitted
-# (car and bus swap places; truck and motorcycle keep their index).
-VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
-# Number of vehicle classes. Not referenced by the code visible in this file.
-VEH_NUM_CLASSES = 4
-# Side length, in pixels, of the square letterboxed vehicle-model input.
-VEH_IMG_SIZE = 1280
-# Final confidence threshold per emitted cls_id, applied after box fusion
-# (per the module docstring: 0=bus, 1=car, 2=truck, 3=motorcycle).
-VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
-# Fallback final threshold for a cls_id missing from VEH_CONF_PER_CLASS.
-VEH_CONF_DEFAULT = 0.35
-# Looser threshold applied to each individual pass (original and h-flip)
-# before the passes are fused.
-VEH_TTA_CONF = 0.25
-# IoU threshold handed to _wbf_multi when fusing vehicle boxes.
-VEH_WBF_IOU = 0.55
-# ── Person config ───────────────────────────────────────────────────────────
-# Final confidence threshold for person boxes, applied after fusion.
-PER_CONF = 0.35
-# Per-pass threshold applied before the person passes are fused.
-PER_TTA_CONF = 0.25
-# IoU threshold handed to _wbf_single when fusing person boxes.
-PER_WBF_IOU = 0.45
-# ── Shared ──────────────────────────────────────────────────────────────────
-# skip_thr handed to both fusion functions: detections scoring below this
-# are dropped before clustering.
-WBF_SKIP_THR = 0.0001
-def _wbf_cluster_class(boxes, scores, iou_thr):
-    """Cluster the boxes of a single class for ``_wbf_multi``.
-    ``boxes`` is an ``(n, 4)`` float array and ``scores`` its length-``n``
-    confidences. Returns ``(fused_boxes, fused_scores)`` with one row per
-    cluster, in the order the clusters were created.
-    """
-    order = scores.argsort()[::-1]
-    boxes, scores = boxes[order], scores[order]
-    members, fused = [], []
-    for i, box in enumerate(boxes):
-        hit = -1
-        if fused:
-            # IoU of this box against every current cluster box at once.
-            ref = np.asarray(fused)
-            overlap = np.minimum(box[2:], ref[:, 2:]) - np.maximum(box[:2], ref[:, :2])
-            inter = np.clip(overlap, 0, None).prod(axis=1)
-            area = (box[2] - box[0]) * (box[3] - box[1])
-            ref_area = (ref[:, 2] - ref[:, 0]) * (ref[:, 3] - ref[:, 1])
-            iou = inter / (area + ref_area - inter + 1e-9)
-            best = int(iou.argmax())  # the first maximum wins ties
-            if iou[best] > iou_thr:
-                hit = best
-        if hit < 0:
-            members.append([i])
-            fused.append(box.copy())
-            continue
-        members[hit].append(i)
-        idx = members[hit]
-        weights = scores[idx]
-        total = weights.sum()
-        if total != 0:
-            fused[hit] = (boxes[idx] * weights[:, None]).sum(0) / total
-        else:
-            # Only reachable when skip_thr <= 0 lets zero scores through:
-            # the weights carry no information, so use the plain mean
-            # instead of dividing by zero and emitting NaN boxes.
-            fused[hit] = boxes[idx].mean(0)
-    return np.asarray(fused), np.array([scores[idx].mean() for idx in members])
-def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
-    """Fuse detections from several passes with Weighted Boxes Fusion, per class.
-    Detections are pooled across passes and handled one class at a time in
-    descending score order. A box joins the existing cluster whose fused box
-    it overlaps most, provided that IoU is strictly above ``iou_thr``;
-    otherwise it starts a new cluster. A cluster's box is the score-weighted
-    mean of its members and its score is their plain mean. All arithmetic is
-    done in float64 regardless of the input dtype.
-    Args:
-        boxes_list: One ``(n_i, 4)`` array of ``[x1, y1, x2, y2]`` boxes per
-            pass, in [0, 1] normalized coordinates.
-        scores_list: One length-``n_i`` sequence of confidences per pass.
-        labels_list: One length-``n_i`` sequence of integer class ids per pass.
-        iou_thr: IoU a box must exceed to be merged into a cluster.
-        skip_thr: Detections scoring below this are dropped before fusion.
-    Returns:
-        ``(boxes, scores, labels)``: an ``(m, 4)`` float array, a length-``m``
-        float array and a length-``m`` int array, grouped by ascending class
-        id. All three are empty (labels still int) when nothing survives.
-    """
-    empty = (np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int))
-    pooled_b, pooled_s, pooled_l = [], [], []
-    for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
-        n = len(bx)
-        if n == 0:
-            continue
-        sc = np.asarray(sc, dtype=float)[:n]
-        # "not below" rather than ">=" so a NaN score is kept, as before.
-        keep = ~(sc < skip_thr)
-        pooled_b.append(np.asarray(bx, dtype=float).reshape(n, 4)[keep])
-        pooled_s.append(sc[keep])
-        pooled_l.append(np.asarray(lb)[:n][keep].astype(int))
-    if not pooled_b:
-        return empty
-    all_b = np.concatenate(pooled_b)
-    all_s = np.concatenate(pooled_s)
-    all_l = np.concatenate(pooled_l)
-    if len(all_b) == 0:
-        return empty
-    fused_b, fused_s, fused_l = [], [], []
-    for cls in np.unique(all_l):
-        sel = all_l == cls
-        cls_b, cls_s = _wbf_cluster_class(all_b[sel], all_s[sel], iou_thr)
-        fused_b.append(cls_b)
-        fused_s.append(cls_s)
-        fused_l.append(np.full(len(cls_s), cls, dtype=int))
-    return np.concatenate(fused_b), np.concatenate(fused_s), np.concatenate(fused_l)
-def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
-    """Fuse single-class detections from several passes with Weighted Boxes Fusion.
-    Detections are pooled across passes and visited in descending score
-    order. A box joins the existing cluster whose fused box it overlaps most,
-    provided that IoU is strictly above ``iou_thr``; otherwise it starts a
-    new cluster. A cluster's box is the score-weighted mean of its members
-    and its score is their plain mean. All arithmetic is done in float64
-    regardless of the input dtype.
-    Args:
-        boxes_list: One ``(n_i, 4)`` array of ``[x1, y1, x2, y2]`` boxes per
-            pass, in [0, 1] normalized coordinates.
-        scores_list: One length-``n_i`` sequence of confidences per pass.
-        iou_thr: IoU a box must exceed to be merged into a cluster.
-        skip_thr: Detections scoring below this are dropped before fusion.
-    Returns:
-        ``(boxes, scores)``: an ``(m, 4)`` float array and a length-``m``
-        float array, one entry per cluster in creation order. Both are empty
-        when nothing survives.
-    """
-    pooled_b, pooled_s = [], []
-    for bx, sc in zip(boxes_list, scores_list):
-        n = len(bx)
-        if n == 0:
-            continue
-        sc = np.asarray(sc, dtype=float)[:n]
-        # "not below" rather than ">=" so a NaN score is kept, as before.
-        keep = ~(sc < skip_thr)
-        pooled_b.append(np.asarray(bx, dtype=float).reshape(n, 4)[keep])
-        pooled_s.append(sc[keep])
-    if not pooled_b:
-        return np.empty((0, 4)), np.empty(0)
-    boxes = np.concatenate(pooled_b)
-    scores = np.concatenate(pooled_s)
-    if len(boxes) == 0:
-        return np.empty((0, 4)), np.empty(0)
-    order = scores.argsort()[::-1]
-    boxes, scores = boxes[order], scores[order]
-    members, fused = [], []
-    for i, box in enumerate(boxes):
-        hit = -1
-        if fused:
-            # IoU of this box against every current cluster box at once.
-            ref = np.asarray(fused)
-            overlap = np.minimum(box[2:], ref[:, 2:]) - np.maximum(box[:2], ref[:, :2])
-            inter = np.clip(overlap, 0, None).prod(axis=1)
-            area = (box[2] - box[0]) * (box[3] - box[1])
-            ref_area = (ref[:, 2] - ref[:, 0]) * (ref[:, 3] - ref[:, 1])
-            iou = inter / (area + ref_area - inter + 1e-9)
-            best = int(iou.argmax())  # the first maximum wins ties
-            if iou[best] > iou_thr:
-                hit = best
-        if hit < 0:
-            members.append([i])
-            fused.append(box.copy())
-            continue
-        members[hit].append(i)
-        idx = members[hit]
-        weights = scores[idx]
-        total = weights.sum()
-        if total != 0:
-            fused[hit] = (boxes[idx] * weights[:, None]).sum(0) / total
-        else:
-            # Only reachable when skip_thr <= 0 lets zero scores through:
-            # the weights carry no information, so use the plain mean
-            # instead of dividing by zero and emitting NaN boxes.
-            fused[hit] = boxes[idx].mean(0)
-    return np.asarray(fused), np.array([scores[idx].mean() for idx in members])
-# One detection, expressed in pixel coordinates of the source image.
-# (Documented with comments rather than a docstring so the pydantic model's
-# generated schema description is left untouched.)
-class BoundingBox(BaseModel):
-    # Corner with the smaller coordinates; the inference methods floor these
-    # and clamp them to the image bounds.
-    x1: int
-    y1: int
-    # Corner with the larger coordinates; the inference methods ceil these
-    # and clamp them to the image bounds.
-    x2: int
-    y2: int
-    # Emitted class id: 0-3 for vehicle detections, always 0 for person
-    # detections (the module docstring explains the shared id 0).
-    cls_id: int
-    # Fused confidence, clamped to [0, 1] by the inference methods.
-    conf: float
-# Per-frame output record produced by Miner.predict_batch.
-class TVFrameResult(BaseModel):
-    # Batch offset plus the frame's position inside the batch.
-    frame_id: int
-    # Vehicle detections followed by person detections for this frame.
-    boxes: list[BoundingBox]
-    # predict_batch fills this with (0, 0) placeholders, one per requested
-    # keypoint; no keypoint model is run in the visible code.
-    keypoints: list[tuple[int, int]]
-class Miner:
-    def __init__(self, path_hf_repo: Path) -> None:
-        """Open the vehicle and person ONNX sessions stored under ``path_hf_repo``.
-        Both sessions prefer CUDA and fall back to CPU. The person model's
-        input height and width are read from its declared input shape; the
-        vehicle model's input size is not read here.
-        """
-        def _open_session(filename):
-            # A fresh provider list per session, exactly as requested order.
-            return ort.InferenceSession(
-                str(path_hf_repo / filename),
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
-            )
-        self.path_hf_repo = path_hf_repo
-        # Vehicle detector: vehicle_weights.onnx
-        self.veh_session = _open_session("vehicle_weights.onnx")
-        self.veh_input_name = self.veh_session.get_inputs()[0].name
-        # Person detector: person_weights.onnx
-        self.per_session = _open_session("person_weights.onnx")
-        self.per_input_name = self.per_session.get_inputs()[0].name
-        # Declared input shape is indexed as (batch, channels, height, width).
-        declared = self.per_session.get_inputs()[0].shape
-        self.per_h = int(declared[2])
-        self.per_w = int(declared[3])
-    def __repr__(self) -> str:
-        """Return the fixed human-readable label of this miner build."""
-        label = "Unified Miner v1 — dual-model vehicle+person"
-        return label
-    # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
-    def _veh_letterbox(self, img):
-        """Fit ``img`` into a VEH_IMG_SIZE square without changing its aspect ratio.
-        The image is scaled by the largest factor that keeps both sides within
-        VEH_IMG_SIZE, then padded with gray (114) to the full square, with the
-        smaller half of any odd padding on the left/top.
-        Returns ``(padded_image, scale, pad_left, pad_top)``.
-        """
-        src_h, src_w = img.shape[:2]
-        scale = min(VEH_IMG_SIZE / src_h, VEH_IMG_SIZE / src_w)
-        new_w = int(round(src_w * scale))
-        new_h = int(round(src_h * scale))
-        scaled = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
-        pad_x = VEH_IMG_SIZE - new_w
-        pad_y = VEH_IMG_SIZE - new_h
-        left = pad_x // 2
-        top = pad_y // 2
-        right = pad_x - left
-        bottom = pad_y - top
-        padded = cv2.copyMakeBorder(
-            scaled, top, bottom, left, right,
-            cv2.BORDER_CONSTANT, value=(114, 114, 114),
-        )
-        return padded, scale, left, top
-    def _veh_preprocess(self, image_bgr):
-        """Turn a BGR image into the vehicle model's input tensor.
-        The image is letterboxed, converted to RGB, scaled to [0, 1] float32
-        and laid out as a contiguous ``(1, 3, H, W)`` array.
-        Returns ``(tensor, scale, pad_left, pad_top)``, the last three being
-        the letterbox parameters needed to map boxes back.
-        """
-        padded, scale, left, top = self._veh_letterbox(image_bgr)
-        rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
-        unit = rgb.astype(np.float32) / 255.0
-        chw = np.transpose(unit, (2, 0, 1))
-        tensor = np.ascontiguousarray(chw[None, ...])
-        return tensor, scale, left, top
-    def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
-        """Decode raw vehicle-model output into boxes in source-image pixels.
-        Args:
-            raw: Model output; ``raw[0]`` must be 2-D with one axis holding
-                ``cx, cy, w, h`` followed by per-class scores. The shorter
-                axis is taken to be that channel axis.
-            ratio: Letterbox scale that was applied to the source image.
-            pl: Left padding added by the letterbox, in pixels.
-            pt: Top padding added by the letterbox, in pixels.
-            ow: Source image width, used to clip x coordinates.
-            oh: Source image height, used to clip y coordinates.
-            conf_thresh: Minimum best-class score for a row to be kept.
-        Returns:
-            ``(boxes, confs, cls_ids)``: an ``(n, 4)`` array of
-            ``[x1, y1, x2, y2]``, the best-class scores and the best-class
-            indices. All three are empty when nothing passes, or when the
-            output is not 2-D or carries no class columns.
-        """
-        empty = (np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int))
-        pred = raw[0]
-        # Same shape guards as _per_decode: without them a malformed output
-        # would fail inside argmax on an empty axis.
-        if pred.ndim != 2:
-            return empty
-        if pred.shape[0] < pred.shape[1]:
-            pred = pred.T
-        if pred.shape[1] < 5:
-            return empty
-        cls_scores = pred[:, 4:]
-        cls_ids = np.argmax(cls_scores, axis=1)
-        confs = np.max(cls_scores, axis=1)
-        mask = confs >= conf_thresh
-        if not mask.any():
-            return empty
-        cx, cy, bw, bh = pred[mask, :4].T
-        half_w, half_h = bw / 2, bh / 2
-        # Undo the letterbox (remove padding, then rescale) and clip to the image.
-        x1 = np.clip((cx - half_w - pl) / ratio, 0, ow)
-        y1 = np.clip((cy - half_h - pt) / ratio, 0, oh)
-        x2 = np.clip((cx + half_w - pl) / ratio, 0, ow)
-        y2 = np.clip((cy + half_h - pt) / ratio, 0, oh)
-        return np.stack([x1, y1, x2, y2], axis=1), confs[mask], cls_ids[mask]
-    def _veh_run_pass(self, image_bgr, conf_thresh):
-        """Run the vehicle model once on ``image_bgr`` and decode its first output.
-        Returns whatever ``_veh_decode`` returns: boxes in source-image
-        pixels, scores and model class indices for rows at or above
-        ``conf_thresh``.
-        """
-        src_h, src_w = image_bgr.shape[:2]
-        tensor, scale, left, top = self._veh_preprocess(image_bgr)
-        outputs = self.veh_session.run(None, {self.veh_input_name: tensor})
-        return self._veh_decode(
-            outputs[0], scale, left, top, src_w, src_h, conf_thresh,
-        )
-    def _infer_vehicle(self, image_bgr):
-        """Detect vehicles with horizontal-flip test-time augmentation.
-        The model runs on the image and on its mirror, both at the loose
-        VEH_TTA_CONF threshold. Mirrored boxes are flipped back, class
-        indices are remapped through VEH_MODEL_TO_OUT, and the two passes are
-        fused with ``_wbf_multi`` in normalized coordinates. Fused boxes are
-        then filtered by the per-class final thresholds.
-        Returns a list of ``BoundingBox`` in source-image pixels (possibly empty).
-        """
-        img_h, img_w = image_bgr.shape[:2]
-        # Pass 1: the image as given.
-        passes = [self._veh_run_pass(image_bgr, VEH_TTA_CONF)]
-        # Pass 2: horizontally mirrored, then boxes mapped back.
-        m_boxes, m_scores, m_cls = self._veh_run_pass(cv2.flip(image_bgr, 1), VEH_TTA_CONF)
-        if len(m_boxes):
-            left = img_w - m_boxes[:, 2]
-            right = img_w - m_boxes[:, 0]
-            m_boxes[:, 0] = left
-            m_boxes[:, 2] = right
-        passes.append((m_boxes, m_scores, m_cls))
-        norm_boxes, norm_scores, norm_labels = [], [], []
-        for boxes, confs, cls_ids in passes:
-            if len(boxes) == 0:
-                continue
-            labels = np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids])
-            scaled = boxes.copy()
-            scaled[:, [0, 2]] /= img_w
-            scaled[:, [1, 3]] /= img_h
-            norm_boxes.append(np.clip(scaled, 0, 1))
-            norm_scores.append(confs)
-            norm_labels.append(labels)
-        if not norm_boxes:
-            return []
-        fused_b, fused_s, fused_l = _wbf_multi(
-            norm_boxes, norm_scores, norm_labels,
-            iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR,
-        )
-        if len(fused_b) == 0:
-            return []
-        # Back from normalized coordinates to pixels.
-        fused_b[:, [0, 2]] *= img_w
-        fused_b[:, [1, 3]] *= img_h
-        detections = []
-        for box, score, label in zip(fused_b, fused_s, fused_l):
-            cls_id = int(label)
-            if not score >= VEH_CONF_PER_CLASS.get(cls_id, VEH_CONF_DEFAULT):
-                continue
-            # Round outward (floor the min corner, ceil the max corner).
-            detections.append(BoundingBox(
-                x1=max(0, min(img_w, math.floor(box[0]))),
-                y1=max(0, min(img_h, math.floor(box[1]))),
-                x2=max(0, min(img_w, math.ceil(box[2]))),
-                y2=max(0, min(img_h, math.ceil(box[3]))),
-                cls_id=cls_id,
-                conf=max(0.0, min(1.0, float(score))),
-            ))
-        return detections
-    # ── Person preprocessing (stretch resize) ──────────────────────────────
-    def _per_preprocess(self, image_bgr):
-        """Turn a BGR image into the person model's input tensor.
-        The image is converted to RGB, stretched (aspect ratio not preserved)
-        to the model's ``per_w`` x ``per_h`` input, scaled to [0, 1] float32
-        and returned with shape ``(1, 3, per_h, per_w)``.
-        """
-        target_size = (self.per_w, self.per_h)
-        stretched = cv2.resize(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), target_size)
-        unit = stretched.astype(np.float32) / 255.0
-        chw = np.transpose(unit, (2, 0, 1))
-        return chw[None, ...]
-    def _per_decode(self, raw, oh, ow, conf_thresh):
-        """Decode raw person-model output into boxes in source-image pixels.
-        ``raw[0]`` must be 2-D with one axis holding ``cx, cy, w, h`` followed
-        by class scores; the shorter axis is taken to be that channel axis.
-        Rows whose best score is at least ``conf_thresh`` are kept, rescaled
-        from the model input size (``per_w`` x ``per_h``) to ``ow`` x ``oh``
-        and clipped to the image.
-        Returns ``(boxes, confs)``; both are empty for malformed output or
-        when no row passes the threshold.
-        """
-        nothing = (np.empty((0, 4)), np.empty(0))
-        pred = raw[0]
-        if pred.ndim != 2:
-            return nothing
-        n_rows, n_cols = pred.shape
-        if n_rows < n_cols:
-            pred = pred.T
-        if pred.shape[1] < 5:
-            return nothing
-        scores = np.max(pred[:, 4:], axis=1)
-        selected = scores >= conf_thresh
-        if not selected.any():
-            return nothing
-        scale_x = ow / float(self.per_w)
-        scale_y = oh / float(self.per_h)
-        cx, cy, bw, bh = pred[selected, :4].T
-        half_w, half_h = bw / 2, bh / 2
-        corners = [
-            np.clip((cx - half_w) * scale_x, 0, ow),
-            np.clip((cy - half_h) * scale_y, 0, oh),
-            np.clip((cx + half_w) * scale_x, 0, ow),
-            np.clip((cy + half_h) * scale_y, 0, oh),
-        ]
-        return np.stack(corners, axis=1), scores[selected]
-    def _per_run_pass(self, image_bgr, conf_thresh):
-        """Run the person model once on ``image_bgr`` and decode its first output.
-        Returns whatever ``_per_decode`` returns: boxes in source-image pixels
-        and scores for rows at or above ``conf_thresh``.
-        """
-        src_h, src_w = image_bgr.shape[:2]
-        tensor = self._per_preprocess(image_bgr)
-        outputs = self.per_session.run(None, {self.per_input_name: tensor})
-        return self._per_decode(outputs[0], src_h, src_w, conf_thresh)
-    def _infer_person(self, image_bgr):
-        """Detect persons with horizontal-flip test-time augmentation.
-        The model runs on the image and on its mirror, both at the loose
-        PER_TTA_CONF threshold. Mirrored boxes are flipped back and the two
-        passes are fused with ``_wbf_single`` in normalized coordinates.
-        Fused boxes scoring at least PER_CONF are returned.
-        Returns a list of ``BoundingBox`` with ``cls_id`` 0, in source-image
-        pixels (possibly empty).
-        """
-        img_h, img_w = image_bgr.shape[:2]
-        # Pass 1: the image as given.
-        passes = [self._per_run_pass(image_bgr, PER_TTA_CONF)]
-        # Pass 2: horizontally mirrored, then boxes mapped back.
-        m_boxes, m_scores = self._per_run_pass(cv2.flip(image_bgr, 1), PER_TTA_CONF)
-        if len(m_boxes):
-            left = img_w - m_boxes[:, 2]
-            right = img_w - m_boxes[:, 0]
-            m_boxes[:, 0] = left
-            m_boxes[:, 2] = right
-        passes.append((m_boxes, m_scores))
-        norm_boxes, norm_scores = [], []
-        for boxes, confs in passes:
-            if len(boxes) == 0:
-                continue
-            scaled = boxes.copy()
-            scaled[:, [0, 2]] /= img_w
-            scaled[:, [1, 3]] /= img_h
-            norm_boxes.append(np.clip(scaled, 0, 1))
-            norm_scores.append(confs)
-        if not norm_boxes:
-            return []
-        fused_b, fused_s = _wbf_single(
-            norm_boxes, norm_scores,
-            iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR,
-        )
-        if len(fused_b) == 0:
-            return []
-        # Back from normalized coordinates to pixels.
-        fused_b[:, [0, 2]] *= img_w
-        fused_b[:, [1, 3]] *= img_h
-        detections = []
-        for box, score in zip(fused_b, fused_s):
-            if not score >= PER_CONF:
-                continue
-            # Round outward (floor the min corner, ceil the max corner).
-            detections.append(BoundingBox(
-                x1=max(0, min(img_w, math.floor(box[0]))),
-                y1=max(0, min(img_h, math.floor(box[1]))),
-                x2=max(0, min(img_w, math.ceil(box[2]))),
-                y2=max(0, min(img_h, math.ceil(box[3]))),
-                cls_id=0,
-                conf=max(0.0, min(1.0, float(score))),
-            ))
-        return detections
-    # ── Unified inference ───────────────────────────────────────────────────
-    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
-        """Run both detectors on one image and return their boxes in one list.
-        Vehicle boxes come first, followed by person boxes; nothing is
-        deduplicated across the two models.
-        """
-        detections = []
-        detections += self._infer_vehicle(image_bgr)
-        detections += self._infer_person(image_bgr)
-        return detections
-    def predict_batch(
-        self,
-        batch_images: list[ndarray],
-        offset: int,
-        n_keypoints: int,
-    ) -> list[TVFrameResult]:
-        """Run detection on every image of a batch.
-        Args:
-            batch_images: Images to process, in frame order.
-            offset: Frame id of the first image; later images count up from it.
-            n_keypoints: Number of ``(0, 0)`` placeholder keypoints to attach
-                to each frame (negative values yield none).
-        Returns:
-            One ``TVFrameResult`` per input image, in input order.
-        """
-        return [
-            TVFrameResult(
-                frame_id=offset + position,
-                boxes=self._infer_single(frame),
-                keypoints=[(0, 0)] * max(0, int(n_keypoints)),
-            )
-            for position, frame in enumerate(batch_images)
-        ]