iotaminer
/

ScoreVision

ONNX

Model card · Files and versions

xet

Community

iotaminer committed on Apr 23

Commit

e15d45e

verified ·

1 Parent(s): 53c1227

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +344 -161

miner.py CHANGED Viewed

@@ -1,16 +1,25 @@
-"""TurboVision miner for Detect-petrol-station-1-0.
-YOLOv11s ONNX FP16 + NMS baked in, with horizontal-flip TTA to boost recall.
-4 classes: 0=petrol hose, 1=petrol pump, 2=price board, 3=roof canopy.
 """
 from __future__ import annotations
 from pathlib import Path
-from typing import List, Tuple
 import cv2
 import numpy as np
 import onnxruntime as ort
 from pydantic import BaseModel
@@ -29,176 +38,350 @@ class TVFrameResult(BaseModel):
     keypoints: list[tuple[int, int]]
 class Miner:
-    IMGSZ = 1280
-    # Per-class conf thresholds: 0=petrol hose, 1=petrol pump, 2=price board, 3=roof canopy.
-    # Tuned via greedy grid search on 100 fresh challenges vs real SAM3 pseudo-GT.
-    CLASS_CONF_THRES = (0.43, 0.63, 0.37, 0.41)
-    CONF_THRES = 0.37  # fallback / pre-filter at lowest per-class threshold
-    IOU_THRES = 0.45
-    NUM_CLASSES = 4
-    MIN_BOX_FRAC = 0.005
-    USE_TTA = True
     def __init__(self, path_hf_repo: Path) -> None:
-        self.onnx_path = path_hf_repo / 'weights.onnx'
-        if not self.onnx_path.exists():
-            raise FileNotFoundError(f'Model not found at {self.onnx_path}')
-        # Help ORT find CUDA libs shipped with nvidia-*-cu12 packages (pytorch/onnxruntime).
-        import os as _os
-        import site as _site
-        import glob as _glob
-        cuda_lib_dirs: list[str] = []
-        for sp in _site.getsitepackages() + [_site.getusersitepackages()]:
-            for sub in ('nvidia/cuda_runtime/lib', 'nvidia/cublas/lib', 'nvidia/cudnn/lib',
-                        'nvidia/cufft/lib', 'nvidia/cuda_nvrtc/lib', 'nvidia/curand/lib',
-                        'nvidia/cusparse/lib', 'nvidia/cusolver/lib', 'nvidia/nvjitlink/lib'):
-                p = f'{sp}/{sub}'
-                if _glob.glob(f'{p}/*.so*'):
-                    cuda_lib_dirs.append(p)
-        if cuda_lib_dirs:
-            existing = _os.environ.get('LD_LIBRARY_PATH', '')
-            _os.environ['LD_LIBRARY_PATH'] = ':'.join(cuda_lib_dirs + ([existing] if existing else []))
-        providers: list = []
         try:
             ort.preload_dlls()
-        except Exception as _pe:
-            print(f'[Miner] preload_dlls failed: {_pe}')
-        available = ort.get_available_providers()
-        if 'CUDAExecutionProvider' in available:
-            providers.append(('CUDAExecutionProvider', {'device_id': 0}))
-        providers.append('CPUExecutionProvider')
-        so = ort.SessionOptions()
-        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-        self.session = ort.InferenceSession(str(self.onnx_path), sess_options=so, providers=providers)
         self.input_name = self.session.get_inputs()[0].name
-        inp = self.session.get_inputs()[0]
-        self.input_shape = inp.shape
-        self.input_dtype = np.float16 if inp.type == 'tensor(float16)' else np.float32
-        self.active_providers = self.session.get_providers()
-        print(f'[Miner] Loaded {self.onnx_path.name} | providers={self.active_providers} | dtype={self.input_dtype}')
-        print(f'[Miner] cuda_lib_dirs discovered: {cuda_lib_dirs[:3]}')
-        print(f'[Miner] ort.get_available_providers() = {available}')
-    def __repr__(self) -> str:
-        return f'PetrolMiner(yolo11s-onnx-fp16-nms, tta={self.USE_TTA}, conf={self.CONF_THRES}, providers={getattr(self, "active_providers", "?")})'
-    @staticmethod
-    def _letterbox(img, new_size=1280, color=(114, 114, 114)):
-        h, w = img.shape[:2]
-        r = min(new_size / h, new_size / w)
-        nh, nw = int(round(h * r)), int(round(w * r))
-        resized = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
-        top = (new_size - nh) // 2
-        bottom = new_size - nh - top
-        left = (new_size - nw) // 2
-        right = new_size - nw - left
-        padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
-        return padded, r, (left, top)
-    def _preprocess(self, img):
-        h, w = img.shape[:2]
-        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        padded, r, (lx, ty) = self._letterbox(img_rgb, self.IMGSZ)
-        x = padded.astype(self.input_dtype) / 255.0
-        x = x.transpose(2, 0, 1)[None, ...]
-        return np.ascontiguousarray(x), r, (lx, ty), (w, h)
-    def _run_onnx(self, img):
-        x, r, (lx, ty), (W, H) = self._preprocess(img)
-        outputs = self.session.run(None, {self.input_name: x})
-        det = outputs[0]
-        if det.ndim == 3: det = det[0]
-        if det.size == 0: return [], [], [], W, H
-        det = np.asarray(det, dtype=np.float32)
-        if det.shape[-1] < 6: return [], [], [], W, H
-        xyxy = det[:, :4].copy()
-        conf = det[:, 4].copy()
-        cls_id = det[:, 5].astype(int)
-        keep = conf >= self.CONF_THRES
-        xyxy, conf, cls_id = xyxy[keep], conf[keep], cls_id[keep]
-        if len(xyxy) == 0: return [], [], [], W, H
-        xyxy[:, [0, 2]] = (xyxy[:, [0, 2]] - lx) / r
-        xyxy[:, [1, 3]] = (xyxy[:, [1, 3]] - ty) / r
-        xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], 0, W - 1)
-        xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], 0, H - 1)
-        min_side = self.MIN_BOX_FRAC * min(W, H)
-        mask = (
-            (cls_id >= 0) & (cls_id < self.NUM_CLASSES)
-            & ((xyxy[:, 2] - xyxy[:, 0]) >= min_side)
-            & ((xyxy[:, 3] - xyxy[:, 1]) >= min_side)
         )
-        return xyxy[mask], conf[mask], cls_id[mask], W, H
-    @staticmethod
-    def _hard_nms_per_class(xyxy, conf, cls_id, iou_thres=0.5):
-        if len(xyxy) == 0: return np.empty((0,), dtype=int)
-        keep = []
-        for c in np.unique(cls_id):
-            idx = np.where(cls_id == c)[0]
-            b = xyxy[idx]; s = conf[idx]
-            order = np.argsort(-s)
-            b = b[order]; s = s[order]; idx = idx[order]
-            areas = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
-            suppressed = np.zeros(len(b), dtype=bool)
-            for i in range(len(b)):
-                if suppressed[i]: continue
-                keep.append(idx[i])
-                xx1 = np.maximum(b[i, 0], b[i+1:, 0])
-                yy1 = np.maximum(b[i, 1], b[i+1:, 1])
-                xx2 = np.minimum(b[i, 2], b[i+1:, 2])
-                yy2 = np.minimum(b[i, 3], b[i+1:, 3])
-                inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
-                iou = inter / (areas[i] + areas[i+1:] - inter + 1e-9)
-                suppressed[i+1:][iou > iou_thres] = True
-        return np.array(keep, dtype=int)
-    def _predict_single(self, img):
-        xyxy1, conf1, cls1, W, H = self._run_onnx(img)
-        if not self.USE_TTA:
-            xyxy, conf, cls_id = xyxy1, conf1, cls1
         else:
-            img_f = cv2.flip(img, 1)
-            xyxy2, conf2, cls2, _, _ = self._run_onnx(img_f)
-            if len(xyxy2) > 0:
-                tmp = xyxy2.copy()
-                tmp[:, 0] = W - xyxy2[:, 2]
-                tmp[:, 2] = W - xyxy2[:, 0]
-                xyxy2 = tmp
-            pieces_xyxy = [a for a in (xyxy1, xyxy2) if len(a) > 0]
-            pieces_conf = [a for a in (conf1, conf2) if len(a) > 0]
-            pieces_cls  = [a for a in (cls1,  cls2)  if len(a) > 0]
-            xyxy   = np.vstack(pieces_xyxy) if pieces_xyxy else np.empty((0, 4))
-            conf   = np.concatenate(pieces_conf) if pieces_conf else np.empty((0,))
-            cls_id = np.concatenate(pieces_cls) if pieces_cls else np.empty((0,))
-            if len(xyxy) > 0:
-                keep = self._hard_nms_per_class(xyxy, conf, cls_id, iou_thres=self.IOU_THRES)
-                xyxy, conf, cls_id = xyxy[keep], conf[keep], cls_id[keep]
-        boxes = []
-        for i in range(len(xyxy)):
-            ci = int(cls_id[i])
-            if 0 <= ci < self.NUM_CLASSES and float(conf[i]) < self.CLASS_CONF_THRES[ci]:
                 continue
-            boxes.append(BoundingBox(
-                x1=int(round(float(xyxy[i, 0]))),
-                y1=int(round(float(xyxy[i, 1]))),
-                x2=int(round(float(xyxy[i, 2]))),
-                y2=int(round(float(xyxy[i, 3]))),
-                cls_id=ci,
-                conf=float(conf[i]),
-            ))
-        return boxes
-    def predict_batch(self, batch_images, offset, n_keypoints):
-        results = []
-        for i, img in enumerate(batch_images):
             try:
-                boxes = self._predict_single(img)
             except Exception as e:
-                print(f'[Miner] predict error on frame {offset + i}: {e}')
                 boxes = []
-            kps = [(0, 0) for _ in range(n_keypoints)]
-            results.append(TVFrameResult(frame_id=offset + i, boxes=boxes, keypoints=kps))
         return results

+"""
+Detect-Person miner for ScoreVision.
+Loaded by the TurboVision chute_template from the root of the HF repo.
+Thresholds (imgsz, conf, iou, max_det) are overridable via SN44_* env vars
+so operators can hot-patch without redeploying.
+Contract expected by the chute template:
+  * class `Miner(path_hf_repo: Path)`
+  * method `predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]`
 """
 from __future__ import annotations
+import math
+import os
 from pathlib import Path
 import cv2
 import numpy as np
 import onnxruntime as ort
+from numpy import ndarray
 from pydantic import BaseModel
     keypoints: list[tuple[int, int]]
+# ---------------------------------------------------------------------------
+# Tuned hyperparameters (override via env for hot-patching without redeploy)
+# ---------------------------------------------------------------------------
+_DEFAULT_WEIGHTS = "weights.onnx"  # weights filename under the repo root; SN44_ONNX_WEIGHTS overrides it
+_DEFAULT_IMGSZ = 960  # fallback input side when the ONNX input H/W is not a positive int
+_DEFAULT_CONF = 0.25  # minimum person score kept before NMS; SN44_CONF overrides it
+_DEFAULT_IOU = 0.60  # IoU threshold passed to NMS; SN44_IOU overrides it
+_DEFAULT_MAX_DET = 300  # cap on boxes kept after NMS; SN44_MAX_DET overrides it
+def _env_int(name: str, default: int) -> int:  # read environment variable `name` as an int
+    try:
+        return int(os.environ.get(name, default))  # unset variable -> int(default)
+    except (TypeError, ValueError):
+        return default  # value could not be parsed as an int: return `default` unchanged
+def _env_float(name: str, default: float) -> float:  # read environment variable `name` as a float
+    try:
+        return float(os.environ.get(name, default))  # unset variable -> float(default)
+    except (TypeError, ValueError):
+        return default  # value could not be parsed as a float: return `default` unchanged
+def _letterbox(
+    image: ndarray,
+    new_shape: tuple[int, int],  # target size, unpacked below as (width, height)
+    color: tuple[int, int, int] = (114, 114, 114),  # constant fill value for the border
+) -> tuple[ndarray, float, tuple[float, float]]:
+    """YOLO-style letterbox preserving aspect ratio, returns (img, ratio, (dw, dh))."""
+    h, w = image.shape[:2]
+    new_w, new_h = new_shape
+    ratio = min(new_w / w, new_h / h)  # largest uniform scale at which both sides still fit
+    resized_w = int(round(w * ratio))
+    resized_h = int(round(h * ratio))
+    if (resized_w, resized_h) != (w, h):  # skip the resize when the size would not change
+        interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR  # cubic only when upscaling
+        image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
+    dw = (new_w - resized_w) / 2.0  # per-side horizontal padding; may end in .5
+    dh = (new_h - resized_h) / 2.0  # per-side vertical padding; may end in .5
+    left = int(round(dw - 0.1))  # the -/+ 0.1 nudge splits an odd total as n on this side ...
+    right = int(round(dw + 0.1))  # ... and n + 1 on the opposite side
+    top = int(round(dh - 0.1))
+    bottom = int(round(dh + 0.1))
+    padded = cv2.copyMakeBorder(
+        image, top, bottom, left, right,
+        borderType=cv2.BORDER_CONSTANT, value=color,
+    )
+    return padded, ratio, (dw, dh)  # returns the unrounded (dw, dh), not the integer pads applied above
+def _xywh_to_xyxy(boxes: np.ndarray) -> np.ndarray:  # rows of (center x, center y, width, height) -> (x1, y1, x2, y2)
+    out = np.empty_like(boxes)  # uninitialized; columns 0-3 are filled below
+    out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0  # x1 = cx - w/2
+    out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0  # y1 = cy - h/2
+    out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0  # x2 = cx + w/2
+    out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0  # y2 = cy + h/2
+    return out
+def _hard_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
+    """Pure numpy hard NMS. Avoids torchvision to keep the chute slim."""
+    if len(boxes) == 0:
+        return np.array([], dtype=np.intp)
+    boxes = np.asarray(boxes, dtype=np.float32)
+    scores = np.asarray(scores, dtype=np.float32)
+    order = np.argsort(scores)[::-1]  # candidate indices, highest score first
+    keep: list[int] = []
+    while len(order) > 0:
+        i = int(order[0])
+        keep.append(i)  # the best remaining candidate is always kept
+        if len(order) == 1:
+            break
+        rest = order[1:]
+        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])  # intersection rectangle of box i with each remaining box
+        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
+        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
+        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
+        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)  # zero when the boxes do not overlap
+        area_i = max(0.0, (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1]))  # NOTE(review): clamps the product, while area_r clamps each side; differs only if both sides are negative
+        area_r = np.maximum(0.0, boxes[rest, 2] - boxes[rest, 0]) * np.maximum(
+            0.0, boxes[rest, 3] - boxes[rest, 1]
+        )
+        iou = inter / (area_i + area_r - inter + 1e-7)  # epsilon keeps the division defined for zero-area boxes
+        order = rest[iou <= iou_thresh]  # drop candidates overlapping the kept box by more than iou_thresh
+    return np.array(keep, dtype=np.intp)  # indices into the input `boxes`, in descending-score order
+def _clip_boxes(boxes: np.ndarray, image_size: tuple[int, int]) -> np.ndarray:  # clamp xyxy coordinates into the image; writes into `boxes` in place
+    w, h = image_size  # (width, height)
+    boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)  # x coordinates are limited to [0, w - 1]
+    boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)  # y coordinates are limited to [0, h - 1]
+    boxes[:, 2] = np.clip(boxes[:, 2], 0, w - 1)
+    boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)
+    return boxes  # the same array object that was passed in
 class Miner:
+    """Detect-Person miner: ONNX Runtime + raw YOLO decode + numpy NMS."""
     def __init__(self, path_hf_repo: Path) -> None:
+        self.class_names = ["person"]
+        weights_name = os.environ.get("SN44_ONNX_WEIGHTS", _DEFAULT_WEIGHTS)  # weights filename is overridable via env
+        weights_path = path_hf_repo / weights_name
+        if not weights_path.is_file():  # fail before any ORT setup when the weights file is missing
+            raise FileNotFoundError(
+                f"ONNX weights '{weights_name}' not found in {path_hf_repo}"
+            )
+        print("ORT version:", ort.__version__)
         try:
             ort.preload_dlls()
+            print("ORT preload_dlls ok")
+        except Exception as e:  # best-effort: any preload failure is only logged
+            print(f"ORT preload_dlls skipped: {e}")
+        print("ORT available providers:", ort.get_available_providers())
+        sess_options = ort.SessionOptions()
+        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        try:
+            self.session = ort.InferenceSession(
+                str(weights_path),
+                sess_options=sess_options,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],  # CUDA listed first, CPU second
+            )
+            print("ORT session created with CUDA preferred")
+        except Exception as e:  # any session-creation failure triggers a CPU-only retry
+            print(f"ORT CUDA provider failed, falling back to CPU: {e}")
+            self.session = ort.InferenceSession(
+                str(weights_path),
+                sess_options=sess_options,
+                providers=["CPUExecutionProvider"],
+            )
+        print("ORT session providers:", self.session.get_providers())
+        for inp in self.session.get_inputs():
+            print("ONNX INPUT:", inp.name, inp.shape, inp.type)
+        for out in self.session.get_outputs():
+            print("ONNX OUTPUT:", out.name, out.shape, out.type)
         self.input_name = self.session.get_inputs()[0].name
+        self.output_names = [o.name for o in self.session.get_outputs()]
+        input_shape = self.session.get_inputs()[0].shape
+        h = input_shape[2] if isinstance(input_shape[2], int) and input_shape[2] > 0 else _DEFAULT_IMGSZ  # use the model's H when it is a positive int, else the default
+        w = input_shape[3] if isinstance(input_shape[3], int) and input_shape[3] > 0 else _DEFAULT_IMGSZ  # same rule for W
+        self.input_height = _env_int("SN44_IMGSZ", h)  # a valid SN44_IMGSZ replaces the height ...
+        self.input_width = _env_int("SN44_IMGSZ", w)  # ... and the width, with the same value
+        self.conf_thres = _env_float("SN44_CONF", _DEFAULT_CONF)
+        self.iou_thres = _env_float("SN44_IOU", _DEFAULT_IOU)
+        self.max_det = _env_int("SN44_MAX_DET", _DEFAULT_MAX_DET)
+        self.min_w = 4  # limits below are read by _filter_sane on boxes in original-image coordinates
+        self.min_h = 4
+        self.min_box_area = 16
+        self.max_aspect_ratio = 8.0  # limit on long side / short side
+        self.max_box_area_ratio = 0.9  # limit on box area as a fraction of the image area
+        self.person_cls_idx = 0  # column of the class-score block read as the person score
+        print(
+            "Miner ready: "
+            f"imgsz={self.input_height}x{self.input_width}, "
+            f"conf={self.conf_thres:.3f}, iou={self.iou_thres:.3f}, "
+            f"max_det={self.max_det}, providers={self.session.get_providers()}"
         )
+    def __repr__(self) -> str:
+        return (
+            "DetectPersonMiner("
+            f"providers={self.session.get_providers()}, "
+            f"imgsz={self.input_height}x{self.input_width}, "
+            f"conf={self.conf_thres}, iou={self.iou_thres})"
+        )
+    def _preprocess(
+        self, image: ndarray
+    ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
+        if image.dtype != np.uint8:
+            image = image.astype(np.uint8)  # NOTE(review): plain cast, no rescaling; float images in [0, 1] would truncate to 0 - confirm callers pass uint8
+        orig_h, orig_w = image.shape[:2]
+        img, ratio, pad = _letterbox(image, (self.input_width, self.input_height))  # _letterbox takes (width, height)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # swaps channels 0 and 2; presumes BGR input - TODO confirm against the caller
+        img = img.astype(np.float32) / 255.0  # scale pixel values to [0, 1]
+        img = np.transpose(img, (2, 0, 1))[None, ...]  # HWC -> CHW, plus a leading batch axis of size 1
+        img = np.ascontiguousarray(img, dtype=np.float32)
+        return img, ratio, pad, (orig_w, orig_h)  # original size is returned as (width, height)
+    def _filter_sane(
+        self,
+        boxes: np.ndarray,
+        scores: np.ndarray,
+        orig_size: tuple[int, int],
+    ) -> tuple[np.ndarray, np.ndarray]:
+        if len(boxes) == 0:
+            return boxes, scores
+        orig_w, orig_h = orig_size
+        image_area = float(orig_w * orig_h)
+        keep: list[int] = []
+        for i, box in enumerate(boxes):
+            x1, y1, x2, y2 = box.tolist()
+            bw = x2 - x1
+            bh = y2 - y1
+            if bw <= 0 or bh <= 0:  # degenerate or inverted box
+                continue
+            if bw < self.min_w or bh < self.min_h:  # too thin on either side
+                continue
+            area = bw * bh
+            if area < self.min_box_area:
+                continue
+            if area > self.max_box_area_ratio * image_area:  # box covers more of the frame than allowed
+                continue
+            ar = max(bw / max(bh, 1e-6), bh / max(bw, 1e-6))  # long side / short side, so always >= 1
+            if ar > self.max_aspect_ratio:
+                continue
+            keep.append(i)
+        if not keep:  # nothing survived: return empty arrays with fixed shapes and float32 dtype
+            return (
+                np.empty((0, 4), dtype=np.float32),
+                np.empty((0,), dtype=np.float32),
+            )
+        keep_idx = np.array(keep, dtype=np.intp)
+        return boxes[keep_idx], scores[keep_idx]
+    def _decode_yolov11(
+        self,
+        preds: np.ndarray,
+        ratio: float,
+        pad: tuple[float, float],
+        orig_size: tuple[int, int],
+    ) -> list[BoundingBox]:
+        """
+        Ultralytics YOLOv8/11 ONNX output is [1, 4+nc, N].
+        For COCO nc=80 → shape [1, 84, N]. No objectness term;
+        class score IS the detection score.
+        """
+        if preds.ndim != 3:
+            return []
+        preds = preds[0]  # drop the batch axis; only the first batch element is decoded
+        if preds.shape[0] == 4 + len(self._coco_classes()):  # (84, N) layout -> transpose to (N, 84)
+            preds = preds.T
+        elif preds.shape[1] == 4 + len(self._coco_classes()):  # already (N, 84)
+            pass
         else:
+            if preds.shape[0] < preds.shape[1]:  # other channel counts: treat the shorter axis as channels
+                preds = preds.T
+        if preds.shape[1] < 5:  # need 4 box values plus at least one score column
+            return []
+        boxes_xywh = preds[:, :4].astype(np.float32)
+        class_scores = preds[:, 4:].astype(np.float32)
+        person_scores = class_scores[:, self.person_cls_idx]
+        mask = person_scores >= self.conf_thres
+        if not np.any(mask):
+            return []
+        boxes_xywh = boxes_xywh[mask]
+        scores = person_scores[mask]
+        boxes = _xywh_to_xyxy(boxes_xywh)
+        pad_w, pad_h = pad
+        boxes[:, [0, 2]] -= pad_w  # undo the letterbox: remove the padding offset first ...
+        boxes[:, [1, 3]] -= pad_h
+        boxes /= ratio  # ... then divide by the resize scale to return to original-image pixels
+        boxes = _clip_boxes(boxes, orig_size)
+        boxes, scores = self._filter_sane(boxes, scores, orig_size)
+        if len(boxes) == 0:
+            return []
+        keep = _hard_nms(boxes, scores, self.iou_thres)
+        keep = keep[: self.max_det]  # _hard_nms returns indices best-first, so this keeps the top-scoring boxes
+        boxes = boxes[keep]
+        scores = scores[keep]
+        out: list[BoundingBox] = []
+        for box, conf in zip(boxes, scores):
+            if box[2] <= box[0] or box[3] <= box[1]:  # defensive: _filter_sane already drops non-positive sizes
                 continue
+            out.append(
+                BoundingBox(
+                    x1=int(math.floor(box[0])),  # round outward to whole pixels: floor the top-left corner ...
+                    y1=int(math.floor(box[1])),
+                    x2=int(math.ceil(box[2])),  # ... and ceil the bottom-right corner
+                    y2=int(math.ceil(box[3])),
+                    cls_id=0,  # every emitted box gets class id 0
+                    conf=float(conf),
+                )
+            )
+        return out
+    @staticmethod
+    def _coco_classes() -> list[str]:
+        return [  # 80 names; within this class only the list length is used (the 4 + 80 channel check)
+            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
+            "truck", "boat", "traffic light", "fire hydrant", "stop sign",
+            "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
+            "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
+            "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
+            "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
+            "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
+            "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
+            "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
+            "couch", "potted plant", "bed", "dining table", "toilet", "tv",
+            "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
+            "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
+            "scissors", "teddy bear", "hair drier", "toothbrush",
+        ]
+    def _predict_single(self, image: np.ndarray) -> list[BoundingBox]:
+        if image is None:
+            raise ValueError("Input image is None")
+        if not isinstance(image, np.ndarray) or image.ndim != 3 or image.shape[2] != 3:  # require an H x W x 3 array
+            raise ValueError(f"Expected HWC RGB/BGR image, got shape={getattr(image, 'shape', None)}")
+        input_tensor, ratio, pad, orig_size = self._preprocess(image)
+        outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
+        return self._decode_yolov11(outputs[0], ratio, pad, orig_size)  # only the first model output is decoded
+    def predict_batch(
+        self,
+        batch_images: list[ndarray],
+        offset: int,
+        n_keypoints: int,
+    ) -> list[TVFrameResult]:
+        results: list[TVFrameResult] = []
+        for i, image in enumerate(batch_images):  # frames are run one at a time, not as a single batched tensor
+            frame_id = offset + i
             try:
+                boxes = self._predict_single(image)
             except Exception as e:
+                print(f"Inference failed for frame {frame_id}: {e}")
                 boxes = []
+            results.append(
+                TVFrameResult(
+                    frame_id=frame_id,
+                    boxes=boxes,
+                    keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],  # placeholder (0, 0) entries, one per requested keypoint
+                )
+            )
         return results