from typing import List, Dict, Tuple

import numpy as np
import cv2
import onnxruntime as ort


class ObjectDetector:
    """Single-image object detector backed by an ONNX Runtime session.

    The pipeline is: letterbox the image onto a square canvas
    (``preprocess``), run the model, then decode, rescale and de-duplicate
    the raw predictions (``postprocess``).

    The decoding in ``postprocess`` expects the first model output to
    squeeze to ``(4 + num_classes, num_candidates)`` with rows
    ``cx, cy, w, h`` followed by one score row per class.

    NOTE(review): no channel reordering is performed anywhere in this class.
    If the model was trained on RGB input and callers pass OpenCV BGR images,
    the conversion has to happen before ``predict`` -- confirm with the
    model's export settings.
    """

    # Execution providers in order of preference.
    _PROVIDER_PREFERENCE = ('CUDAExecutionProvider', 'CPUExecutionProvider')
    # Input name used when the model declares it; kept for backward
    # compatibility with the previously hard-coded feed key.
    _DEFAULT_INPUT_NAME = "images"
    # Grey value used to fill the letterbox border.
    _PAD_VALUE = 114

    def __init__(self, model_path: str, class_names: List[str], input_size: int = 640):
        """Load the model and run one warm-up inference.

        Args:
            model_path: Path to the ONNX model file.
            class_names: Class labels, indexed by the model's class id.
            input_size: Side length of the square network input, in pixels.
        """
        self.class_names = class_names
        self.input_size = input_size
        self.session = self._load_model(model_path)
        self.input_name = self._resolve_input_name()
        self._warmup()

    def _load_model(self, model_path: str) -> ort.InferenceSession:
        """Create an inference session with all graph optimizations enabled."""
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        # Only request providers this onnxruntime build actually offers; fall
        # back to the full preference list if, unexpectedly, none match.
        available = ort.get_available_providers()
        providers = [
            name for name in self._PROVIDER_PREFERENCE if name in available
        ] or list(self._PROVIDER_PREFERENCE)

        return ort.InferenceSession(
            model_path,
            providers=providers,
            sess_options=options,
        )

    def _resolve_input_name(self) -> str:
        """Return the feed key for the image tensor.

        Prefers ``"images"`` when the model declares an input with that name
        (the previously hard-coded behaviour); otherwise uses the model's
        first declared input.
        """
        names = [node.name for node in self.session.get_inputs()]
        if not names or self._DEFAULT_INPUT_NAME in names:
            return self._DEFAULT_INPUT_NAME
        return names[0]

    def _warmup(self):
        """Run one inference on random data and discard the result."""
        dummy_input = np.random.randn(
            1, 3, self.input_size, self.input_size
        ).astype(np.float32)
        self.session.run(None, {self.input_name: dummy_input})

    @staticmethod
    def compute_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        """Compute the IoU between one box and an array of boxes.

        Args:
            box: A single box as ``[x1, y1, x2, y2]``.
            boxes: Array of shape ``(N, 4)`` in the same corner format.

        Returns:
            Array of ``N`` IoU values. A small epsilon in the denominator
            avoids division by zero for degenerate boxes.
        """
        left = np.maximum(box[0], boxes[:, 0])
        top = np.maximum(box[1], boxes[:, 1])
        right = np.minimum(box[2], boxes[:, 2])
        bottom = np.minimum(box[3], boxes[:, 3])

        # Non-overlapping boxes give negative extents; clamp them to zero.
        intersection_area = np.maximum(0, right - left) * np.maximum(0, bottom - top)

        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area

        return intersection_area / (union_area + 1e-6)

    @staticmethod
    def nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
        """Greedy non-maximum suppression.

        Boxes are visited in descending score order; each kept box discards
        every remaining box whose IoU with it is ``>= iou_threshold``. Class
        ids are not consulted, so suppression is applied across all classes.

        Args:
            boxes: Array of shape ``(N, 4)`` as ``[x1, y1, x2, y2]``.
            scores: Array of ``N`` confidence scores.
            iou_threshold: Overlap at or above which a box is suppressed.

        Returns:
            Indices into ``boxes`` of the kept detections, best score first.
        """
        order = np.argsort(scores)[::-1]
        keep_boxes: List[int] = []

        while order.size > 0:
            best, rest = order[0], order[1:]
            # Cast so the result is a list of plain ints, as annotated.
            keep_boxes.append(int(best))
            ious = ObjectDetector.compute_iou(boxes[best, :], boxes[rest, :])
            order = rest[ious < iou_threshold]

        return keep_boxes

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox an image into the network input tensor.

        The image is resized with its aspect ratio preserved so that it fits
        inside an ``input_size`` x ``input_size`` canvas, centred, with the
        border filled with a constant grey value. Pixel values are scaled to
        ``[0, 1]`` and the layout is changed from HWC to NCHW.

        Args:
            image: Image array whose first two axes are height and width.
                The canvas has three channels, so a 3-channel image is
                expected.

        Returns:
            ``(tensor, scale, (pad_w, pad_h))`` where ``tensor`` has shape
            ``(1, 3, input_size, input_size)`` and dtype float32, ``scale``
            is the resize factor applied, and the padding is the left/top
            offset of the image inside the canvas, in pixels.

        Raises:
            ValueError: If ``image`` is ``None`` or has no pixels (for
                example the result of a failed ``cv2.imread``).
        """
        if image is None or image.size == 0:
            raise ValueError("preprocess() received an empty image")

        h, w = image.shape[:2]
        scale = min(self.input_size / h, self.input_size / w)

        # Truncation can reach 0 for extreme aspect ratios, which cv2.resize
        # rejects; keep at least one pixel per axis.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        canvas = np.full(
            (self.input_size, self.input_size, 3), self._PAD_VALUE, dtype=np.uint8
        )
        ph = (self.input_size - new_h) // 2
        pw = (self.input_size - new_w) // 2
        canvas[ph:ph + new_h, pw:pw + new_w] = resized

        blob = canvas.astype(np.float32) / 255.0
        # HWC -> NCHW; copy so the runtime gets a C-contiguous buffer rather
        # than a strided view of the canvas.
        tensor = np.ascontiguousarray(blob.transpose(2, 0, 1)[None, ...])
        return tensor, scale, (pw, ph)

    def postprocess(
        self,
        predictions: np.ndarray,
        original_shape: Tuple[int, int],
        scale: float,
        padding: Tuple[int, int],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.45
    ) -> List[Dict]:
        """Decode raw model output into a list of detections.

        Args:
            predictions: Raw model output; after squeezing it must be
                ``(4 + num_classes, num_candidates)`` with rows
                ``cx, cy, w, h`` followed by per-class scores.
            original_shape: ``(height, width)`` of the original image.
            scale: Resize factor returned by ``preprocess``.
            padding: ``(pad_w, pad_h)`` offsets returned by ``preprocess``.
            conf_threshold: Candidates whose best class score is not above
                this value are dropped.
            iou_threshold: IoU threshold passed to ``nms``.

        Returns:
            One dict per kept detection with keys ``"class"`` (label),
            ``"confidence"`` (best class score), ``"bbox"``
            (``[x1, y1, x2, y2]`` in original-image pixels) and
            ``"bbox_normalized"`` (``[cx, cy, w, h]`` divided by the image
            width/height). Empty list if nothing passes the threshold.
        """
        # (4 + C, N) -> (N, 4 + C): one candidate per row.
        predictions = np.squeeze(predictions).T

        scores = np.max(predictions[:, 4:], axis=1)
        valid = scores > conf_threshold
        if not np.any(valid):
            return []

        # Filter once; boolean indexing copies, so nothing below touches the
        # caller's array.
        predictions = predictions[valid]
        scores = scores[valid]
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Centre/size -> corner format, written into a dedicated array.
        boxes = np.empty_like(predictions[:, :4])
        boxes[:, 0:2] = predictions[:, 0:2] - predictions[:, 2:4] / 2
        boxes[:, 2:4] = boxes[:, 0:2] + predictions[:, 2:4]

        # Undo the letterbox: remove the padding offset, then the resize.
        pad_w, pad_h = padding
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale

        # Keep boxes inside the original image.
        h, w = original_shape
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, w)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, h)

        detections: List[Dict] = []
        for i in self.nms(boxes, scores, iou_threshold):
            box = boxes[i]
            detections.append({
                "class": self.class_names[int(class_ids[i])],
                "confidence": float(scores[i]),
                "bbox": box.tolist(),
                "bbox_normalized": [
                    float((box[0] + box[2]) / 2 / w),
                    float((box[1] + box[3]) / 2 / h),
                    float((box[2] - box[0]) / w),
                    float((box[3] - box[1]) / h),
                ],
            })
        return detections

    def predict(self, image: np.ndarray) -> List[Dict]:
        """Run the full pipeline on one image and return its detections.

        Uses the default confidence and IoU thresholds of ``postprocess``.

        Raises:
            ValueError: If ``image`` is ``None`` or empty.
        """
        input_tensor, scale, padding = self.preprocess(image)
        outputs = self.session.run(None, {self.input_name: input_tensor})
        return self.postprocess(outputs[0], image.shape[:2], scale, padding)