# Spaces: francmeister / model (status: Sleeping) - File size: 4,616 Bytes

import numpy as np
import cv2
import onnxruntime as ort
from typing import List, Dict, Tuple

class ObjectDetector:
    """Run an ONNX object-detection model and decode its raw output.

    The pipeline is: letterbox the image onto a square ``input_size`` canvas,
    run the ONNX Runtime session, then turn the raw prediction tensor into a
    list of detections (confidence filter -> box decode -> NMS).

    NOTE(review): the decoder assumes every candidate is laid out as
    ``[cx, cy, w, h, class_score_0, class_score_1, ...]`` in model-input
    pixels (the layout produced by YOLOv8-style exports) - confirm against
    the exported model.
    """

    # Execution providers to request, in priority order.
    _PREFERRED_PROVIDERS = ("CUDAExecutionProvider", "CPUExecutionProvider")
    # Name of the model input the image tensor is fed to.
    _INPUT_NAME = "images"
    # Fill value of the letterbox border.
    _PAD_VALUE = 114

    def __init__(self, model_path: str, class_names: List[str], input_size: int = 640):
        """Load the model and run one warm-up inference.

        Args:
            model_path: Path of the ONNX model file.
            class_names: Class labels, indexed by the model's class id.
            input_size: Side length of the square model input, in pixels.
        """
        self.class_names = class_names
        self.input_size = input_size
        self.session = self._load_model(model_path)
        self._warmup()

    def _load_model(self, model_path: str) -> "ort.InferenceSession":
        """Create an inference session with all graph optimizations enabled.

        Only providers that this onnxruntime build reports as available are
        requested, so a CPU-only install does not ask for CUDA. If none of
        the preferred providers is reported, the full preferred list is
        passed through unchanged.
        """
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        available = set(ort.get_available_providers())
        providers = [name for name in self._PREFERRED_PROVIDERS if name in available]
        if not providers:
            providers = list(self._PREFERRED_PROVIDERS)

        return ort.InferenceSession(
            model_path,
            providers=providers,
            sess_options=options,
        )

    def _warmup(self):
        """Run a single inference on random data so later calls skip first-run setup."""
        side = self.input_size
        dummy_input = np.random.randn(1, 3, side, side).astype(np.float32)
        self.session.run(None, {self._INPUT_NAME: dummy_input})

    @staticmethod
    def compute_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        """Return the IoU of one box against each row of ``boxes``.

        Args:
            box: One box as ``[x1, y1, x2, y2]``.
            boxes: Array of shape ``(N, 4)`` in the same corner format.

        Returns:
            Array of ``N`` IoU values. A small epsilon in the denominator
            keeps zero-area pairs from dividing by zero.
        """
        left = np.maximum(box[0], boxes[:, 0])
        top = np.maximum(box[1], boxes[:, 1])
        right = np.minimum(box[2], boxes[:, 2])
        bottom = np.minimum(box[3], boxes[:, 3])

        # Negative extents mean the boxes do not overlap on that axis.
        intersection = np.maximum(0, right - left) * np.maximum(0, bottom - top)
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        return intersection / (box_area + boxes_area - intersection + 1e-6)

    @staticmethod
    def nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
        """Greedy non-maximum suppression.

        Repeatedly keeps the highest-scoring remaining box and drops every
        other box whose IoU with it is ``>= iou_threshold``. Class labels are
        not considered, so overlapping boxes of different classes suppress
        each other.

        Args:
            boxes: Array of shape ``(N, 4)`` as ``[x1, y1, x2, y2]``.
            scores: Array of ``N`` confidences.
            iou_threshold: Overlap at or above which a box is suppressed.

        Returns:
            Indices into ``boxes`` of the kept boxes, highest score first,
            as plain Python ints.
        """
        order = np.argsort(scores)[::-1]
        kept: List[int] = []

        while order.size > 0:
            best = order[0]
            kept.append(int(best))
            rest = order[1:]
            overlaps = ObjectDetector.compute_iou(boxes[best, :], boxes[rest, :])
            order = rest[overlaps < iou_threshold]
        return kept

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox ``image`` to the model input size and convert it to a tensor.

        The image is resized with its aspect ratio preserved, centred on a
        square canvas filled with ``_PAD_VALUE``, scaled to ``[0, 1]`` and
        reordered to ``(1, 3, input_size, input_size)``.

        The canvas has three channels, so a 3-channel ``(H, W, 3)`` image is
        expected. No channel reordering is done here.
        NOTE(review): if the model expects RGB and callers pass OpenCV BGR
        frames, the conversion has to happen before this call - confirm.

        Args:
            image: Input image array whose first two axes are height, width.

        Returns:
            ``(tensor, scale, (pad_w, pad_h))`` where ``scale`` is the resize
            factor and the padding is the left/top border in pixels.
        """
        size = self.input_size
        h, w = image.shape[:2]
        scale = min(size / h, size / w)
        # Truncation can hit 0 for extreme aspect ratios; cv2.resize needs >= 1.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))

        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        canvas = np.full((size, size, 3), self._PAD_VALUE, dtype=np.uint8)
        ph = (size - new_h) // 2
        pw = (size - new_w) // 2
        canvas[ph:ph + new_h, pw:pw + new_w] = resized

        blob = canvas.astype(np.float32) / 255.0
        # HWC -> CHW, then add the batch axis.
        return blob.transpose(2, 0, 1)[None, ...], scale, (pw, ph)

    def postprocess(
        self,
        predictions: np.ndarray,
        original_shape: Tuple[int, int],
        scale: float,
        padding: Tuple[int, int],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.45
    ) -> List[Dict]:
        """Decode raw model output into detections in original-image coordinates.

        Args:
            predictions: Raw output; after removing size-1 axes it is read as
                ``(4 + num_classes, num_candidates)``.
            original_shape: ``(height, width)`` of the source image.
            scale: Resize factor returned by ``preprocess``.
            padding: ``(pad_w, pad_h)`` returned by ``preprocess``.
            conf_threshold: Candidates whose best class score is not above
                this value are discarded.
            iou_threshold: IoU threshold passed to ``nms``.

        Returns:
            One dict per kept detection with keys ``"class"`` (label),
            ``"confidence"`` (best class score), ``"bbox"``
            (``[x1, y1, x2, y2]`` in original-image pixels, clipped to the
            image) and ``"bbox_normalized"`` (``[cx, cy, w, h]`` divided by
            the image width/height).
        """
        preds = np.squeeze(predictions)
        if preds.ndim == 1:
            # A single candidate loses its candidate axis in squeeze; restore it.
            preds = preds[:, None]
        preds = preds.T

        scores = np.max(preds[:, 4:], axis=1)
        confident = scores > conf_threshold
        preds = preds[confident]  # boolean indexing copies; the caller's array is untouched
        if preds.size == 0:
            return []
        # Filter the scores once instead of once per returned detection.
        scores = scores[confident]

        # Centre/size -> corner format.
        half_size = preds[:, 2:4] / 2
        boxes = np.concatenate(
            [preds[:, :2] - half_size, preds[:, :2] + half_size],
            axis=1,
        )

        # Undo the letterbox: remove the border, then the resize.
        pad_w, pad_h = padding
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale

        h, w = original_shape
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, w)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, h)

        class_ids = np.argmax(preds[:, 4:], axis=1)
        kept = self.nms(boxes, scores, iou_threshold)

        detections = []
        for i in kept:
            x1, y1, x2, y2 = boxes[i]
            detections.append({
                "class": self.class_names[int(class_ids[i])],
                "confidence": float(scores[i]),
                "bbox": boxes[i].tolist(),
                "bbox_normalized": [
                    float((x1 + x2) / 2 / w),
                    float((y1 + y2) / 2 / h),
                    float((x2 - x1) / w),
                    float((y2 - y1) / h),
                ],
            })
        return detections

    def predict(self, image: np.ndarray) -> List[Dict]:
        """Run preprocessing, inference and postprocessing on one image.

        Args:
            image: Input image; see ``preprocess`` for the expected layout.

        Returns:
            The detections produced by ``postprocess`` with its default
            thresholds.
        """
        input_tensor, scale, padding = self.preprocess(image)
        outputs = self.session.run(None, {self._INPUT_NAME: input_tensor})
        return self.postprocess(outputs[0], image.shape[:2], scale, padding)