# model / inference.py
# Denny Lulak
# Fix
# fa35c9b
import numpy as np
import cv2
import onnxruntime as ort
from typing import List, Dict, Tuple
class ObjectDetector:
    """Single-image object detector backed by an ONNX Runtime session.

    The pipeline is ``preprocess`` (letterbox + normalise) -> session run ->
    ``postprocess`` (confidence filter, box decoding, NMS).

    The post-processing code treats the first model output as an array laid
    out as ``(1, 4 + num_classes, num_candidates)`` where the first four rows
    are ``cx, cy, w, h`` in letterboxed-input pixels and the remaining rows
    are per-class scores.
    """

    def __init__(self, model_path: str, class_names: List[str], input_size: int = 640):
        """Load the model and run one warm-up inference.

        Args:
            model_path: Path to the ONNX model file.
            class_names: Class labels, indexed by the model's class id.
            input_size: Side length of the square model input, in pixels.
        """
        self.class_names = class_names
        self.input_size = input_size
        self.session = self._load_model(model_path)
        self.input_name = self._resolve_input_name()
        self._warmup()

    def _load_model(self, model_path: str) -> "ort.InferenceSession":
        """Create an inference session with all graph optimisations enabled.

        The CUDA provider is requested first, with the CPU provider listed
        after it.
        """
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        return ort.InferenceSession(
            model_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
            sess_options=options,
        )

    def _resolve_input_name(self) -> str:
        """Return the name of the model input that images are fed to.

        ``"images"`` is kept whenever the model declares it (or declares no
        inputs at all), so models that already worked behave the same;
        otherwise the first declared input is used.
        """
        names = [node.name for node in self.session.get_inputs()]
        if not names or "images" in names:
            return "images"
        return names[0]

    def _warmup(self):
        """Run one inference on random data so the first real call is not the first run."""
        dummy_input = np.random.randn(
            1, 3, self.input_size, self.input_size
        ).astype(np.float32)
        self.session.run(None, {getattr(self, "input_name", "images"): dummy_input})

    @staticmethod
    def compute_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        """Compute the IoU of one box against an array of boxes.

        Args:
            box: One box as ``[x1, y1, x2, y2]``.
            boxes: Array of shape ``(N, 4)`` in the same corner format.

        Returns:
            Array of ``N`` IoU values. A small epsilon in the denominator
            avoids division by zero for degenerate boxes.
        """
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Negative extents mean the boxes do not overlap on that axis.
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        return intersection_area / (box_area + boxes_area - intersection_area + 1e-6)

    @staticmethod
    def nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
        """Greedy non-maximum suppression over all boxes, regardless of class.

        Args:
            boxes: Array of shape ``(N, 4)`` as ``[x1, y1, x2, y2]``.
            scores: Array of ``N`` confidence scores.
            iou_threshold: Boxes whose IoU with a kept box is at or above
                this value are suppressed.

        Returns:
            Indices into ``boxes`` of the kept boxes, highest score first,
            as plain Python ints.
        """
        sorted_indices = np.argsort(scores)[::-1]
        keep_boxes: List[int] = []
        while sorted_indices.size > 0:
            box_id = sorted_indices[0]
            keep_boxes.append(int(box_id))
            ious = ObjectDetector.compute_iou(
                boxes[box_id, :], boxes[sorted_indices[1:], :]
            )
            keep_indices = np.where(ious < iou_threshold)[0]
            # ``ious`` is relative to ``sorted_indices[1:]``, hence the +1.
            sorted_indices = sorted_indices[keep_indices + 1]
        return keep_boxes

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox an image into the square model input.

        The image is resized with its aspect ratio preserved, centred on a
        grey (114) canvas, scaled to ``[0, 1]`` and converted to NCHW.

        Args:
            image: ``(H, W, 3)`` image array. Channel order is passed through
                unchanged (NOTE(review): confirm the caller supplies the
                channel order the model was trained with).

        Returns:
            ``(blob, scale, (pad_w, pad_h))`` where ``blob`` has shape
            ``(1, 3, input_size, input_size)``, ``scale`` is the resize
            factor and the padding is the left/top offset of the resized
            image on the canvas.
        """
        h, w = image.shape[:2]
        scale = min(self.input_size / h, self.input_size / w)
        # Clamp to 1 so extreme aspect ratios never request a 0-pixel resize.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
        pad_h = (self.input_size - new_h) // 2
        pad_w = (self.input_size - new_w) // 2
        canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized

        blob = canvas.astype(np.float32) / 255.0
        return blob.transpose(2, 0, 1)[None, ...], scale, (pad_w, pad_h)

    def postprocess(
        self,
        predictions: np.ndarray,
        original_shape: Tuple[int, int],
        scale: float,
        padding: Tuple[int, int],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.45
    ) -> List[Dict]:
        """Decode raw model output into detections in original-image pixels.

        Args:
            predictions: Raw output shaped ``(4 + C, N)``, optionally with
                leading singleton axes such as ``(1, 4 + C, N)``.
            original_shape: ``(height, width)`` of the source image.
            scale: Resize factor returned by ``preprocess``.
            padding: ``(pad_w, pad_h)`` returned by ``preprocess``.
            conf_threshold: Candidates whose best class score is not above
                this value are dropped.
            iou_threshold: IoU threshold passed to ``nms``.

        Returns:
            One dict per kept detection with keys ``class``, ``confidence``,
            ``bbox`` (``[x1, y1, x2, y2]`` in pixels) and ``bbox_normalized``
            (``[cx, cy, w, h]`` relative to the image size).

        Raises:
            ValueError: If ``predictions`` holds more than one batch element.
        """
        preds = np.asarray(predictions)
        if preds.ndim > 2:
            # Drop only the leading singleton axes. A blanket squeeze would
            # also collapse a single-candidate or single-row output.
            if any(dim != 1 for dim in preds.shape[:-2]):
                raise ValueError(
                    "postprocess expects a single-image output, got shape "
                    f"{preds.shape}"
                )
            preds = preds.reshape(preds.shape[-2:])
        preds = preds.T  # -> (N, 4 + C)

        scores = np.max(preds[:, 4:], axis=1)
        valid = scores > conf_threshold
        if not valid.any():
            return []

        # Boolean indexing copies, so the caller's array is never modified.
        preds = preds[valid]
        scores = scores[valid]
        class_ids = np.argmax(preds[:, 4:], axis=1)

        boxes = preds[:, :4].copy()
        # (cx, cy, w, h) -> (x1, y1, x2, y2)
        boxes[:, [0, 1]] = boxes[:, [0, 1]] - boxes[:, [2, 3]] / 2
        boxes[:, [2, 3]] = boxes[:, [0, 1]] + boxes[:, [2, 3]]

        # Undo the letterbox: remove the padding offset, then the resize.
        pad_w, pad_h = padding
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale

        img_h, img_w = original_shape
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, img_w)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, img_h)

        detections = []
        for i in self.nms(boxes, scores, iou_threshold):
            x1, y1, x2, y2 = boxes[i]
            detections.append({
                "class": self.class_names[int(class_ids[i])],
                "confidence": float(scores[i]),
                "bbox": boxes[i].tolist(),
                "bbox_normalized": [
                    float((x1 + x2) / 2 / img_w),
                    float((y1 + y2) / 2 / img_h),
                    float((x2 - x1) / img_w),
                    float((y2 - y1) / img_h),
                ],
            })
        return detections

    def predict(self, image: np.ndarray) -> List[Dict]:
        """Run the full pipeline on one image and return its detections.

        Uses the default confidence and IoU thresholds of ``postprocess``.
        """
        input_tensor, scale, padding = self.preprocess(image)
        # Fall back to "images" for instances whose __init__ was bypassed.
        input_name = getattr(self, "input_name", "images")
        outputs = self.session.run(None, {input_name: input_tensor})
        return self.postprocess(outputs[0], image.shape[:2], scale, padding)