"""ONNX Runtime object detector: letterbox preprocessing, inference, and NMS postprocessing."""
from typing import Dict, List, Tuple

import cv2
import numpy as np
import onnxruntime as ort
class ObjectDetector:
    """ONNX-Runtime object detector for YOLO-style models.

    Pipeline: letterbox the image to a square ``input_size`` canvas, run the
    ONNX session, then decode the raw ``(1, 4 + num_classes, N)`` output
    (center-x, center-y, width, height, per-class scores) into per-detection
    dicts with pixel-space and normalized boxes.
    """

    def __init__(self, model_path: str, class_names: List[str], input_size: int = 640):
        """
        Args:
            model_path: Path to the ONNX model file.
            class_names: Label for each class channel of the model output.
            input_size: Square side length the model expects (default 640).
        """
        self.class_names = class_names
        self.input_size = input_size
        self.session = self._load_model(model_path)
        self._warmup()

    def _load_model(self, model_path: str) -> "ort.InferenceSession":
        """Create an inference session, preferring CUDA with CPU fallback."""
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        return ort.InferenceSession(
            model_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
            sess_options=options,
        )

    def _warmup(self) -> None:
        """Run one dummy inference so lazy provider/kernel initialization
        does not penalize the first real prediction."""
        dummy_input = np.random.randn(1, 3, self.input_size, self.input_size).astype(np.float32)
        self.session.run(None, {"images": dummy_input})

    @staticmethod
    def compute_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        """Vectorized IoU between one xyxy ``box`` and an ``(N, 4)`` array.

        Returns an ``(N,)`` array of overlaps in [0, 1]; the 1e-6 epsilon
        guards against division by zero for degenerate (zero-area) boxes.
        """
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        return intersection_area / (box_area + boxes_area - intersection_area + 1e-6)

    @staticmethod
    def nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
        """Greedy non-maximum suppression over xyxy ``boxes``.

        NOTE(fix): ``compute_iou`` and ``nms`` were plain functions in the
        class body; ``self.nms(...)`` bound ``self`` to the ``boxes``
        parameter and raised TypeError. Both are now ``@staticmethod``.

        Returns the kept row indices, highest score first.
        """
        sorted_indices = np.argsort(scores)[::-1]  # descending by confidence
        keep_boxes: List[int] = []
        while sorted_indices.size > 0:
            box_id = sorted_indices[0]
            keep_boxes.append(int(box_id))
            # Overlap of the current best box with every remaining candidate.
            ious = ObjectDetector.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
            keep_indices = np.where(ious < iou_threshold)[0]
            # +1 because `ious` was computed against sorted_indices[1:].
            sorted_indices = sorted_indices[keep_indices + 1]
        return keep_boxes

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
        """Letterbox ``image`` (HWC uint8) onto a square model input.

        Returns:
            blob: ``(1, 3, S, S)`` float32 tensor scaled to [0, 1].
            scale: factor mapping original pixels -> resized pixels.
            (pad_w, pad_h): top-left offset of the resized image on the canvas.
        """
        h, w = image.shape[:2]
        scale = min(self.input_size / h, self.input_size / w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # 114 is the conventional YOLO letterbox gray fill value.
        canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
        ph, pw = (self.input_size - new_h) // 2, (self.input_size - new_w) // 2
        canvas[ph:ph + new_h, pw:pw + new_w] = resized
        blob = canvas.astype(np.float32) / 255.0
        return blob.transpose(2, 0, 1)[None, ...], scale, (pw, ph)

    def postprocess(
        self,
        predictions: np.ndarray,
        original_shape: Tuple[int, int],
        scale: float,
        padding: Tuple[int, int],
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.45
    ) -> List[Dict]:
        """Decode raw model output into detection dicts.

        Args:
            predictions: ``(1, 4 + num_classes, N)`` raw output tensor.
            original_shape: ``(height, width)`` of the source image.
            scale, padding: values returned by :meth:`preprocess`.
            conf_threshold: minimum class score to keep a candidate.
            iou_threshold: NMS overlap threshold.

        Returns:
            One dict per kept detection with ``class``, ``confidence``,
            ``bbox`` (xyxy pixels) and ``bbox_normalized`` (cx, cy, w, h
            as fractions of the original image).
        """
        # (1, 4 + C, N) -> (N, 4 + C): one row per candidate box.
        predictions = np.squeeze(predictions).T
        scores = np.max(predictions[:, 4:], axis=1)
        valid = scores > conf_threshold
        predictions = predictions[valid]
        scores = scores[valid]  # filter once; stays row-aligned below
        if predictions.size == 0:
            return []
        boxes = predictions[:, :4]
        # (cx, cy, w, h) -> (x1, y1, x2, y2)
        boxes[:, [0, 1]] = boxes[:, [0, 1]] - boxes[:, [2, 3]] / 2
        boxes[:, [2, 3]] = boxes[:, [0, 1]] + boxes[:, [2, 3]]
        # Undo the letterbox: remove padding, then rescale to original pixels.
        pad_w, pad_h = padding
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale
        h, w = original_shape
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, w)
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, h)
        class_ids = np.argmax(predictions[:, 4:], axis=1)
        # Class-agnostic NMS across all detections.
        indices = self.nms(boxes, scores, iou_threshold)
        return [{
            "class": self.class_names[int(class_ids[i])],
            "confidence": float(scores[i]),
            "bbox": boxes[i].tolist(),
            "bbox_normalized": [
                float((boxes[i][0] + boxes[i][2]) / 2 / w),
                float((boxes[i][1] + boxes[i][3]) / 2 / h),
                float((boxes[i][2] - boxes[i][0]) / w),
                float((boxes[i][3] - boxes[i][1]) / h)
            ]
        } for i in indices]

    def predict(self, image: np.ndarray) -> List[Dict]:
        """Run the full detection pipeline on one HWC image array."""
        input_tensor, scale, padding = self.preprocess(image)
        outputs = self.session.run(None, {"images": input_tensor})
        return self.postprocess(outputs[0], image.shape[:2], scale, padding)