AXERA-TECH
/

YOLO11

+import axengine as axe
+import numpy as np
+import cv2
+import argparse
+from dataclasses import dataclass
# COCO class names. A detection's integer label is used as an index into this
# list, so the order must not change. Names are laid out ten per group; the
# trailing comment on each group gives the index range it covers.
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",  # 0-9
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",  # 10-19
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",  # 20-29
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",  # 30-39
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",  # 40-49
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",  # 50-59
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",  # 60-69
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",  # 70-79
]
@dataclass
class Object:
    """One detected object: its box, class index and confidence."""

    # Box stored as [x0, y0, width, height] (top-left corner plus extent).
    bbox: list
    # Integer class index of the detection.
    label: int
    # Confidence score attached to the detection.
    prob: float
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed in a form that only ever exponentiates a
    non-positive number, so very negative inputs evaluate to 0 without
    triggering a floating-point overflow warning.

    Args:
        x: A scalar or array-like of logits.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow regardless of x.
    z = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the same value is e^x / (1 + e^x).
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () unwraps a 0-d result into a NumPy scalar and leaves
    # real arrays untouched, matching what callers got for scalar input.
    return result[()]
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so the
    largest exponent is exp(0); the result is unchanged mathematically.

    Args:
        x: Array of logits.
        axis: Axis along which the values are normalised to sum to 1.

    Returns:
        An array shaped like ``x`` holding the normalised probabilities.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    exponentials = np.exp(x - peak)
    normaliser = np.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normaliser
def decode_distributions(feat, reg_max=16):
    """Collapse per-bin logits into their expected bin index.

    A softmax is taken over the last axis of ``feat`` and the resulting
    probabilities are used to weight the bin indices ``0 .. reg_max - 1``.

    Args:
        feat: Array whose last axis holds ``reg_max`` logits.
        reg_max: Number of bins along the last axis.

    Returns:
        The probability-weighted sum of bin indices, with the last axis of
        ``feat`` reduced away.
    """
    bin_indices = np.arange(reg_max)
    bin_probs = softmax(feat, axis=-1)
    return np.sum(bin_probs * bin_indices, axis=-1)
def preprocess(image_path, input_size):
    """Load an image from disk and build the model input batch.

    Args:
        image_path: Path handed to ``cv2.imread``.
        input_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument (the caller in this file passes width, height).

    Returns:
        A tuple ``(input_tensor, original_shape, image)`` where
        ``input_tensor`` is the resized RGB image as uint8 with a leading
        batch axis of 1, ``original_shape`` is the first two entries of the
        loaded image's shape, and ``image`` is the full-size RGB image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    bgr = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if bgr is None:
        raise FileNotFoundError(f"Unable to read image file: {image_path}")

    original_shape = bgr.shape[:2]
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, input_size)
    # Prepend the batch axis, then make sure the dtype is uint8.
    batch = resized[np.newaxis, ...].astype(np.uint8)
    return batch, original_shape, rgb
def _greedy_nms(boxes_xywh, scores, iou_threshold):
    """Return indices of the boxes kept by greedy NMS, best score first.

    Args:
        boxes_xywh: Array of shape (N, 4) holding [x0, y0, width, height].
        scores: Array of shape (N,) with one score per box.
        iou_threshold: A box is dropped when its IoU with an already kept
            box is greater than this value.

    Returns:
        A list of row indices into ``boxes_xywh`` / ``scores``.
    """
    x_min = boxes_xywh[:, 0]
    y_min = boxes_xywh[:, 1]
    x_max = boxes_xywh[:, 0] + boxes_xywh[:, 2]
    y_max = boxes_xywh[:, 1] + boxes_xywh[:, 3]
    areas = (x_max - x_min) * (y_max - y_min)

    order = scores.argsort()[::-1]
    kept = []
    while order.size > 0:
        best = order[0]
        kept.append(best)
        rest = order[1:]
        if rest.size == 0:
            break

        overlap_w = np.maximum(
            0, np.minimum(x_max[best], x_max[rest]) - np.maximum(x_min[best], x_min[rest])
        )
        overlap_h = np.maximum(
            0, np.minimum(y_max[best], y_max[rest]) - np.maximum(y_min[best], y_min[rest])
        )
        intersection = overlap_w * overlap_h
        union = areas[best] + areas[rest] - intersection
        # Two zero-area boxes give a zero union; treat their IoU as 0 rather
        # than dividing 0 by 0 and comparing against NaN.
        iou = np.divide(
            intersection, union, out=np.zeros_like(intersection), where=union > 0
        )
        order = rest[iou <= iou_threshold]
    return kept


def postprocess(outputs, original_shape, input_size, confidence_threshold, nms_threshold, reg_max=16):
    """Turn the raw head outputs into a list of detections.

    ``outputs[0]``, ``outputs[1]`` and ``outputs[2]`` are treated as the
    heads with strides 8, 16 and 32. Each is unpacked as a 4-D array
    ``(batch, grid_h, grid_w, channels)`` whose first ``4 * reg_max``
    channels are box-side distributions (left, top, right, bottom) and whose
    remaining channels are per-class logits.

    For every grid cell the best class is picked, its logit is passed through
    ``sigmoid`` and cells below ``confidence_threshold`` are dropped. Surviving
    cells are decoded relative to the cell centre ``(col + 0.5, row + 0.5) *
    stride``, rescaled to the original image, clipped to its bounds, and then
    filtered with per-class greedy NMS.

    Args:
        outputs: Sequence with at least three head outputs (see above).
        original_shape: ``(height, width)`` of the original image.
        input_size: ``(width, height)`` of the network input.
        confidence_threshold: Minimum class probability for a cell to be kept.
        nms_threshold: IoU above which a lower-scored box of the same class
            is suppressed.
        reg_max: Number of bins per box side.

    Returns:
        A list of ``Object`` with ``bbox`` as ``[x0, y0, width, height]`` in
        original-image pixels; empty when nothing passes the threshold.

    Raises:
        ValueError: If a head has no channels left for class scores after
            the ``4 * reg_max`` box channels.

    NOTE: when the batch dimension is larger than 1, detections of all batch
    items are pooled into the single returned list (and into the same NMS).
    """
    heads = [(outputs[0], 8), (outputs[1], 16), (outputs[2], 32)]
    bbox_channels = 4 * reg_max

    # Scale factors and clip limits are the same for every cell.
    orig_h, orig_w = original_shape[0], original_shape[1]
    scale_x = orig_w / input_size[0]
    scale_y = orig_h / input_size[1]
    max_x = orig_w - 1
    max_y = orig_h - 1

    box_chunks = []
    score_chunks = []
    label_chunks = []
    for output, stride in heads:
        batch_size, grid_h, grid_w, channels = output.shape
        # Whatever follows the box channels is class logits; this keeps the
        # function usable for models that are not trained on 80 classes.
        num_classes = channels - bbox_channels
        if num_classes <= 0:
            raise ValueError(
                f"Head with {channels} channels has no class channels for reg_max={reg_max}"
            )
        num_cells = grid_h * grid_w

        # Keep the batch axis so each cell is addressed as (batch, cell).
        bbox_logits = output[:, :, :, :bbox_channels].reshape(
            batch_size, num_cells, 4, reg_max
        )
        class_logits = output[:, :, :, bbox_channels:].reshape(
            batch_size, num_cells, num_classes
        )

        best_class = np.argmax(class_logits, axis=-1)
        best_logit = np.take_along_axis(
            class_logits, best_class[..., np.newaxis], axis=-1
        )[..., 0]
        best_prob = sigmoid(best_logit)

        # np.nonzero walks the mask in (batch, cell) order.
        batch_idx, cell_idx = np.nonzero(best_prob >= confidence_threshold)
        if batch_idx.size == 0:
            continue

        # (K, 4) distances in input pixels: left, top, right, bottom.
        distances = decode_distributions(bbox_logits[batch_idx, cell_idx], reg_max) * stride
        center_x = (cell_idx % grid_w + 0.5) * stride
        center_y = (cell_idx // grid_w + 0.5) * stride

        x0 = np.clip((center_x - distances[:, 0]) * scale_x, 0, max_x)
        y0 = np.clip((center_y - distances[:, 1]) * scale_y, 0, max_y)
        x1 = np.clip((center_x + distances[:, 2]) * scale_x, 0, max_x)
        y1 = np.clip((center_y + distances[:, 3]) * scale_y, 0, max_y)

        box_chunks.append(
            np.stack([x0, y0, x1 - x0, y1 - y0], axis=1).astype(np.float64)
        )
        score_chunks.append(best_prob[batch_idx, cell_idx].astype(np.float64))
        label_chunks.append(best_class[batch_idx, cell_idx])

    if not box_chunks:
        return []

    boxes = np.concatenate(box_chunks)
    scores = np.concatenate(score_chunks)
    class_ids = np.concatenate(label_chunks)

    final_detections = []
    # NMS runs independently per class so boxes of different classes never
    # suppress each other.
    for cls in np.unique(class_ids):
        members = np.nonzero(class_ids == cls)[0]
        cls_boxes = boxes[members]
        cls_scores = scores[members]
        for idx in _greedy_nms(cls_boxes, cls_scores, nms_threshold):
            final_detections.append(Object(
                bbox=cls_boxes[idx].tolist(),
                label=int(cls),
                prob=float(cls_scores[idx])
            ))
    return final_detections
def main():
    """Run single-image detection from the command line.

    Parses the CLI options, preprocesses the image, runs the model through an
    ``axe.InferenceSession``, post-processes the outputs, draws every
    detection on the original image and writes it to ``detections.png``.
    Any failure along the way is reported with ``print`` and ends the run.
    """
    parser = argparse.ArgumentParser(description="YOLO11 AXEngine Inference")
    parser.add_argument('--model', type=str, default='yolo11x.axmodel', help='Model path')
    parser.add_argument('--image', type=str, default='dog.jpg', help='Image path')
    parser.add_argument('--conf', type=float, default=0.45, help='Confidence threshold')
    parser.add_argument('--nms', type=float, default=0.45, help='NMS threshold')
    parser.add_argument('--size', type=int, nargs=2, default=[640, 640], help='Input size W H')
    parser.add_argument('--regmax', type=int, default=16, help='DFL reg_max value')
    args = parser.parse_args()
    input_size = tuple(args.size)

    try:
        input_tensor, original_shape, original_image = preprocess(args.image, input_size)
    except FileNotFoundError as e:
        print(e)
        return

    try:
        session = axe.InferenceSession(args.model)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    input_name = session.get_inputs()[0].name
    output_names = [output.name for output in session.get_outputs()]
    try:
        outputs = session.run(output_names, {input_name: input_tensor})
    except Exception as e:
        print(f"Error during inference: {e}")
        return

    try:
        detections = postprocess(
            outputs,
            original_shape,
            input_size,
            args.conf,
            args.nms,
            reg_max=args.regmax
        )
    except Exception as e:
        print(f"Error during post-processing: {e}")
        return

    for det in detections:
        # Fall back to a generic name for class ids beyond the COCO list.
        if det.label >= len(COCO_CLASSES):
            name = f"cls{det.label}"
        else:
            name = COCO_CLASSES[det.label]
        label = f"{name}: {det.prob:.2f}"

        x, y, w, h = map(int, det.bbox)
        cv2.rectangle(original_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Keep the caption on the canvas when the box touches the top edge;
        # otherwise it is drawn 10 px above the box as before.
        text_y = max(y - 10, 15)
        cv2.putText(original_image, label, (x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # The image is kept in RGB while drawing; convert back to BGR for imwrite.
    # imwrite reports failure through its return value, so check it before
    # claiming the file was saved.
    saved = cv2.imwrite('detections.png', cv2.cvtColor(original_image, cv2.COLOR_RGB2BGR))
    if not saved:
        print("Error saving result image: detections.png")
        return
    print("结果已保存到 detections.png")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()