#!/usr/bin/env python3
"""
RTMPose-M axengine inference on AXERA NPU.
"""
import argparse
import os
from time import time
from typing import Tuple
import cv2
import numpy as np
try:
import axengine as axe
except ImportError:
import onnxruntime as axe
# Divisor that main() applies to the SimCC arg-max bin indices before mapping
# them back to image coordinates.
SIMCC_SPLIT_RATIO = 2.0
# Keypoint count shown in main()'s "kpts above ..." summary line.
# NOTE(review): presumably the 17 COCO body keypoints — confirm against the model.
NUM_KP = 17
# Pairs of keypoint indices that draw() joins with a line segment.
# NOTE(review): indices appear to follow the COCO 17-keypoint ordering — confirm.
COCO_SKELETON = [
(15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
(5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
(7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
(1, 3), (2, 4), (3, 5), (4, 6),
]
def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """Convert a corner-format box into its centre point and padded size.

    Args:
        bbox: Four values ``(x1, y1, x2, y2)``.
        padding: Multiplier applied to the box width and height.

    Returns:
        ``(center, scale)`` as two float32 arrays of length 2: the box
        midpoint ``(cx, cy)`` and the padded extent ``(w, h)``.
    """
    left, top, right, bottom = bbox
    mid_x = (left + right) * 0.5
    mid_y = (top + bottom) * 0.5
    padded_w = (right - left) * padding
    padded_h = (bottom - top) * padding
    return (
        np.array([mid_x, mid_y], dtype=np.float32),
        np.array([padded_w, padded_h], dtype=np.float32),
    )
def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
    """Grow one side of a box so that ``width / height == aspect_ratio``.

    The box is never shrunk: if it is too wide for the target ratio the
    height is enlarged, otherwise the width is enlarged.

    Args:
        bbox_scale: Two values ``(w, h)``.
        aspect_ratio: Target width-to-height ratio.

    Returns:
        A float32 array ``(w, h)`` with the requested ratio.
    """
    width, height = bbox_scale
    too_wide = width > height * aspect_ratio
    fitted = (
        (width, width / aspect_ratio)
        if too_wide
        else (height * aspect_ratio, height)
    )
    return np.array(fitted, dtype=np.float32)
def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
    """Rotate a 2-D point about the origin.

    Args:
        pt: Indexable holding ``x`` at position 0 and ``y`` at position 1.
        angle_rad: Rotation angle in radians.

    Returns:
        Array ``[x*cos - y*sin, x*sin + y*cos]``.
    """
    sin_a = np.sin(angle_rad)
    cos_a = np.cos(angle_rad)
    px = pt[0]
    py = pt[1]
    rotated_x = cos_a * px - sin_a * py
    rotated_y = sin_a * px + cos_a * py
    return np.array([rotated_x, rotated_y])
def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return a third point forming a right angle at ``b`` with segment ``a-b``.

    The offset ``a - b`` is turned by a quarter turn, ``(dx, dy) -> (-dy, dx)``,
    and added to ``b``.

    Args:
        a: First 2-D point.
        b: Second 2-D point; the corner of the right angle.

    Returns:
        The 2-D point ``b + (-dy, dx)``.
    """
    offset = a - b
    quarter_turn = np.r_[-offset[1], offset[0]]
    return b + quarter_turn
def get_warp_matrix(center, scale, rot, output_size):
    """Build the affine matrix that maps a source box onto the output canvas.

    Three point correspondences are constructed and handed to
    ``cv2.getAffineTransform``: the box centre, a point half a box-width
    from the centre (rotated by ``rot``), and a third point at a right
    angle to those two.

    Only ``scale[0]`` (the box width) is read, so the box is expected to
    already have the same aspect ratio as ``output_size``.

    Args:
        center: Box centre ``(cx, cy)`` in source-image coordinates.
        scale: Box size ``(w, h)`` in source-image coordinates.
        rot: Rotation in degrees.
        output_size: ``(width, height)`` of the destination image.

    Returns:
        The matrix produced by ``cv2.getAffineTransform``.
    """
    out_w, out_h = output_size
    box_w = scale[0]

    # Reference directions: half a width along -y, rotated on the source side.
    src_up = _rotate_point(np.array([0.0, box_w * -0.5]), np.deg2rad(rot))
    dst_up = np.array([0.0, out_w * -0.5])
    dst_center = [out_w * 0.5, out_h * 0.5]

    src_tri = np.zeros((3, 2), dtype=np.float32)
    dst_tri = np.zeros((3, 2), dtype=np.float32)

    src_tri[0] = center
    src_tri[1] = center + src_up
    # The third vertex is derived from the float32 rows already stored.
    src_tri[2] = _get_3rd_point(src_tri[0], src_tri[1])

    dst_tri[0] = dst_center
    dst_tri[1] = dst_center + dst_up
    dst_tri[2] = _get_3rd_point(dst_tri[0], dst_tri[1])

    return cv2.getAffineTransform(src_tri, dst_tri)
def preprocess(img_bgr, input_size=(192, 256)):
    """Warp a whole image into the model's input resolution.

    The full image is used as the bounding box (no detector is involved),
    enlarged by a factor of 1.25 and then expanded to the aspect ratio of
    ``input_size`` before being affine-warped.

    Args:
        img_bgr: Image array whose first two dimensions are ``(height, width)``.
        input_size: ``(width, height)`` of the model input.

    Returns:
        ``(inp, center, scale)``: the warped image with a leading batch
        axis, plus the box centre and size used for the warp (needed to map
        predictions back to the original image).
    """
    img_h, img_w = img_bgr.shape[:2]
    whole_image = np.array([0, 0, img_w, img_h], dtype=np.float32)

    center, scale = bbox_xyxy2cs(whole_image, padding=1.25)
    target_ratio = input_size[0] / input_size[1]
    scale = _fix_aspect_ratio(scale, target_ratio)

    affine = get_warp_matrix(center, scale, 0, input_size)
    crop = cv2.warpAffine(img_bgr, affine, input_size, flags=cv2.INTER_LINEAR)

    # Prepend the batch axis -> (1, H, W, 3), NHWC, still BGR; per the
    # original author's note the axmodel handles BGR->RGB itself.
    return crop[None], center, scale
def get_simcc_maximum(simcc_x, simcc_y):
    """Decode SimCC classification outputs into peak locations and scores.

    Args:
        simcc_x: Array-like of shape ``(N, K, Wx)`` with per-keypoint
            responses along the x axis.
        simcc_y: Array-like of shape ``(N, K, Wy)`` with per-keypoint
            responses along the y axis.

    Returns:
        ``(locs, scores)`` where ``locs`` is a float32 array of shape
        ``(N, K, 2)`` holding the ``(x, y)`` arg-max bin indices, and
        ``scores`` is an ``(N, K)`` array holding the smaller of the two
        peak values for each keypoint.

    Raises:
        ValueError: If either input is not 3-D, or the two inputs disagree
            on their leading ``(N, K)`` dimensions.
    """
    simcc_x = np.asarray(simcc_x)
    simcc_y = np.asarray(simcc_y)

    # Validate explicitly instead of relying on a tuple-unpack side effect.
    if simcc_x.ndim != 3 or simcc_y.ndim != 3:
        raise ValueError(
            "simcc_x and simcc_y must be 3-D (N, K, W); "
            f"got shapes {simcc_x.shape} and {simcc_y.shape}"
        )
    if simcc_x.shape[:2] != simcc_y.shape[:2]:
        raise ValueError(
            "simcc_x and simcc_y must share their (N, K) dimensions; "
            f"got shapes {simcc_x.shape} and {simcc_y.shape}"
        )

    x_locs = simcc_x.argmax(axis=2)
    y_locs = simcc_y.argmax(axis=2)
    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)

    # The value at the arg-max is the maximum, so take it directly.
    # A keypoint is only as confident as its weaker axis.
    scores = np.minimum(simcc_x.max(axis=2), simcc_y.max(axis=2))
    return locs, scores
def draw(img, keypoints, scores, thr=0.3):
    """Draw confident keypoints and skeleton edges onto ``img`` in place.

    A keypoint is visible when its score is ``>= thr``. Visible keypoints
    get a filled green dot; a skeleton edge from ``COCO_SKELETON`` is drawn
    only when both of its endpoints are visible. The same visibility test
    is used for dots and edges, and edges that reference a keypoint index
    not present in ``keypoints`` are skipped instead of raising.

    Args:
        img: Image array that OpenCV draws on; modified in place.
        keypoints: Iterable of ``(x, y)`` pixel coordinates.
        scores: Iterable of per-keypoint confidence values, aligned with
            ``keypoints``.
        thr: Minimum score for a keypoint to be drawn.
    """
    # Integer pixel position of every visible keypoint, keyed by its index.
    visible = {}
    for idx, ((x, y), score) in enumerate(zip(keypoints, scores)):
        if score >= thr:
            visible[idx] = (int(x), int(y))
            cv2.circle(img, visible[idx], 4, (0, 255, 0), -1)

    for start, end in COCO_SKELETON:
        # Membership doubles as the threshold check and the bounds check.
        if start in visible and end in visible:
            cv2.line(img, visible[start], visible[end], (255, 128, 0), 2)
def main():
    """Command-line entry point: run pose inference on one image.

    Steps: parse arguments, read the image, warp it to the model input
    size, run the model (warm-up runs followed by timed runs), decode the
    two SimCC outputs into keypoints in original-image coordinates, print
    them, and save the image with the pose drawn on it.

    Exits with an error message when ``--repeat`` is less than 1, when the
    input image cannot be read, or when the output image cannot be written.
    """
    # Local import: a monotonic, high-resolution clock suited to benchmarking.
    from time import perf_counter

    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", default="output/rtmpose_m_npu3.axmodel")
    ap.add_argument("-i", "--image", required=True)
    ap.add_argument("-o", "--output", default="ax_result.jpg")
    ap.add_argument("--score_thres", type=float, default=0.3)
    ap.add_argument("--warmup", type=int, default=3)
    ap.add_argument("--repeat", type=int, default=10)
    args = ap.parse_args()
    # At least one timed run is required: it produces `outputs` and avoids
    # dividing by zero when averaging.
    if args.repeat < 1:
        ap.error("--repeat must be >= 1")

    img0 = cv2.imread(args.image)
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if img0 is None:
        raise SystemExit(f"Cannot read {args.image}")

    # Single source of truth for the model input (width, height); used both
    # for the warp and for mapping the predictions back.
    input_size = (192, 256)
    inp, center, scale = preprocess(img0, input_size)

    model = axe.InferenceSession(args.model)
    inp_info = model.get_inputs()[0]
    dtype_str = getattr(inp_info, "dtype", getattr(inp_info, "type", "unknown"))
    print(f"Model input: name={inp_info.name}, shape={inp_info.shape}, dtype={dtype_str}")

    feed = {inp_info.name: inp}
    for _ in range(args.warmup):
        model.run(None, feed)

    t0 = perf_counter()
    for _ in range(args.repeat):
        outputs = model.run(None, feed)
    elapsed = (perf_counter() - t0) / args.repeat * 1000
    print(f"Forward: {elapsed:.2f} ms (avg of {args.repeat} runs)")

    simcc_x, simcc_y = outputs[0], outputs[1]
    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")

    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
    # SimCC bins -> model-input pixels.
    keypoints = locs / SIMCC_SPLIT_RATIO
    # Model-input pixels -> original image: normalise by the input size,
    # stretch to the box size, then offset by the box's top-left corner.
    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
    keypoints = keypoints[0]
    scores = scores[0]

    above = (scores >= args.score_thres).sum()
    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
    for i, ((x, y), sc) in enumerate(zip(keypoints, scores)):
        print(f" kp{i:02d}: ({x:6.1f}, {y:6.1f}) score={sc:.4f}")

    draw(img0, keypoints, scores, args.score_thres)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(args.output, img0):
        raise SystemExit(f"Cannot write {args.output}")
    print(f"Saved: {args.output}")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()