Upload 13 files
Browse files- .gitattributes +5 -0
- AX650/rtmpose_m_npu1.axmodel +3 -0
- AX650/rtmpose_m_npu3.axmodel +3 -0
- ax_infer.py +162 -0
- ax_result.jpg +3 -0
- config.json +235 -0
- export_onnx.py +101 -0
- onnx_infer.py +155 -0
- replace_hardsigmoid.py +113 -0
- result_onnx.jpg +3 -0
- rtmpose_cali.tar +3 -0
- rtmpose_m_256x192.onnx +3 -0
- rtmpose_m_256x192_no_hs.onnx +3 -0
- test.jpg +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
ax_result.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
AX650/rtmpose_m_npu1.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
AX650/rtmpose_m_npu3.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
result_onnx.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
test.jpg filter=lfs diff=lfs merge=lfs -text
|
AX650/rtmpose_m_npu1.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4ebc503bdf788009706c60876b3ecd6ed12a833888286868f4cf4208a1eb91e
|
| 3 |
+
size 18848816
|
AX650/rtmpose_m_npu3.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17313d5d20b0070672bf8617960162ea2ac6f7b8da12b42b5bb4974c45570fa7
|
| 3 |
+
size 18247844
|
ax_infer.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
RTMPose-M axengine inference on AXERA NPU.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
from time import time
|
| 9 |
+
from typing import Tuple
|
| 10 |
+
|
| 11 |
+
import cv2
|
| 12 |
+
import numpy as np
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import axengine as axe
|
| 16 |
+
except ImportError:
|
| 17 |
+
import onnxruntime as axe
|
| 18 |
+
|
| 19 |
+
SIMCC_SPLIT_RATIO = 2.0
|
| 20 |
+
NUM_KP = 17
|
| 21 |
+
COCO_SKELETON = [
|
| 22 |
+
(15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
|
| 23 |
+
(5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
|
| 24 |
+
(7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
|
| 25 |
+
(1, 3), (2, 4), (3, 5), (4, 6),
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """Convert a single ``(x1, y1, x2, y2)`` box to a ``(center, scale)`` pair.

    Args:
        bbox: Four values unpacked as ``x1, y1, x2, y2``.
        padding: Multiplier applied to the box width and height.

    Returns:
        ``center`` as ``[cx, cy]`` and ``scale`` as ``[w * padding, h * padding]``,
        both float32 arrays of length 2.
    """
    left, top, right, bottom = bbox
    mid_x = (left + right) * 0.5
    mid_y = (top + bottom) * 0.5
    padded_w = (right - left) * padding
    padded_h = (bottom - top) * padding
    return (
        np.array([mid_x, mid_y], dtype=np.float32),
        np.array([padded_w, padded_h], dtype=np.float32),
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
    """Grow one side of ``(w, h)`` so that ``w / h == aspect_ratio``.

    The side that is already large enough is kept; the other one is enlarged.
    Returns a float32 array ``[w, h]``.
    """
    width, height = bbox_scale
    too_wide = width > height * aspect_ratio
    if too_wide:
        fitted = [width, width / aspect_ratio]
    else:
        fitted = [height * aspect_ratio, height]
    return np.array(fitted, dtype=np.float32)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
    """Rotate the 2D point ``pt`` about the origin by ``angle_rad`` radians."""
    sin_a = np.sin(angle_rad)
    cos_a = np.cos(angle_rad)
    px, py = pt[0], pt[1]
    rotated_x = cos_a * px - sin_a * py
    rotated_y = sin_a * px + cos_a * py
    return np.array([rotated_x, rotated_y])
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return ``b`` plus the vector ``a - b`` rotated by 90 degrees.

    Given two 2D points this yields a third, non-collinear point.
    """
    delta = a - b
    perpendicular = np.array([-delta[1], delta[0]])
    return b + perpendicular
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_warp_matrix(center, scale, rot, output_size):
    """Build the 2x3 affine matrix mapping a source box onto the output image.

    Three point correspondences are constructed (box center, a point offset
    from the center, and a third perpendicular point) and handed to
    ``cv2.getAffineTransform``.

    Args:
        center: Source box center ``(x, y)``.
        scale: Source box size; only ``scale[0]`` (the width) is used.
        rot: Rotation in degrees.
        output_size: Destination size as ``(width, height)``.
    """
    out_w, out_h = output_size

    # Offsets from the respective centers; the source one is rotated by `rot`.
    src_offset = _rotate_point(np.array([0.0, scale[0] * -0.5]), np.deg2rad(rot))
    dst_offset = np.array([0.0, out_w * -0.5])
    dst_center = [out_w * 0.5, out_h * 0.5]

    src_tri = np.zeros((3, 2), dtype=np.float32)
    dst_tri = np.zeros((3, 2), dtype=np.float32)

    src_tri[0] = center
    src_tri[1] = center + src_offset
    dst_tri[0] = dst_center
    # list + ndarray broadcasts element-wise via ndarray.__radd__.
    dst_tri[1] = dst_center + dst_offset

    src_tri[2] = _get_3rd_point(src_tri[0], src_tri[1])
    dst_tri[2] = _get_3rd_point(dst_tri[0], dst_tri[1])

    return cv2.getAffineTransform(src_tri, dst_tri)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def preprocess(img_bgr, input_size=(192, 256)):
    """Warp the whole image into the model input and return it with its box.

    The full image is used as the bounding box, padded by 1.25 and adjusted
    to the aspect ratio of ``input_size`` (given as ``(width, height)``).

    Returns:
        ``(inp, center, scale)`` where ``inp`` is the warped image with a
        leading batch axis, and ``center`` / ``scale`` describe the source box.
    """
    img_h, img_w = img_bgr.shape[:2]
    full_box = np.array([0, 0, img_w, img_h], dtype=np.float32)

    center, raw_scale = bbox_xyxy2cs(full_box, padding=1.25)
    target_ratio = input_size[0] / input_size[1]
    scale = _fix_aspect_ratio(raw_scale, target_ratio)

    affine = get_warp_matrix(center, scale, 0, input_size)
    warped = cv2.warpAffine(img_bgr, affine, input_size, flags=cv2.INTER_LINEAR)

    # (1, H, W, 3) uint8 NHWC BGR, axmodel handles BGR->RGB
    return warped[None], center, scale
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def get_simcc_maximum(simcc_x, simcc_y):
    """Find the peak bin and confidence of each keypoint from SimCC outputs.

    Args:
        simcc_x: Array of rank 3; the maximum is taken over axis 2.
        simcc_y: Array of rank 3; the maximum is taken over axis 2.

    Returns:
        ``(locs, scores)``: ``locs`` is float32 with a trailing axis of size 2
        holding the ``(x, y)`` argmax indices; ``scores`` is the element-wise
        minimum of the x and y peak values.

    Raises:
        ValueError: If either input is not 3-dimensional.
    """
    # Explicit rank check replaces the former unused `N, K, Wx = shape` unpack.
    if simcc_x.ndim != 3 or simcc_y.ndim != 3:
        raise ValueError(
            f"simcc_x/simcc_y must be 3-D, got shapes {simcc_x.shape} and {simcc_y.shape}"
        )

    x_locs = np.argmax(simcc_x, axis=2)
    y_locs = np.argmax(simcc_y, axis=2)
    # The value at the argmax is simply the maximum along that axis.
    x_vals = np.max(simcc_x, axis=2)
    y_vals = np.max(simcc_y, axis=2)

    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
    scores = np.minimum(x_vals, y_vals)
    return locs, scores
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def draw(img, keypoints, scores, thr=0.3):
    """Draw keypoints and skeleton limbs onto ``img`` in place.

    A keypoint is drawn as a filled circle unless its score is below ``thr``;
    a limb from ``COCO_SKELETON`` is drawn only when both endpoints score at
    least ``thr``.
    """
    joint_color = (0, 255, 0)
    limb_color = (255, 128, 0)

    for (kx, ky), conf in zip(keypoints, scores):
        if conf < thr:
            continue
        cv2.circle(img, (int(kx), int(ky)), 4, joint_color, -1)

    for start_idx, end_idx in COCO_SKELETON:
        if not (scores[start_idx] >= thr and scores[end_idx] >= thr):
            continue
        start = (int(keypoints[start_idx][0]), int(keypoints[start_idx][1]))
        end = (int(keypoints[end_idx][0]), int(keypoints[end_idx][1]))
        cv2.line(img, start, end, limb_color, 2)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def main():
    """Run RTMPose inference on one image, time it, and save a visualization.

    Parses the command line, warps the image to the model input, runs
    ``--warmup`` untimed and ``--repeat`` timed forward passes, decodes the
    SimCC outputs to image-space keypoints, prints them and writes the
    annotated image to ``--output``.
    """
    from time import perf_counter  # monotonic clock, better suited to benchmarking

    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", default="output/rtmpose_m_npu3.axmodel")
    ap.add_argument("-i", "--image", required=True)
    ap.add_argument("-o", "--output", default="ax_result.jpg")
    ap.add_argument("--score_thres", type=float, default=0.3)
    ap.add_argument("--warmup", type=int, default=3)
    ap.add_argument("--repeat", type=int, default=10)
    args = ap.parse_args()

    # At least one timed run is required: it produces `outputs` and avoids a
    # division by zero when averaging.
    if args.repeat < 1:
        ap.error("--repeat must be >= 1")

    img0 = cv2.imread(args.image)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if img0 is None:
        ap.error(f"Cannot read {args.image}")

    # Single source of truth for the (width, height) used both to warp the
    # image and to map keypoints back.
    input_size = (192, 256)
    inp, center, scale = preprocess(img0, input_size)

    model = axe.InferenceSession(args.model)
    inp_info = model.get_inputs()[0]
    # The input descriptor may expose either `dtype` or `type`.
    dtype_str = getattr(inp_info, "dtype", getattr(inp_info, "type", "unknown"))
    print(f"Model input: name={inp_info.name}, shape={inp_info.shape}, dtype={dtype_str}")

    feed = {inp_info.name: inp}
    for _ in range(args.warmup):
        model.run(None, feed)

    t0 = perf_counter()
    for _ in range(args.repeat):
        outputs = model.run(None, feed)
    elapsed = (perf_counter() - t0) / args.repeat * 1000
    print(f"Forward: {elapsed:.2f} ms (avg of {args.repeat} runs)")

    simcc_x, simcc_y = outputs[0], outputs[1]
    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")

    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
    # SimCC bins -> model-input pixels -> original image coordinates.
    keypoints = locs / SIMCC_SPLIT_RATIO
    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
    keypoints = keypoints[0]
    scores = scores[0]

    above = (scores >= args.score_thres).sum()
    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
    for i, ((x, y), sc) in enumerate(zip(keypoints, scores)):
        print(f" kp{i:02d}: ({x:6.1f}, {y:6.1f}) score={sc:.4f}")

    draw(img0, keypoints, scores, args.score_thres)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(args.output, img0):
        raise SystemExit(f"Failed to write {args.output}")
    print(f"Saved: {args.output}")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
if __name__ == "__main__":
|
| 162 |
+
main()
|
ax_result.jpg
ADDED
|
Git LFS Details
|
config.json
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "ONNX",
|
| 3 |
+
"npu_mode": "NPU3",
|
| 4 |
+
"input_shapes": "input:1x3x256x192",
|
| 5 |
+
"quant": {
|
| 6 |
+
"input_configs": [
|
| 7 |
+
{
|
| 8 |
+
"tensor_name": "DEFAULT",
|
| 9 |
+
"calibration_dataset": "./rtmpose_cali.tar",
|
| 10 |
+
"calibration_size": 100,
|
| 11 |
+
"calibration_mean": [
|
| 12 |
+
123.675,
|
| 13 |
+
116.28,
|
| 14 |
+
103.53
|
| 15 |
+
],
|
| 16 |
+
"calibration_std": [
|
| 17 |
+
58.395,
|
| 18 |
+
57.12,
|
| 19 |
+
57.375
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
],
|
| 23 |
+
"calibration_method": "MSE",
|
| 24 |
+
"layer_configs": [
|
| 25 |
+
{
|
| 26 |
+
"layer_names": [
|
| 27 |
+
"Conv_0",
|
| 28 |
+
"Conv_3",
|
| 29 |
+
"Conv_6",
|
| 30 |
+
"Conv_9",
|
| 31 |
+
"Conv_12",
|
| 32 |
+
"Conv_15",
|
| 33 |
+
"Conv_18",
|
| 34 |
+
"Conv_21",
|
| 35 |
+
"Conv_24",
|
| 36 |
+
"Conv_28",
|
| 37 |
+
"Conv_31",
|
| 38 |
+
"Conv_34",
|
| 39 |
+
"Conv_46",
|
| 40 |
+
"Conv_55",
|
| 41 |
+
"Conv_58",
|
| 42 |
+
"Conv_65",
|
| 43 |
+
"Conv_68",
|
| 44 |
+
"Conv_75",
|
| 45 |
+
"Conv_78",
|
| 46 |
+
"Conv_85",
|
| 47 |
+
"Conv_88",
|
| 48 |
+
"Conv_103",
|
| 49 |
+
"Conv_112",
|
| 50 |
+
"Conv_115",
|
| 51 |
+
"Conv_122",
|
| 52 |
+
"Conv_125",
|
| 53 |
+
"Conv_132",
|
| 54 |
+
"Conv_135",
|
| 55 |
+
"Conv_142",
|
| 56 |
+
"Conv_145",
|
| 57 |
+
"Conv_160",
|
| 58 |
+
"Conv_179",
|
| 59 |
+
"Conv_182",
|
| 60 |
+
"Conv_188",
|
| 61 |
+
"Conv_191",
|
| 62 |
+
"Conv_205",
|
| 63 |
+
"Conv_40",
|
| 64 |
+
"Conv_43",
|
| 65 |
+
"Conv_49",
|
| 66 |
+
"Conv_52",
|
| 67 |
+
"Conv_71",
|
| 68 |
+
"Conv_81",
|
| 69 |
+
"Conv_91",
|
| 70 |
+
"Conv_97",
|
| 71 |
+
"Conv_100",
|
| 72 |
+
"Conv_106",
|
| 73 |
+
"Conv_109",
|
| 74 |
+
"Conv_118",
|
| 75 |
+
"Conv_128",
|
| 76 |
+
"Conv_138",
|
| 77 |
+
"Conv_148",
|
| 78 |
+
"Conv_154",
|
| 79 |
+
"Conv_157",
|
| 80 |
+
"Conv_163",
|
| 81 |
+
"Conv_170",
|
| 82 |
+
"Conv_173",
|
| 83 |
+
"Conv_176",
|
| 84 |
+
"Conv_185",
|
| 85 |
+
"Conv_194",
|
| 86 |
+
"Conv_199",
|
| 87 |
+
"Conv_202",
|
| 88 |
+
"GlobalAveragePool_39",
|
| 89 |
+
"GlobalAveragePool_153",
|
| 90 |
+
"GlobalAveragePool_198",
|
| 91 |
+
"Concat_152",
|
| 92 |
+
"Concat_169",
|
| 93 |
+
"op_1:onnx.Silu",
|
| 94 |
+
"op_2:onnx.Silu",
|
| 95 |
+
"op_5:onnx.Silu",
|
| 96 |
+
"op_6:onnx.Silu",
|
| 97 |
+
"op_7:onnx.Silu",
|
| 98 |
+
"op_8:onnx.Silu",
|
| 99 |
+
"op_9:onnx.Silu",
|
| 100 |
+
"op_10:onnx.Silu",
|
| 101 |
+
"op_11:onnx.Silu",
|
| 102 |
+
"op_12:onnx.Silu",
|
| 103 |
+
"op_13:onnx.Silu",
|
| 104 |
+
"op_15:onnx.Silu",
|
| 105 |
+
"op_16:onnx.Silu",
|
| 106 |
+
"op_17:onnx.Silu",
|
| 107 |
+
"op_18:onnx.Silu",
|
| 108 |
+
"op_19:onnx.Silu",
|
| 109 |
+
"op_20:onnx.Silu",
|
| 110 |
+
"op_21:onnx.Silu",
|
| 111 |
+
"op_23:onnx.Silu",
|
| 112 |
+
"op_25:onnx.Silu",
|
| 113 |
+
"op_26:onnx.Silu",
|
| 114 |
+
"op_27:onnx.Silu",
|
| 115 |
+
"op_29:onnx.Silu",
|
| 116 |
+
"op_30:onnx.Silu",
|
| 117 |
+
"op_34:onnx.Silu",
|
| 118 |
+
"op_49:onnx.Silu",
|
| 119 |
+
"op_50:onnx.Silu",
|
| 120 |
+
"op_52:onnx.Silu",
|
| 121 |
+
"op_54:onnx.Silu",
|
| 122 |
+
"op_56:onnx.Silu",
|
| 123 |
+
"op_57:onnx.Silu",
|
| 124 |
+
"op_58:onnx.Silu",
|
| 125 |
+
"Add_64",
|
| 126 |
+
"Add_74",
|
| 127 |
+
"Add_84",
|
| 128 |
+
"Add_94",
|
| 129 |
+
"Add_121",
|
| 130 |
+
"Add_131",
|
| 131 |
+
"Add_141",
|
| 132 |
+
"Add_151",
|
| 133 |
+
"MaxPool_166",
|
| 134 |
+
"MaxPool_167",
|
| 135 |
+
"MaxPool_168",
|
| 136 |
+
"Reshape_212",
|
| 137 |
+
"Split_233",
|
| 138 |
+
"Unsqueeze_234",
|
| 139 |
+
"Split_237",
|
| 140 |
+
"Squeeze_238",
|
| 141 |
+
"Squeeze_239",
|
| 142 |
+
"Transpose_240"
|
| 143 |
+
],
|
| 144 |
+
"data_type": "U16"
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"layer_names": [
|
| 148 |
+
"Conv_61",
|
| 149 |
+
"op_32:onnx.Silu",
|
| 150 |
+
"op_43:onnx.Silu",
|
| 151 |
+
"op_53:onnx.Silu",
|
| 152 |
+
"op_4:onnx.Silu",
|
| 153 |
+
"op_14:onnx.Silu",
|
| 154 |
+
"op_22:onnx.Silu",
|
| 155 |
+
"op_33:onnx.Silu",
|
| 156 |
+
"op_35:onnx.Silu",
|
| 157 |
+
"op_37:onnx.Silu",
|
| 158 |
+
"op_38:onnx.Silu",
|
| 159 |
+
"op_39:onnx.Silu",
|
| 160 |
+
"op_40:onnx.Silu",
|
| 161 |
+
"hs_replace_1_Mul",
|
| 162 |
+
"hs_replace_1_Add",
|
| 163 |
+
"hs_replace_1_Clip",
|
| 164 |
+
"Mul_42",
|
| 165 |
+
"Concat_95",
|
| 166 |
+
"GlobalAveragePool_96",
|
| 167 |
+
"hs_replace_2_Mul",
|
| 168 |
+
"hs_replace_2_Add",
|
| 169 |
+
"hs_replace_2_Clip",
|
| 170 |
+
"Mul_99",
|
| 171 |
+
"hs_replace_3_Mul",
|
| 172 |
+
"hs_replace_3_Add",
|
| 173 |
+
"hs_replace_3_Clip",
|
| 174 |
+
"Mul_156",
|
| 175 |
+
"hs_replace_4_Mul",
|
| 176 |
+
"hs_replace_4_Add",
|
| 177 |
+
"hs_replace_4_Clip",
|
| 178 |
+
"Mul_201",
|
| 179 |
+
"op_66:onnx.RMSNormalization",
|
| 180 |
+
"op_61:onnx.FullyConnected",
|
| 181 |
+
"Mul_249",
|
| 182 |
+
"op_67:onnx.RMSNormalization",
|
| 183 |
+
"op_62:onnx.FullyConnected",
|
| 184 |
+
"op_36:onnx.Silu",
|
| 185 |
+
"Mul_235",
|
| 186 |
+
"Add_236",
|
| 187 |
+
"MatMul_241",
|
| 188 |
+
"op_68:onnx.Mul",
|
| 189 |
+
"Relu_244",
|
| 190 |
+
"Mul_245",
|
| 191 |
+
"MatMul_246",
|
| 192 |
+
"Mul_247",
|
| 193 |
+
"op_63:onnx.FullyConnected",
|
| 194 |
+
"Add_250",
|
| 195 |
+
"op_64:onnx.FullyConnected",
|
| 196 |
+
"op_65:onnx.FullyConnected",
|
| 197 |
+
"op_48:onnx.Silu",
|
| 198 |
+
"op_41:onnx.Silu",
|
| 199 |
+
"op_42:onnx.Silu",
|
| 200 |
+
"op_44:onnx.Silu",
|
| 201 |
+
"op_45:onnx.Silu",
|
| 202 |
+
"op_46:onnx.Silu",
|
| 203 |
+
"op_47:onnx.Silu",
|
| 204 |
+
"Add_27",
|
| 205 |
+
"Add_37",
|
| 206 |
+
"op_24:onnx.Silu",
|
| 207 |
+
"Concat_38",
|
| 208 |
+
"op_28:onnx.Silu",
|
| 209 |
+
"Concat_197",
|
| 210 |
+
"op_3:onnx.Silu",
|
| 211 |
+
"op_55:onnx.Silu",
|
| 212 |
+
"op_31:onnx.Silu",
|
| 213 |
+
"op_51:onnx.Silu"
|
| 214 |
+
],
|
| 215 |
+
"data_type": "FP32",
|
| 216 |
+
"weight_data_type": "FP32",
|
| 217 |
+
"output_data_type": "FP32"
|
| 218 |
+
}
|
| 219 |
+
],
|
| 220 |
+
"precision_analysis": true,
|
| 221 |
+
"precision_analysis_method": "EndToEnd"
|
| 222 |
+
},
|
| 223 |
+
"input_processors": [
|
| 224 |
+
{
|
| 225 |
+
"tensor_name": "DEFAULT",
|
| 226 |
+
"tensor_format": "RGB",
|
| 227 |
+
"src_format": "BGR",
|
| 228 |
+
"src_dtype": "U8",
|
| 229 |
+
"src_layout": "NHWC"
|
| 230 |
+
}
|
| 231 |
+
],
|
| 232 |
+
"compiler": {
|
| 233 |
+
"check": 0
|
| 234 |
+
}
|
| 235 |
+
}
|
export_onnx.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Export RTMPose-M 256x192 ONNX from official OpenMMLab pretrained model.
|
| 3 |
+
|
| 4 |
+
Downloads the official pre-exported ONNX from OpenMMLab model zoo,
|
| 5 |
+
converts opset if needed, and fixes the batch dimension to a static value (1 by default).
|
| 6 |
+
|
| 7 |
+
Model: RTMPose-M (13.58M params)
|
| 8 |
+
Input: 1x3x256x192 (RGB, float32)
|
| 9 |
+
Output: simcc_x (1,17,384), simcc_y (1,17,512)
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import io
|
| 14 |
+
import os
|
| 15 |
+
import zipfile
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import onnx
|
| 19 |
+
import onnx.version_converter
|
| 20 |
+
import requests
|
| 21 |
+
|
| 22 |
+
ONNX_URL = (
|
| 23 |
+
"https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/"
|
| 24 |
+
"onnx_sdk/rtmpose-m_simcc-body7_pt-body7_420e-256x192-"
|
| 25 |
+
"e48f03d0_20230504.zip"
|
| 26 |
+
)
|
| 27 |
+
CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".model_cache")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def download_onnx() -> str:
    """Download official RTMPose-M ONNX and cache locally.

    Returns:
        Path of the cached ``.onnx`` file inside ``CACHE_DIR``.

    Raises:
        requests.HTTPError: If the download returns an error status.
        RuntimeError: If the downloaded archive contains no ``.onnx`` member.
    """
    cache_path = os.path.join(CACHE_DIR, "rtmpose_m_official.onnx")
    if os.path.exists(cache_path):
        print(f"Using cached ONNX: {cache_path}")
        return cache_path

    os.makedirs(CACHE_DIR, exist_ok=True)
    print("Downloading official RTMPose-M ONNX from OpenMMLab...")
    resp = requests.get(ONNX_URL, timeout=120)
    resp.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        onnx_name = next((name for name in zf.namelist() if name.endswith(".onnx")), None)
        if onnx_name is None:
            raise RuntimeError("No .onnx found in downloaded zip")

        # Extract to a temporary name and rename atomically, so an interrupted
        # run never leaves a truncated file that later passes the cache check.
        tmp_path = cache_path + ".part"
        try:
            with zf.open(onnx_name) as src, open(tmp_path, "wb") as dst:
                dst.write(src.read())
            os.replace(tmp_path, cache_path)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    print(f"Cached: {cache_path}")
    return cache_path
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def convert_opset(model: onnx.ModelProto, target_opset: int) -> onnx.ModelProto:
    """Convert ONNX model to target opset version if needed.

    The current version is read from the default-domain (``""`` / ``ai.onnx``)
    entry of ``opset_import``. If no such entry exists, the first entry is used.

    Returns:
        ``model`` itself when it is already at ``target_opset``, otherwise the
        result of ``onnx.version_converter.convert_version``.
    """
    # opset_import may list several domains; entry 0 is not guaranteed to be
    # the default operator set.
    current_opset = next(
        (imp.version for imp in model.opset_import if imp.domain in ("", "ai.onnx")),
        None,
    )
    if current_opset is None:
        current_opset = model.opset_import[0].version

    if current_opset == target_opset:
        return model
    print(f"Converting opset {current_opset} -> {target_opset}")
    return onnx.version_converter.convert_version(model, target_opset)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def fix_batch_dim(model: onnx.ModelProto, batch: int = 1) -> None:
    """Replace a dynamic leading dim on graph inputs/outputs with a static value.

    A leading dimension is treated as dynamic when it carries a symbolic
    ``dim_param`` or has no value set at all. Dimensions that already hold a
    ``dim_value`` are left untouched, as are tensors without any dimensions.

    Args:
        model: Model modified in place.
        batch: Static size written into the leading dimension.
    """
    for tensor in list(model.graph.input) + list(model.graph.output):
        dims = tensor.type.tensor_type.shape.dim
        if not dims:
            # Scalar or shape-less tensor: there is no batch axis to fix.
            continue
        dim0 = dims[0]
        # `dim_value` and `dim_param` share a oneof, so assigning dim_value
        # also clears any symbolic name.
        if dim0.WhichOneof("value") != "dim_value":
            dim0.dim_value = batch
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def print_model_info(model: onnx.ModelProto) -> None:
    """Print model parameter count and IO shapes.

    The parameter count is the summed element count of all graph
    initializers. Symbolic dimensions are shown by name rather than as 0.
    """
    total_params = sum(int(np.prod(init.dims)) for init in model.graph.initializer)
    print(f"Parameters: {total_params / 1e6:.2f}M")

    io_groups = (("Input", model.graph.input), ("Output", model.graph.output))
    for label, tensors in io_groups:
        for tensor in tensors:
            # dim_value is 0 for symbolic dims; prefer the symbolic name then.
            dims = [d.dim_param or d.dim_value for d in tensor.type.tensor_type.shape.dim]
            print(f" {label}: {tensor.name} {dims}")
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def main():
    """Fetch the official ONNX, convert its opset, fix the batch dim and save it."""
    parser = argparse.ArgumentParser(description="Export RTMPose-M 256x192 ONNX")
    parser.add_argument("--opset", type=int, default=13, help="Target ONNX opset version")
    parser.add_argument("--output", default="rtmpose_m_256x192.onnx", help="Output path")
    parser.add_argument("--batch", type=int, default=1, help="Static batch size")
    opts = parser.parse_args()

    exported = convert_opset(onnx.load(download_onnx()), opts.opset)
    fix_batch_dim(exported, opts.batch)
    onnx.save(exported, opts.output)

    size_mb = os.path.getsize(opts.output) / 1e6
    print(f"\nExported: {opts.output} ({size_mb:.2f} MB)")
    print_model_info(exported)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
if __name__ == "__main__":
|
| 101 |
+
main()
|
onnx_infer.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
ONNX Runtime inference for RTMPose-M 256x192
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
from typing import Tuple
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
import onnxruntime as ort
|
| 13 |
+
|
| 14 |
+
SIMCC_SPLIT_RATIO = 2.0
|
| 15 |
+
NUM_KP = 17
|
| 16 |
+
MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
|
| 17 |
+
STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)
|
| 18 |
+
COCO_SKELETON = [
|
| 19 |
+
(15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
|
| 20 |
+
(5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
|
| 21 |
+
(7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
|
| 22 |
+
(1, 3), (2, 4), (3, 5), (4, 6),
|
| 23 |
+
]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """Convert ``(x1, y1, x2, y2)`` boxes to ``(center, scale)``.

    Accepts a single box (1-D) or a batch of boxes (one per row); the outputs
    have the same number of dimensions as the input.

    Args:
        bbox: Box or boxes in ``x1, y1, x2, y2`` column order.
        padding: Multiplier applied to the box width and height.
    """
    single_box = bbox.ndim == 1
    boxes = bbox[None, :] if single_box else bbox

    x1, y1, x2, y2 = np.hsplit(boxes, [1, 2, 3])
    center = np.concatenate([x1 + x2, y1 + y2], axis=1) * 0.5
    scale = np.concatenate([x2 - x1, y2 - y1], axis=1) * padding

    if single_box:
        return center[0], scale[0]
    return center, scale
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
    """Grow one side of each ``(w, h)`` so that ``w / h == aspect_ratio``.

    The input is split into its first column (width) and the rest (height);
    whichever side is already large enough is kept and the other is enlarged.
    """
    width, height = np.hsplit(bbox_scale, [1])
    keep_width = np.hstack([width, width / aspect_ratio])
    keep_height = np.hstack([height * aspect_ratio, height])
    is_too_wide = width > height * aspect_ratio
    return np.where(is_too_wide, keep_width, keep_height)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
    """Rotate the 2D point ``pt`` about the origin by ``angle_rad`` radians."""
    sine = np.sin(angle_rad)
    cosine = np.cos(angle_rad)
    x, y = pt[0], pt[1]
    new_x = cosine * x - sine * y
    new_y = sine * x + cosine * y
    return np.array([new_x, new_y])
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return ``b`` offset by the vector ``a - b`` rotated by 90 degrees."""
    diff = a - b
    rotated = np.array([-diff[1], diff[0]])
    return b + rotated
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def get_warp_matrix(center, scale, rot, output_size):
    """Build the 2x3 affine matrix mapping a source box onto the output image.

    Three point correspondences are constructed (box center, a point offset
    from the center, and a third perpendicular point) and handed to
    ``cv2.getAffineTransform``.

    Args:
        center: Source box center ``(x, y)``.
        scale: Source box size ``(w, h)``.
        rot: Rotation in degrees.
        output_size: Destination size as ``(width, height)``.
    """
    out_w, out_h = output_size

    # The shift is fixed at zero here, so it contributes no translation.
    shift = np.array([0, 0], dtype=np.float32)
    translation = scale * shift

    src_offset = _rotate_point(np.array([0.0, scale[0] * -0.5]), np.deg2rad(rot))
    dst_offset = np.array([0.0, out_w * -0.5])
    dst_center = [out_w * 0.5, out_h * 0.5]

    src_tri = np.zeros((3, 2), dtype=np.float32)
    dst_tri = np.zeros((3, 2), dtype=np.float32)

    src_tri[0] = center + translation
    src_tri[1] = center + src_offset + translation
    dst_tri[0] = dst_center
    # list + ndarray broadcasts element-wise via ndarray.__radd__.
    dst_tri[1] = dst_center + dst_offset

    src_tri[2] = _get_3rd_point(src_tri[0], src_tri[1])
    dst_tri[2] = _get_3rd_point(dst_tri[0], dst_tri[1])

    return cv2.getAffineTransform(src_tri, dst_tri)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def preprocess(img_bgr, input_size=(192, 256)):
    """Warp, colour-convert and normalise the whole image for the ONNX model.

    The full image is used as the bounding box, padded by 1.25 and adjusted
    to the aspect ratio of ``input_size`` (given as ``(width, height)``).

    Returns:
        ``(inp, center, scale)`` where ``inp`` is the normalised RGB image in
        channel-first layout with a leading batch axis.
    """
    img_h, img_w = img_bgr.shape[:2]
    full_box = np.array([0, 0, img_w, img_h], dtype=np.float32)

    center, raw_scale = bbox_xyxy2cs(full_box, padding=1.25)
    target_ratio = input_size[0] / input_size[1]
    scale = _fix_aspect_ratio(raw_scale.reshape(1, 2), target_ratio)[0]

    affine = get_warp_matrix(center, scale, 0, input_size)
    warped = cv2.warpAffine(img_bgr, affine, input_size, flags=cv2.INTER_LINEAR)

    rgb = cv2.cvtColor(warped, cv2.COLOR_BGR2RGB).astype(np.float32)
    normalized = (rgb - MEAN) / STD
    # HWC -> CHW, then add the batch axis.
    inp = np.transpose(normalized, (2, 0, 1))[np.newaxis]
    return inp, center, scale
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def get_simcc_maximum(simcc_x, simcc_y):
    """Find the peak bin and confidence of each keypoint from SimCC outputs.

    Args:
        simcc_x: Array of rank 3; the maximum is taken over axis 2.
        simcc_y: Array of rank 3; the maximum is taken over axis 2.

    Returns:
        ``(locs, scores)``: ``locs`` is float32 with a trailing axis of size 2
        holding the ``(x, y)`` argmax indices; ``scores`` is the element-wise
        minimum of the x and y peak values.

    Raises:
        ValueError: If either input is not 3-dimensional.
    """
    # Explicit rank check replaces the former unused shape unpacking
    # (`N, K, Wx` and `Wy` were never read).
    if simcc_x.ndim != 3 or simcc_y.ndim != 3:
        raise ValueError(
            f"simcc_x/simcc_y must be 3-D, got shapes {simcc_x.shape} and {simcc_y.shape}"
        )

    x_locs = np.argmax(simcc_x, axis=2)
    y_locs = np.argmax(simcc_y, axis=2)
    # The value at the argmax is simply the maximum along that axis.
    x_vals = np.max(simcc_x, axis=2)
    y_vals = np.max(simcc_y, axis=2)

    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
    scores = np.minimum(x_vals, y_vals)
    return locs, scores
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def draw(img, keypoints, scores, thr=0.3):
    """Draw keypoints and skeleton limbs onto ``img`` in place.

    A keypoint is drawn as a filled circle unless its score is below ``thr``;
    a limb from ``COCO_SKELETON`` is drawn only when both endpoints score at
    least ``thr``.
    """
    point_color = (0, 255, 0)
    bone_color = (255, 128, 0)

    for (px, py), confidence in zip(keypoints, scores):
        if confidence < thr:
            continue
        cv2.circle(img, (int(px), int(py)), 4, point_color, -1)

    for src, dst in COCO_SKELETON:
        if not (scores[src] >= thr and scores[dst] >= thr):
            continue
        first = (int(keypoints[src][0]), int(keypoints[src][1]))
        second = (int(keypoints[dst][0]), int(keypoints[dst][1]))
        cv2.line(img, first, second, bone_color, 2)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def main():
    """Run RTMPose ONNX inference on one image and save a visualization.

    Parses the command line, preprocesses the image, runs the model with the
    ONNX Runtime CPU provider, decodes the SimCC outputs to image-space
    keypoints, prints them and writes the annotated image to ``--output``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", default="rtmpose_m_256x192.onnx")
    ap.add_argument("-i", "--image", required=True)
    ap.add_argument("-o", "--output", default="onnx_result.jpg")
    ap.add_argument("--score_thres", type=float, default=0.3)
    args = ap.parse_args()

    img0 = cv2.imread(args.image)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if img0 is None:
        ap.error(f"Cannot read {args.image}")

    # Single source of truth for the (width, height) used both to warp the
    # image and to map keypoints back.
    input_size = (192, 256)
    inp, center, scale = preprocess(img0, input_size)

    sess = ort.InferenceSession(args.model, providers=["CPUExecutionProvider"])
    outputs = sess.run(None, {sess.get_inputs()[0].name: inp})
    simcc_x, simcc_y = outputs[0], outputs[1]

    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")

    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
    # SimCC bins -> model-input pixels -> original image coordinates.
    keypoints = locs / SIMCC_SPLIT_RATIO
    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
    keypoints = keypoints[0]
    scores = scores[0]

    above = (scores >= args.score_thres).sum()
    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
    for i, ((x, y), s) in enumerate(zip(keypoints, scores)):
        print(f" kp{i:02d}: ({x:6.1f}, {y:6.1f}) score={s:.3f}")

    draw(img0, keypoints, scores, args.score_thres)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(args.output, img0):
        raise SystemExit(f"Failed to write {args.output}")
    print(f"Saved: {args.output}")
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
if __name__ == "__main__":
|
| 155 |
+
main()
|
replace_hardsigmoid.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Replace HardSigmoid with Mul + Add + Clip in RTMPose ONNX.
|
| 3 |
+
|
| 4 |
+
Replacing HardSigmoid with standard ops (Mul/Add/Clip)
|
| 5 |
+
allows FP32 or U16 quantization on these nodes.
|
| 6 |
+
|
| 7 |
+
Equivalent: HardSigmoid(x) = Clip(x * alpha + beta, 0, 1)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import argparse
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
import onnx
|
| 14 |
+
from onnx import TensorProto, helper, numpy_helper
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def replace_hardsigmoid(model: onnx.ModelProto) -> int:
    """Rewrite every HardSigmoid node as Mul -> Add -> Clip, in place.

    Each replacement computes ``Clip(x * alpha + beta, 0, 1)`` using the
    node's ``alpha`` / ``beta`` attributes (defaults 0.2 and 0.5). New nodes
    are named ``hs_replace_<k>_Mul`` / ``_Add`` / ``_Clip`` with ``k``
    counting from 1 in graph order, and node order is otherwise preserved.

    Clip receives ``min`` / ``max`` as inputs, which requires opset >= 11.

    Args:
        model: Model whose graph is modified in place.

    Returns:
        Number of HardSigmoid nodes replaced.
    """
    graph = model.graph
    rewritten_nodes = []
    new_initializers = []
    hs_count = 0

    for node in list(graph.node):
        if node.op_type != "HardSigmoid":
            rewritten_nodes.append(node)
            continue

        hs_count += 1
        src = node.input[0]
        dst = node.output[0]
        prefix = f"hs_replace_{hs_count}"

        float_attrs = {attr.name: attr.f for attr in node.attribute}
        alpha = float_attrs.get("alpha", 0.2)
        beta = float_attrs.get("beta", 0.5)

        alpha_name = f"{prefix}_alpha"
        beta_name = f"{prefix}_beta"
        min_name = f"{prefix}_min"
        max_name = f"{prefix}_max"

        new_initializers.extend(
            [
                numpy_helper.from_array(np.array([alpha], dtype=np.float32), alpha_name),
                numpy_helper.from_array(np.array([beta], dtype=np.float32), beta_name),
                # The ONNX spec requires Clip's min/max to be scalars (empty
                # shape), so these are 0-d tensors rather than shape (1,).
                numpy_helper.from_array(np.array(0.0, dtype=np.float32), min_name),
                numpy_helper.from_array(np.array(1.0, dtype=np.float32), max_name),
            ]
        )

        mul_out = f"{prefix}_mul_out"
        add_out = f"{prefix}_add_out"
        rewritten_nodes.extend(
            [
                helper.make_node("Mul", [src, alpha_name], [mul_out], name=f"{prefix}_Mul"),
                helper.make_node("Add", [mul_out, beta_name], [add_out], name=f"{prefix}_Add"),
                helper.make_node(
                    "Clip", [add_out, min_name, max_name], [dst], name=f"{prefix}_Clip"
                ),
            ]
        )

    del graph.node[:]
    graph.node.extend(rewritten_nodes)
    graph.initializer.extend(new_initializers)

    return hs_count
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def fix_batch_dim(model: onnx.ModelProto):
    """Force the leading dimension of every graph input to a static 1.

    Inputs without any dimensions are skipped. A leading dimension is
    rewritten when it is symbolic or holds a value other than 1.
    """
    for graph_input in model.graph.input:
        shape = graph_input.type.tensor_type.shape
        if not (shape and shape.dim):
            continue
        lead = shape.dim[0]
        if not lead.dim_param and lead.dim_value == 1:
            continue
        # Drop any symbolic name, then pin the dimension to 1.
        lead.ClearField("dim_param")
        lead.dim_value = 1
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def main():
    """Replace HardSigmoid nodes, fix the batch dim, save and smoke-test the model.

    After saving, the output model is loaded with ONNX Runtime on CPU and run
    once on random input to confirm it executes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default="rtmpose_m_256x192.onnx")
    parser.add_argument("--output", default="rtmpose_m_256x192_no_hs.onnx")
    opts = parser.parse_args()

    model = onnx.load(opts.input)
    replaced = replace_hardsigmoid(model)
    print(f"Replaced {replaced} HardSigmoid -> Mul+Add+Clip")

    fix_batch_dim(model)
    print("Fixed dynamic batch dim -> 1")

    onnx.save(model, opts.output)
    print(f"Saved: {opts.output}")

    # Imported here so the conversion itself does not require onnxruntime.
    import onnxruntime as ort

    session = ort.InferenceSession(opts.output, providers=["CPUExecutionProvider"])
    first_input = session.get_inputs()[0]
    random_input = np.random.randn(*first_input.shape).astype(np.float32)
    results = session.run(None, {first_input.name: random_input})
    print(f"Verify OK: {[o.shape for o in results]}")
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
if __name__ == "__main__":
|
| 113 |
+
main()
|
result_onnx.jpg
ADDED
|
Git LFS Details
|
rtmpose_cali.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fa0db5fe95a54b8959bc9709bdb8427dc2820b4e9362abeb871c05a0e79d3b6
|
| 3 |
+
size 1259520
|
rtmpose_m_256x192.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f0bbe036e98aa62913e8b2b1e523cb7d614e1be3255fe9cf22df3c4a8342bd3
|
| 3 |
+
size 54330887
|
rtmpose_m_256x192_no_hs.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87bd9ae001505183d5489065663728728ed6b0a576a11785dcc4efefbe5d4796
|
| 3 |
+
size 54332145
|
test.jpg
ADDED
|
Git LFS Details
|