fangmingguo committed on
Commit
993d81c
·
verified ·
1 Parent(s): bf8fe4a

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ax_result.jpg filter=lfs diff=lfs merge=lfs -text
37
+ AX650/rtmpose_m_npu1.axmodel filter=lfs diff=lfs merge=lfs -text
38
+ AX650/rtmpose_m_npu3.axmodel filter=lfs diff=lfs merge=lfs -text
39
+ result_onnx.jpg filter=lfs diff=lfs merge=lfs -text
40
+ test.jpg filter=lfs diff=lfs merge=lfs -text
AX650/rtmpose_m_npu1.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ebc503bdf788009706c60876b3ecd6ed12a833888286868f4cf4208a1eb91e
3
+ size 18848816
AX650/rtmpose_m_npu3.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17313d5d20b0070672bf8617960162ea2ac6f7b8da12b42b5bb4974c45570fa7
3
+ size 18247844
ax_infer.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ RTMPose-M axengine inference on AXERA NPU.
4
+ """
5
+
6
+ import argparse
7
+ import os
8
+ from time import time
9
+ from typing import Tuple
10
+
11
+ import cv2
12
+ import numpy as np
13
+
14
+ try:
15
+ import axengine as axe
16
+ except ImportError:
17
+ import onnxruntime as axe
18
+
19
+ SIMCC_SPLIT_RATIO = 2.0
20
+ NUM_KP = 17
21
+ COCO_SKELETON = [
22
+ (15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
23
+ (5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
24
+ (7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
25
+ (1, 3), (2, 4), (3, 5), (4, 6),
26
+ ]
27
+
28
+
29
def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """Turn one ``(x1, y1, x2, y2)`` box into a centre point and a padded size.

    Args:
        bbox: Four values unpacked as ``x1, y1, x2, y2``.
        padding: Factor applied to both the width and the height.

    Returns:
        ``(center, scale)`` as two float32 arrays of two elements each:
        the box midpoint and the padded ``(width, height)``.
    """
    left, top, right, bottom = bbox
    mid_x = (left + right) * 0.5
    mid_y = (top + bottom) * 0.5
    padded_w = (right - left) * padding
    padded_h = (bottom - top) * padding
    return (
        np.array([mid_x, mid_y], dtype=np.float32),
        np.array([padded_w, padded_h], dtype=np.float32),
    )
34
+
35
+
36
+ def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
37
+ w, h = bbox_scale
38
+ if w > h * aspect_ratio:
39
+ return np.array([w, w / aspect_ratio], dtype=np.float32)
40
+ else:
41
+ return np.array([h * aspect_ratio, h], dtype=np.float32)
42
+
43
+
44
+ def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
45
+ sn, cs = np.sin(angle_rad), np.cos(angle_rad)
46
+ return np.array([cs * pt[0] - sn * pt[1], sn * pt[0] + cs * pt[1]])
47
+
48
+
49
+ def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
50
+ direction = a - b
51
+ return b + np.r_[-direction[1], direction[0]]
52
+
53
+
54
def get_warp_matrix(center, scale, rot, output_size):
    """Build the affine transform that maps a source box onto the network input.

    Three source points (the box centre, a point half a box-width above it
    rotated by ``rot``, and a perpendicular third point) are matched with the
    corresponding three points of the ``output_size`` canvas.

    Args:
        center: Box centre ``(x, y)`` in source-image pixels.
        scale: Box size ``(w, h)``; only ``scale[0]`` (the width) is used.
        rot: Rotation in degrees.
        output_size: Target ``(width, height)``.

    Returns:
        Whatever ``cv2.getAffineTransform`` returns for the two triangles.
    """
    out_w, out_h = output_size
    angle = np.deg2rad(rot)

    # Source triangle: centre, rotated "up" point, perpendicular third point.
    src_up = _rotate_point(np.array([0.0, scale[0] * -0.5]), angle)
    src_tri = np.zeros((3, 2), dtype=np.float32)
    src_tri[0] = center
    src_tri[1] = center + src_up
    src_tri[2] = _get_3rd_point(src_tri[0], src_tri[1])

    # Destination triangle: same construction around the output centre.
    dst_mid = [out_w * 0.5, out_h * 0.5]
    dst_up = np.array([0.0, out_w * -0.5])
    dst_tri = np.zeros((3, 2), dtype=np.float32)
    dst_tri[0] = dst_mid
    dst_tri[1] = dst_mid + dst_up  # list + ndarray adds element-wise
    dst_tri[2] = _get_3rd_point(dst_tri[0], dst_tri[1])

    return cv2.getAffineTransform(src_tri, dst_tri)
73
+
74
+
75
def preprocess(img_bgr, input_size=(192, 256)):
    """Warp a whole BGR image into the network input and keep the box geometry.

    The full frame is used as the bounding box (padded by 1.25 and stretched
    to the aspect ratio of ``input_size``) and warped with bilinear
    interpolation.

    Args:
        img_bgr: Image array; its first two axes are read as height, width.
        input_size: Network input as ``(width, height)``.

    Returns:
        ``(inp, center, scale)`` where ``inp`` is the warped image with a
        leading batch axis added, and ``center`` / ``scale`` describe the
        source box for mapping keypoints back later.
    """
    img_h, img_w = img_bgr.shape[:2]
    full_frame = np.array([0, 0, img_w, img_h], dtype=np.float32)

    center, raw_scale = bbox_xyxy2cs(full_frame, padding=1.25)
    scale = _fix_aspect_ratio(raw_scale, input_size[0] / input_size[1])

    affine = get_warp_matrix(center, scale, 0, input_size)
    patch = cv2.warpAffine(img_bgr, affine, input_size, flags=cv2.INTER_LINEAR)

    # (1, H, W, 3) uint8 NHWC BGR; per the original note the axmodel itself
    # handles the BGR->RGB conversion, so no colour swap happens here.
    return patch[None], center, scale
86
+
87
+
88
def get_simcc_maximum(simcc_x, simcc_y):
    """Pick the arg-max bin and a confidence score from SimCC outputs.

    Args:
        simcc_x: 3-D array; the arg-max is taken over the last axis.
            Presumably laid out as (batch, keypoints, x-bins) - confirm
            against the model outputs.
        simcc_y: 3-D array with the same leading two axes, for the y bins.

    Returns:
        ``(locs, scores)``: ``locs`` is float32 with a trailing axis of
        size 2 holding the ``(x_bin, y_bin)`` indices; ``scores`` is the
        element-wise minimum of the two peak values.

    Raises:
        ValueError: If either input is not 3-D.
    """
    # Explicit rank check for both inputs; the previous code only checked
    # simcc_x implicitly by unpacking its shape into unused variables.
    if simcc_x.ndim != 3 or simcc_y.ndim != 3:
        raise ValueError(
            "simcc_x and simcc_y must be 3-D arrays, got shapes "
            f"{simcc_x.shape} and {simcc_y.shape}"
        )

    x_locs = np.argmax(simcc_x, axis=2)
    y_locs = np.argmax(simcc_y, axis=2)
    x_vals = np.take_along_axis(simcc_x, x_locs[:, :, None], axis=2).squeeze(2)
    y_vals = np.take_along_axis(simcc_y, y_locs[:, :, None], axis=2).squeeze(2)

    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
    # A keypoint is only as confident as its weaker axis.
    scores = np.minimum(x_vals, y_vals)
    return locs, scores
97
+
98
+
99
def draw(img, keypoints, scores, thr=0.3):
    """Draw keypoints and skeleton limbs onto ``img`` in place.

    A keypoint is drawn as a filled circle unless its score is below
    ``thr``; a limb from ``COCO_SKELETON`` is drawn only when both of its
    end points score at least ``thr``.
    """
    for (px, py), conf in zip(keypoints, scores):
        if not conf < thr:
            cv2.circle(img, (int(px), int(py)), 4, (0, 255, 0), -1)

    for start, end in COCO_SKELETON:
        if not (scores[start] >= thr and scores[end] >= thr):
            continue
        start_xy = (int(keypoints[start][0]), int(keypoints[start][1]))
        end_xy = (int(keypoints[end][0]), int(keypoints[end][1]))
        cv2.line(img, start_xy, end_xy, (255, 128, 0), 2)
109
+
110
+
111
def main():
    """Run RTMPose-M on one image, time the forward pass and save the overlay.

    Flags: ``-m/--model`` (model file), ``-i/--image`` (required input),
    ``-o/--output`` (result image), ``--score_thres`` (drawing threshold),
    ``--warmup`` / ``--repeat`` (untimed and timed forward passes).

    Exits with an error message when ``--repeat`` is below 1, when the
    input image cannot be read, or when the result image cannot be written.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", default="output/rtmpose_m_npu3.axmodel")
    ap.add_argument("-i", "--image", required=True)
    ap.add_argument("-o", "--output", default="ax_result.jpg")
    ap.add_argument("--score_thres", type=float, default=0.3)
    ap.add_argument("--warmup", type=int, default=3)
    ap.add_argument("--repeat", type=int, default=10)
    args = ap.parse_args()

    if args.repeat < 1:
        # The timed loop both produces `outputs` and is the divisor of the
        # average, so zero iterations cannot work.
        raise SystemExit("--repeat must be at least 1")

    img0 = cv2.imread(args.image)
    if img0 is None:
        # Explicit check: an `assert` would be stripped under `python -O`.
        raise SystemExit(f"Cannot read {args.image}")

    # Single source of truth for the network input size (width, height);
    # used both for the warp and for decoding the keypoints below.
    input_size = (192, 256)
    inp, center, scale = preprocess(img0, input_size)

    model = axe.InferenceSession(args.model)
    inp_info = model.get_inputs()[0]
    # The two backends expose the element type under different attribute names.
    dtype_str = getattr(inp_info, "dtype", getattr(inp_info, "type", "unknown"))
    print(f"Model input: name={inp_info.name}, shape={inp_info.shape}, dtype={dtype_str}")

    for _ in range(args.warmup):
        model.run(None, {inp_info.name: inp})

    t0 = time()
    for _ in range(args.repeat):
        outputs = model.run(None, {inp_info.name: inp})
    elapsed = (time() - t0) / args.repeat * 1000
    print(f"Forward: {elapsed:.2f} ms (avg of {args.repeat} runs)")

    simcc_x, simcc_y = outputs[0], outputs[1]
    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")

    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
    # SimCC bins -> network-input pixels -> source-image pixels.
    keypoints = locs / SIMCC_SPLIT_RATIO
    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
    keypoints = keypoints[0]
    scores = scores[0]

    above = (scores >= args.score_thres).sum()
    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
    for i, ((x, y), sc) in enumerate(zip(keypoints, scores)):
        print(f" kp{i:02d}: ({x:6.1f}, {y:6.1f}) score={sc:.4f}")

    draw(img0, keypoints, scores, args.score_thres)
    if not cv2.imwrite(args.output, img0):
        # imwrite reports failure through its return value, not an exception.
        raise SystemExit(f"Cannot write {args.output}")
    print(f"Saved: {args.output}")
159
+
160
+
161
+ if __name__ == "__main__":
162
+ main()
ax_result.jpg ADDED

Git LFS Details

  • SHA256: 6a51a0c154d54b6922e83d43c3ec544eee64557e2626aaf52d34864500d69a19
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB
config.json ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "ONNX",
3
+ "npu_mode": "NPU3",
4
+ "input_shapes": "input:1x3x256x192",
5
+ "quant": {
6
+ "input_configs": [
7
+ {
8
+ "tensor_name": "DEFAULT",
9
+ "calibration_dataset": "./rtmpose_cali.tar",
10
+ "calibration_size": 100,
11
+ "calibration_mean": [
12
+ 123.675,
13
+ 116.28,
14
+ 103.53
15
+ ],
16
+ "calibration_std": [
17
+ 58.395,
18
+ 57.12,
19
+ 57.375
20
+ ]
21
+ }
22
+ ],
23
+ "calibration_method": "MSE",
24
+ "layer_configs": [
25
+ {
26
+ "layer_names": [
27
+ "Conv_0",
28
+ "Conv_3",
29
+ "Conv_6",
30
+ "Conv_9",
31
+ "Conv_12",
32
+ "Conv_15",
33
+ "Conv_18",
34
+ "Conv_21",
35
+ "Conv_24",
36
+ "Conv_28",
37
+ "Conv_31",
38
+ "Conv_34",
39
+ "Conv_46",
40
+ "Conv_55",
41
+ "Conv_58",
42
+ "Conv_65",
43
+ "Conv_68",
44
+ "Conv_75",
45
+ "Conv_78",
46
+ "Conv_85",
47
+ "Conv_88",
48
+ "Conv_103",
49
+ "Conv_112",
50
+ "Conv_115",
51
+ "Conv_122",
52
+ "Conv_125",
53
+ "Conv_132",
54
+ "Conv_135",
55
+ "Conv_142",
56
+ "Conv_145",
57
+ "Conv_160",
58
+ "Conv_179",
59
+ "Conv_182",
60
+ "Conv_188",
61
+ "Conv_191",
62
+ "Conv_205",
63
+ "Conv_40",
64
+ "Conv_43",
65
+ "Conv_49",
66
+ "Conv_52",
67
+ "Conv_71",
68
+ "Conv_81",
69
+ "Conv_91",
70
+ "Conv_97",
71
+ "Conv_100",
72
+ "Conv_106",
73
+ "Conv_109",
74
+ "Conv_118",
75
+ "Conv_128",
76
+ "Conv_138",
77
+ "Conv_148",
78
+ "Conv_154",
79
+ "Conv_157",
80
+ "Conv_163",
81
+ "Conv_170",
82
+ "Conv_173",
83
+ "Conv_176",
84
+ "Conv_185",
85
+ "Conv_194",
86
+ "Conv_199",
87
+ "Conv_202",
88
+ "GlobalAveragePool_39",
89
+ "GlobalAveragePool_153",
90
+ "GlobalAveragePool_198",
91
+ "Concat_152",
92
+ "Concat_169",
93
+ "op_1:onnx.Silu",
94
+ "op_2:onnx.Silu",
95
+ "op_5:onnx.Silu",
96
+ "op_6:onnx.Silu",
97
+ "op_7:onnx.Silu",
98
+ "op_8:onnx.Silu",
99
+ "op_9:onnx.Silu",
100
+ "op_10:onnx.Silu",
101
+ "op_11:onnx.Silu",
102
+ "op_12:onnx.Silu",
103
+ "op_13:onnx.Silu",
104
+ "op_15:onnx.Silu",
105
+ "op_16:onnx.Silu",
106
+ "op_17:onnx.Silu",
107
+ "op_18:onnx.Silu",
108
+ "op_19:onnx.Silu",
109
+ "op_20:onnx.Silu",
110
+ "op_21:onnx.Silu",
111
+ "op_23:onnx.Silu",
112
+ "op_25:onnx.Silu",
113
+ "op_26:onnx.Silu",
114
+ "op_27:onnx.Silu",
115
+ "op_29:onnx.Silu",
116
+ "op_30:onnx.Silu",
117
+ "op_34:onnx.Silu",
118
+ "op_49:onnx.Silu",
119
+ "op_50:onnx.Silu",
120
+ "op_52:onnx.Silu",
121
+ "op_54:onnx.Silu",
122
+ "op_56:onnx.Silu",
123
+ "op_57:onnx.Silu",
124
+ "op_58:onnx.Silu",
125
+ "Add_64",
126
+ "Add_74",
127
+ "Add_84",
128
+ "Add_94",
129
+ "Add_121",
130
+ "Add_131",
131
+ "Add_141",
132
+ "Add_151",
133
+ "MaxPool_166",
134
+ "MaxPool_167",
135
+ "MaxPool_168",
136
+ "Reshape_212",
137
+ "Split_233",
138
+ "Unsqueeze_234",
139
+ "Split_237",
140
+ "Squeeze_238",
141
+ "Squeeze_239",
142
+ "Transpose_240"
143
+ ],
144
+ "data_type": "U16"
145
+ },
146
+ {
147
+ "layer_names": [
148
+ "Conv_61",
149
+ "op_32:onnx.Silu",
150
+ "op_43:onnx.Silu",
151
+ "op_53:onnx.Silu",
152
+ "op_4:onnx.Silu",
153
+ "op_14:onnx.Silu",
154
+ "op_22:onnx.Silu",
155
+ "op_33:onnx.Silu",
156
+ "op_35:onnx.Silu",
157
+ "op_37:onnx.Silu",
158
+ "op_38:onnx.Silu",
159
+ "op_39:onnx.Silu",
160
+ "op_40:onnx.Silu",
161
+ "hs_replace_1_Mul",
162
+ "hs_replace_1_Add",
163
+ "hs_replace_1_Clip",
164
+ "Mul_42",
165
+ "Concat_95",
166
+ "GlobalAveragePool_96",
167
+ "hs_replace_2_Mul",
168
+ "hs_replace_2_Add",
169
+ "hs_replace_2_Clip",
170
+ "Mul_99",
171
+ "hs_replace_3_Mul",
172
+ "hs_replace_3_Add",
173
+ "hs_replace_3_Clip",
174
+ "Mul_156",
175
+ "hs_replace_4_Mul",
176
+ "hs_replace_4_Add",
177
+ "hs_replace_4_Clip",
178
+ "Mul_201",
179
+ "op_66:onnx.RMSNormalization",
180
+ "op_61:onnx.FullyConnected",
181
+ "Mul_249",
182
+ "op_67:onnx.RMSNormalization",
183
+ "op_62:onnx.FullyConnected",
184
+ "op_36:onnx.Silu",
185
+ "Mul_235",
186
+ "Add_236",
187
+ "MatMul_241",
188
+ "op_68:onnx.Mul",
189
+ "Relu_244",
190
+ "Mul_245",
191
+ "MatMul_246",
192
+ "Mul_247",
193
+ "op_63:onnx.FullyConnected",
194
+ "Add_250",
195
+ "op_64:onnx.FullyConnected",
196
+ "op_65:onnx.FullyConnected",
197
+ "op_48:onnx.Silu",
198
+ "op_41:onnx.Silu",
199
+ "op_42:onnx.Silu",
200
+ "op_44:onnx.Silu",
201
+ "op_45:onnx.Silu",
202
+ "op_46:onnx.Silu",
203
+ "op_47:onnx.Silu",
204
+ "Add_27",
205
+ "Add_37",
206
+ "op_24:onnx.Silu",
207
+ "Concat_38",
208
+ "op_28:onnx.Silu",
209
+ "Concat_197",
210
+ "op_3:onnx.Silu",
211
+ "op_55:onnx.Silu",
212
+ "op_31:onnx.Silu",
213
+ "op_51:onnx.Silu"
214
+ ],
215
+ "data_type": "FP32",
216
+ "weight_data_type": "FP32",
217
+ "output_data_type": "FP32"
218
+ }
219
+ ],
220
+ "precision_analysis": true,
221
+ "precision_analysis_method": "EndToEnd"
222
+ },
223
+ "input_processors": [
224
+ {
225
+ "tensor_name": "DEFAULT",
226
+ "tensor_format": "RGB",
227
+ "src_format": "BGR",
228
+ "src_dtype": "U8",
229
+ "src_layout": "NHWC"
230
+ }
231
+ ],
232
+ "compiler": {
233
+ "check": 0
234
+ }
235
+ }
export_onnx.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Export RTMPose-M 256x192 ONNX from official OpenMMLab pretrained model.
3
+
4
+ Downloads the official pre-exported ONNX from OpenMMLab model zoo,
5
+ converts opset if needed, and fixes the batch dimension to static 1.
6
+
7
+ Model: RTMPose-M (13.58M params)
8
+ Input: 1x3x256x192 (RGB, float32)
9
+ Output: simcc_x (1,17,384), simcc_y (1,17,512)
10
+ """
11
+
12
+ import argparse
13
+ import io
14
+ import os
15
+ import zipfile
16
+
17
+ import numpy as np
18
+ import onnx
19
+ import onnx.version_converter
20
+ import requests
21
+
22
+ ONNX_URL = (
23
+ "https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/"
24
+ "onnx_sdk/rtmpose-m_simcc-body7_pt-body7_420e-256x192-"
25
+ "e48f03d0_20230504.zip"
26
+ )
27
+ CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".model_cache")
28
+
29
+
30
def download_onnx() -> str:
    """Download the official RTMPose-M ONNX archive and cache the model locally.

    The first ``.onnx`` member of the zip at ``ONNX_URL`` is extracted to
    ``CACHE_DIR/rtmpose_m_official.onnx``. An existing cache file is reused
    without any network access.

    Returns:
        Path of the cached ``.onnx`` file.

    Raises:
        requests.HTTPError: If the download returns an error status.
        RuntimeError: If the archive contains no ``.onnx`` file.
    """
    cache_path = os.path.join(CACHE_DIR, "rtmpose_m_official.onnx")
    if os.path.exists(cache_path):
        print(f"Using cached ONNX: {cache_path}")
        return cache_path

    os.makedirs(CACHE_DIR, exist_ok=True)
    print("Downloading official RTMPose-M ONNX from OpenMMLab...")
    resp = requests.get(ONNX_URL, timeout=120)
    resp.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        member = next((name for name in zf.namelist() if name.endswith(".onnx")), None)
        if member is None:
            raise RuntimeError("No .onnx found in downloaded zip")

        # Write to a temporary name and rename afterwards, so an interrupted
        # run can never leave a truncated file that later runs would accept
        # as a valid cache entry.
        tmp_path = cache_path + ".part"
        try:
            with zf.open(member) as src, open(tmp_path, "wb") as dst:
                dst.write(src.read())
            os.replace(tmp_path, cache_path)
        finally:
            # Only still present when the write or the rename failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    print(f"Cached: {cache_path}")
    return cache_path
51
+
52
+
53
def convert_opset(model: onnx.ModelProto, target_opset: int) -> onnx.ModelProto:
    """Convert ``model`` to ``target_opset`` unless it already uses it.

    The current version is read from the opset entry of the default ONNX
    domain (``""`` or ``"ai.onnx"``), not from whichever entry happens to be
    first: ``opset_import`` may also list other domains, in any order.

    Returns:
        ``model`` itself when no conversion is needed, otherwise the model
        returned by ``onnx.version_converter.convert_version``.

    Raises:
        ValueError: If the model declares no opset for the default domain.
    """
    current_opset = next(
        (imp.version for imp in model.opset_import if imp.domain in ("", "ai.onnx")),
        None,
    )
    if current_opset is None:
        raise ValueError("Model declares no opset for the default ONNX domain")

    if current_opset == target_opset:
        return model
    print(f"Converting opset {current_opset} -> {target_opset}")
    return onnx.version_converter.convert_version(model, target_opset)
60
+
61
+
62
def fix_batch_dim(model: onnx.ModelProto, batch: int = 1) -> None:
    """Replace a symbolic leading dimension with the static value ``batch``.

    Every graph input and output whose first dimension is symbolic
    (``dim_param`` set) is modified in place. Dimensions that are already
    static are left unchanged, and tensors that declare no dimensions at all
    are skipped instead of raising ``IndexError``.
    """
    for tensor in list(model.graph.input) + list(model.graph.output):
        dims = tensor.type.tensor_type.shape.dim
        if not dims:
            # Rank-0 or shape-less tensor: there is no batch axis to fix.
            continue
        dim0 = dims[0]
        if dim0.dim_param:
            dim0.ClearField("dim_param")
            dim0.dim_value = batch
69
+
70
+
71
def print_model_info(model: onnx.ModelProto) -> None:
    """Print the parameter count and the declared input/output shapes.

    The parameter count is the total number of elements over all graph
    initializers. Symbolic dimensions are printed by name; previously they
    showed up as ``0`` because ``dim_value`` reads as 0 when ``dim_param``
    is the field that is set.
    """

    def _dims(value_info):
        # Prefer the symbolic name when present, else the static size.
        return [
            d.dim_param if d.dim_param else d.dim_value
            for d in value_info.type.tensor_type.shape.dim
        ]

    total_params = sum(int(np.prod(init.dims)) for init in model.graph.initializer)
    print(f"Parameters: {total_params / 1e6:.2f}M")
    for inp in model.graph.input:
        dims = _dims(inp)
        print(f" Input: {inp.name} {dims}")
    for out in model.graph.output:
        dims = _dims(out)
        print(f" Output: {out.name} {dims}")
81
+
82
+
83
def main():
    """Download, convert and save the RTMPose-M ONNX model, then print a summary.

    Flags: ``--opset`` (target opset version), ``--output`` (destination
    file) and ``--batch`` (static batch size written into the graph).
    """
    parser = argparse.ArgumentParser(description="Export RTMPose-M 256x192 ONNX")
    for flag, options in (
        ("--opset", dict(type=int, default=13, help="Target ONNX opset version")),
        ("--output", dict(default="rtmpose_m_256x192.onnx", help="Output path")),
        ("--batch", dict(type=int, default=1, help="Static batch size")),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Fetch (or reuse the cached) model, then bring it to the requested opset.
    model = convert_opset(onnx.load(download_onnx()), args.opset)
    fix_batch_dim(model, args.batch)
    onnx.save(model, args.output)

    print(f"\nExported: {args.output} ({os.path.getsize(args.output) / 1e6:.2f} MB)")
    print_model_info(model)
98
+
99
+
100
+ if __name__ == "__main__":
101
+ main()
onnx_infer.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ONNX Runtime inference for RTMPose-M 256x192
4
+ """
5
+
6
+ import argparse
7
+ import os
8
+ from typing import Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+ import onnxruntime as ort
13
+
14
+ SIMCC_SPLIT_RATIO = 2.0
15
+ NUM_KP = 17
16
+ MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
17
+ STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)
18
+ COCO_SKELETON = [
19
+ (15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
20
+ (5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
21
+ (7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
22
+ (1, 3), (2, 4), (3, 5), (4, 6),
23
+ ]
24
+
25
+
26
def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """Convert ``(x1, y1, x2, y2)`` boxes into centres and padded sizes.

    Accepts either a single box (1-D, four values) or a stack of boxes with
    one box per row; the result has the same rank as the input.

    Args:
        bbox: Box or boxes in ``x1, y1, x2, y2`` order.
        padding: Factor applied to both the width and the height.

    Returns:
        ``(center, scale)``: box midpoints and padded ``(width, height)``.
    """
    single = bbox.ndim == 1
    boxes = bbox[None, :] if single else bbox

    x1, y1, x2, y2 = np.hsplit(boxes, [1, 2, 3])
    center = np.concatenate([x1 + x2, y1 + y2], axis=1) * 0.5
    scale = np.concatenate([x2 - x1, y2 - y1], axis=1) * padding

    if single:
        # Drop the row axis that was added for the single-box case.
        return center[0], scale[0]
    return center, scale
37
+
38
+
39
+ def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
40
+ w, h = np.hsplit(bbox_scale, [1])
41
+ return np.where(w > h * aspect_ratio,
42
+ np.hstack([w, w / aspect_ratio]),
43
+ np.hstack([h * aspect_ratio, h]))
44
+
45
+
46
+ def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
47
+ sn, cs = np.sin(angle_rad), np.cos(angle_rad)
48
+ return np.array([cs * pt[0] - sn * pt[1], sn * pt[0] + cs * pt[1]])
49
+
50
+
51
+ def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
52
+ direction = a - b
53
+ return b + np.r_[-direction[1], direction[0]]
54
+
55
+
56
def get_warp_matrix(center, scale, rot, output_size):
    """Build the affine transform that maps a source box onto the network input.

    Three source points (the box centre, a point half a box-width above it
    rotated by ``rot``, and a perpendicular third point) are matched with the
    corresponding three points of the ``output_size`` canvas.

    Args:
        center: Box centre ``(x, y)`` in source-image pixels.
        scale: Box size ``(w, h)``; ``scale[0]`` sets the source extent.
        rot: Rotation in degrees.
        output_size: Target ``(width, height)``.

    Returns:
        Whatever ``cv2.getAffineTransform`` returns for the two triangles.
    """
    out_w, out_h = output_size
    angle = np.deg2rad(rot)

    # The shift is fixed at zero here, so this offset contributes nothing
    # for finite scales; it is kept so the arithmetic stays unchanged.
    shift = np.array([0, 0], dtype=np.float32)
    offset = scale * shift

    src_up = _rotate_point(np.array([0.0, scale[0] * -0.5]), angle)
    src_tri = np.zeros((3, 2), dtype=np.float32)
    src_tri[0] = center + offset
    src_tri[1] = center + src_up + offset
    src_tri[2] = _get_3rd_point(src_tri[0], src_tri[1])

    dst_mid = [out_w * 0.5, out_h * 0.5]
    dst_up = np.array([0.0, out_w * -0.5])
    dst_tri = np.zeros((3, 2), dtype=np.float32)
    dst_tri[0] = dst_mid
    dst_tri[1] = dst_mid + dst_up  # list + ndarray adds element-wise
    dst_tri[2] = _get_3rd_point(dst_tri[0], dst_tri[1])

    return cv2.getAffineTransform(src_tri, dst_tri)
76
+
77
+
78
def preprocess(img_bgr, input_size=(192, 256)):
    """Warp a whole BGR image into a normalised NCHW network input.

    The full frame is used as the bounding box (padded by 1.25 and stretched
    to the aspect ratio of ``input_size``), warped with bilinear
    interpolation, converted to RGB, normalised with ``MEAN`` / ``STD`` and
    transposed to channels-first.

    Args:
        img_bgr: Image array; its first two axes are read as height, width.
        input_size: Network input as ``(width, height)``.

    Returns:
        ``(inp, center, scale)`` where ``inp`` is the float32 tensor with a
        leading batch axis, and ``center`` / ``scale`` describe the source
        box for mapping keypoints back later.
    """
    img_h, img_w = img_bgr.shape[:2]
    full_frame = np.array([0, 0, img_w, img_h], dtype=np.float32)

    center, raw_scale = bbox_xyxy2cs(full_frame, padding=1.25)
    # The helper works on rows of (w, h), hence the reshape and the [0].
    scale = _fix_aspect_ratio(raw_scale.reshape(1, 2), input_size[0] / input_size[1])[0]

    affine = get_warp_matrix(center, scale, 0, input_size)
    patch = cv2.warpAffine(img_bgr, affine, input_size, flags=cv2.INTER_LINEAR)

    rgb = cv2.cvtColor(patch, cv2.COLOR_BGR2RGB).astype(np.float32)
    normalised = (rgb - MEAN) / STD
    # HWC -> CHW, then add the batch axis.
    return normalised.transpose(2, 0, 1)[None], center, scale
91
+
92
+
93
def get_simcc_maximum(simcc_x, simcc_y):
    """Pick the arg-max bin and a confidence score from SimCC outputs.

    Args:
        simcc_x: 3-D array; the arg-max is taken over the last axis.
            Presumably laid out as (batch, keypoints, x-bins) - confirm
            against the model outputs.
        simcc_y: 3-D array with the same leading two axes, for the y bins.

    Returns:
        ``(locs, scores)``: ``locs`` is float32 with a trailing axis of
        size 2 holding the ``(x_bin, y_bin)`` indices; ``scores`` is the
        element-wise minimum of the two peak values.

    Raises:
        ValueError: If either input is not 3-D.
    """
    # Explicit rank check; the previous code only validated the inputs
    # implicitly by unpacking their shapes into unused variables.
    if simcc_x.ndim != 3 or simcc_y.ndim != 3:
        raise ValueError(
            "simcc_x and simcc_y must be 3-D arrays, got shapes "
            f"{simcc_x.shape} and {simcc_y.shape}"
        )

    x_locs = np.argmax(simcc_x, axis=2)
    y_locs = np.argmax(simcc_y, axis=2)
    x_vals = np.take_along_axis(simcc_x, x_locs[:, :, None], axis=2).squeeze(2)
    y_vals = np.take_along_axis(simcc_y, y_locs[:, :, None], axis=2).squeeze(2)

    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
    # A keypoint is only as confident as its weaker axis.
    scores = np.minimum(x_vals, y_vals)
    return locs, scores
103
+
104
+
105
def draw(img, keypoints, scores, thr=0.3):
    """Draw keypoints and skeleton limbs onto ``img`` in place.

    A keypoint is drawn as a filled circle unless its score is below
    ``thr``; a limb from ``COCO_SKELETON`` is drawn only when both of its
    end points score at least ``thr``.
    """
    for (px, py), conf in zip(keypoints, scores):
        if not conf < thr:
            cv2.circle(img, (int(px), int(py)), 4, (0, 255, 0), -1)

    for start, end in COCO_SKELETON:
        if not (scores[start] >= thr and scores[end] >= thr):
            continue
        start_xy = (int(keypoints[start][0]), int(keypoints[start][1]))
        end_xy = (int(keypoints[end][0]), int(keypoints[end][1]))
        cv2.line(img, start_xy, end_xy, (255, 128, 0), 2)
115
+
116
+
117
def main():
    """Run RTMPose-M with ONNX Runtime on one image and save the overlay.

    Flags: ``-m/--model`` (ONNX file), ``-i/--image`` (required input),
    ``-o/--output`` (result image), ``--score_thres`` (drawing threshold).

    Exits with an error message when the input image cannot be read or the
    result image cannot be written.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--model", default="rtmpose_m_256x192.onnx")
    ap.add_argument("-i", "--image", required=True)
    ap.add_argument("-o", "--output", default="onnx_result.jpg")
    ap.add_argument("--score_thres", type=float, default=0.3)
    args = ap.parse_args()

    img0 = cv2.imread(args.image)
    if img0 is None:
        # Explicit check: an `assert` would be stripped under `python -O`.
        raise SystemExit(f"Cannot read {args.image}")

    # Single source of truth for the network input size (width, height);
    # used both for the warp and for decoding the keypoints below.
    input_size = (192, 256)
    inp, center, scale = preprocess(img0, input_size)

    sess = ort.InferenceSession(args.model, providers=["CPUExecutionProvider"])
    outputs = sess.run(None, {sess.get_inputs()[0].name: inp})
    simcc_x, simcc_y = outputs[0], outputs[1]

    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")

    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
    # SimCC bins -> network-input pixels -> source-image pixels.
    keypoints = locs / SIMCC_SPLIT_RATIO
    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
    keypoints = keypoints[0]
    scores = scores[0]

    above = (scores >= args.score_thres).sum()
    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
    for i, ((x, y), s) in enumerate(zip(keypoints, scores)):
        print(f" kp{i:02d}: ({x:6.1f}, {y:6.1f}) score={s:.3f}")

    draw(img0, keypoints, scores, args.score_thres)
    if not cv2.imwrite(args.output, img0):
        # imwrite reports failure through its return value, not an exception.
        raise SystemExit(f"Cannot write {args.output}")
    print(f"Saved: {args.output}")
152
+
153
+
154
+ if __name__ == "__main__":
155
+ main()
replace_hardsigmoid.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Replace HardSigmoid with Mul + Add + Clip in RTMPose ONNX.
3
+
4
+ Replacing HardSigmoid with standard ops (Mul/Add/Clip)
5
+ allows FP32 or U16 quantization on these nodes.
6
+
7
+ Equivalent: HardSigmoid(x) = Clip(x * alpha + beta, 0, 1)
8
+ """
9
+
10
+ import argparse
11
+
12
+ import numpy as np
13
+ import onnx
14
+ from onnx import TensorProto, helper, numpy_helper
15
+
16
+
17
def replace_hardsigmoid(model: onnx.ModelProto) -> int:
    """Rewrite every HardSigmoid node of ``model.graph`` as Mul -> Add -> Clip.

    ``HardSigmoid(x) = Clip(x * alpha + beta, 0, 1)``. Each replaced node
    yields three nodes named ``hs_replace_<k>_Mul`` / ``_Add`` / ``_Clip``
    (``k`` counts from 1 in graph order) plus four float32 initializers.
    The graph is modified in place; node order and the original output
    tensor name are preserved.

    The Clip bounds are emitted as rank-0 tensors because the ONNX Clip
    operator requires ``min`` / ``max`` to be scalars. Clip only accepts
    them as inputs from opset 11 on, so the model is assumed to use
    opset >= 11 (not checked here).

    Returns:
        Number of HardSigmoid nodes that were replaced.
    """
    graph = model.graph
    new_nodes = []
    initializers_to_add = []
    hs_count = 0

    # Iterate over a snapshot: the node list is rebuilt after the loop.
    for n in list(graph.node):
        if n.op_type != "HardSigmoid":
            new_nodes.append(n)
            continue

        hs_count += 1
        inp = n.input[0]
        out = n.output[0]
        prefix = f"hs_replace_{hs_count}"

        # Defaults used when the node carries no explicit attributes.
        alpha = 0.2
        beta = 0.5
        for attr in n.attribute:
            if attr.name == "alpha":
                alpha = attr.f
            elif attr.name == "beta":
                beta = attr.f

        alpha_name = f"{prefix}_alpha"
        beta_name = f"{prefix}_beta"
        min_name = f"{prefix}_min"
        max_name = f"{prefix}_max"

        constants = (
            # One-element tensors broadcast against the activation.
            (alpha_name, np.array([alpha], dtype=np.float32)),
            (beta_name, np.array([beta], dtype=np.float32)),
            # Clip bounds must be true scalars (empty shape).
            (min_name, np.array(0.0, dtype=np.float32)),
            (max_name, np.array(1.0, dtype=np.float32)),
        )
        initializers_to_add.extend(
            numpy_helper.from_array(value, name) for name, value in constants
        )

        mul_out = f"{prefix}_mul_out"
        add_out = f"{prefix}_add_out"

        new_nodes.extend([
            helper.make_node("Mul", [inp, alpha_name], [mul_out], name=f"{prefix}_Mul"),
            helper.make_node("Add", [mul_out, beta_name], [add_out], name=f"{prefix}_Add"),
            # Writes to the original output name so consumers stay connected.
            helper.make_node("Clip", [add_out, min_name, max_name], [out], name=f"{prefix}_Clip"),
        ])

    del graph.node[:]
    graph.node.extend(new_nodes)
    graph.initializer.extend(initializers_to_add)

    return hs_count
75
+
76
+
77
def fix_batch_dim(model: onnx.ModelProto):
    """Force the leading dimension of every real graph input to the static value 1.

    Both symbolic and static leading dimensions are overwritten, in place.
    Graph inputs that are backed by an initializer (weights listed as inputs
    by some exporters) are skipped: their first dimension is not a batch
    axis, and rewriting it would corrupt the declared weight shape.
    Inputs that declare no dimensions are skipped as well.
    """
    initializer_names = {init.name for init in model.graph.initializer}
    for inp in model.graph.input:
        if inp.name in initializer_names:
            continue
        dims = inp.type.tensor_type.shape.dim
        if not dims:
            continue
        d0 = dims[0]
        if d0.dim_param or d0.dim_value != 1:
            # dim_param and dim_value share a oneof, so assigning the value
            # also clears any symbolic name.
            d0.dim_value = 1
85
+
86
+
87
def main():
    """Replace HardSigmoid in an ONNX file, save it and smoke-test the result.

    Flags: ``--input`` (source model) and ``--output`` (rewritten model).
    After saving, the new model is loaded with ONNX Runtime on the CPU and
    run once on random input; this checks that it executes, not that its
    outputs match the source model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default="rtmpose_m_256x192.onnx")
    parser.add_argument("--output", default="rtmpose_m_256x192_no_hs.onnx")
    args = parser.parse_args()

    model = onnx.load(args.input)

    count = replace_hardsigmoid(model)
    print(f"Replaced {count} HardSigmoid -> Mul+Add+Clip")

    fix_batch_dim(model)
    print("Fixed dynamic batch dim -> 1")

    onnx.save(model, args.output)
    print(f"Saved: {args.output}")

    # Imported here so the rewrite itself does not depend on onnxruntime;
    # only this final check does.
    import onnxruntime as ort

    session = ort.InferenceSession(args.output, providers=["CPUExecutionProvider"])
    first_input = session.get_inputs()[0]
    random_feed = np.random.randn(*first_input.shape).astype(np.float32)
    outs = session.run(None, {first_input.name: random_feed})
    print(f"Verify OK: {[o.shape for o in outs]}")
110
+
111
+
112
+ if __name__ == "__main__":
113
+ main()
result_onnx.jpg ADDED

Git LFS Details

  • SHA256: f9799f41d5d5077e32bc4f512f92ef4a0097dd34bc17edb2572c43764b850158
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB
rtmpose_cali.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa0db5fe95a54b8959bc9709bdb8427dc2820b4e9362abeb871c05a0e79d3b6
3
+ size 1259520
rtmpose_m_256x192.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0bbe036e98aa62913e8b2b1e523cb7d614e1be3255fe9cf22df3c4a8342bd3
3
+ size 54330887
rtmpose_m_256x192_no_hs.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bd9ae001505183d5489065663728728ed6b0a576a11785dcc4efefbe5d4796
3
+ size 54332145
test.jpg ADDED

Git LFS Details

  • SHA256: 83981537a7baeafbeb9c8cb67b3484dc26433f574b3685d021fa537e277e4726
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB