Upload 13 files

Browse files

Files changed (14) hide show

.gitattributes +5 -0
AX650/rtmpose_m_npu1.axmodel +3 -0
AX650/rtmpose_m_npu3.axmodel +3 -0
ax_infer.py +162 -0
ax_result.jpg +3 -0
config.json +235 -0
export_onnx.py +101 -0
onnx_infer.py +155 -0
replace_hardsigmoid.py +113 -0
result_onnx.jpg +3 -0
rtmpose_cali.tar +3 -0
rtmpose_m_256x192.onnx +3 -0
rtmpose_m_256x192_no_hs.onnx +3 -0
test.jpg +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ax_result.jpg filter=lfs diff=lfs merge=lfs -text
+AX650/rtmpose_m_npu1.axmodel filter=lfs diff=lfs merge=lfs -text
+AX650/rtmpose_m_npu3.axmodel filter=lfs diff=lfs merge=lfs -text
+result_onnx.jpg filter=lfs diff=lfs merge=lfs -text
+test.jpg filter=lfs diff=lfs merge=lfs -text

AX650/rtmpose_m_npu1.axmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ebc503bdf788009706c60876b3ecd6ed12a833888286868f4cf4208a1eb91e
+size 18848816

AX650/rtmpose_m_npu3.axmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17313d5d20b0070672bf8617960162ea2ac6f7b8da12b42b5bb4974c45570fa7
+size 18247844

ax_infer.py ADDED Viewed

	@@ -0,0 +1,162 @@

+#!/usr/bin/env python3
+"""
+RTMPose-M axengine inference on AXERA NPU.
+"""
+import argparse
+import os
+from time import time
+from typing import Tuple
+import cv2
+import numpy as np
+try:
+    import axengine as axe
+except ImportError:
+    import onnxruntime as axe
+SIMCC_SPLIT_RATIO = 2.0
+NUM_KP = 17
+COCO_SKELETON = [
+    (15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
+    (5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
+    (7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
+    (1, 3), (2, 4), (3, 5), (4, 6),
+]
+def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
+    x1, y1, x2, y2 = bbox
+    center = np.array([(x1 + x2) * 0.5, (y1 + y2) * 0.5], dtype=np.float32)
+    scale = np.array([(x2 - x1) * padding, (y2 - y1) * padding], dtype=np.float32)
+    return center, scale
+def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
+    w, h = bbox_scale
+    if w > h * aspect_ratio:
+        return np.array([w, w / aspect_ratio], dtype=np.float32)
+    else:
+        return np.array([h * aspect_ratio, h], dtype=np.float32)
+def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
+    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+    return np.array([cs * pt[0] - sn * pt[1], sn * pt[0] + cs * pt[1]])
+def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    direction = a - b
+    return b + np.r_[-direction[1], direction[0]]
+def get_warp_matrix(center, scale, rot, output_size):
+    src_w = scale[0]
+    dst_w, dst_h = output_size
+    rot_rad = np.deg2rad(rot)
+    src_dir = _rotate_point(np.array([0.0, src_w * -0.5]), rot_rad)
+    dst_dir = np.array([0.0, dst_w * -0.5])
+    src_points = np.zeros((3, 2), dtype=np.float32)
+    src_points[0] = center
+    src_points[1] = center + src_dir
+    src_points[2] = _get_3rd_point(src_points[0], src_points[1])
+    dst_points = np.zeros((3, 2), dtype=np.float32)
+    dst_points[0] = [dst_w * 0.5, dst_h * 0.5]
+    dst_points[1] = [dst_w * 0.5, dst_h * 0.5] + dst_dir
+    dst_points[2] = _get_3rd_point(dst_points[0], dst_points[1])
+    return cv2.getAffineTransform(src_points, dst_points)
+def preprocess(img_bgr, input_size=(192, 256)):
+    h, w = img_bgr.shape[:2]
+    bbox = np.array([0, 0, w, h], dtype=np.float32)
+    center, scale = bbox_xyxy2cs(bbox, padding=1.25)
+    scale = _fix_aspect_ratio(scale, input_size[0] / input_size[1])
+    warp_mat = get_warp_matrix(center, scale, 0, input_size)
+    img_warped = cv2.warpAffine(img_bgr, warp_mat, input_size, flags=cv2.INTER_LINEAR)
+    inp = img_warped[None]  # (1, H, W, 3) uint8 NHWC BGR, axmodel handles BGR->RGB
+    return inp, center, scale
+def get_simcc_maximum(simcc_x, simcc_y):
+    N, K, Wx = simcc_x.shape
+    x_locs = np.argmax(simcc_x, axis=2)
+    y_locs = np.argmax(simcc_y, axis=2)
+    x_vals = np.take_along_axis(simcc_x, x_locs[:, :, None], axis=2).squeeze(2)
+    y_vals = np.take_along_axis(simcc_y, y_locs[:, :, None], axis=2).squeeze(2)
+    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
+    scores = np.minimum(x_vals, y_vals)
+    return locs, scores
+def draw(img, keypoints, scores, thr=0.3):
+    for i, ((x, y), s) in enumerate(zip(keypoints, scores)):
+        if s < thr:
+            continue
+        cv2.circle(img, (int(x), int(y)), 4, (0, 255, 0), -1)
+    for i, j in COCO_SKELETON:
+        if scores[i] >= thr and scores[j] >= thr:
+            pt1 = (int(keypoints[i][0]), int(keypoints[i][1]))
+            pt2 = (int(keypoints[j][0]), int(keypoints[j][1]))
+            cv2.line(img, pt1, pt2, (255, 128, 0), 2)
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-m", "--model", default="output/rtmpose_m_npu3.axmodel")
+    ap.add_argument("-i", "--image", required=True)
+    ap.add_argument("-o", "--output", default="ax_result.jpg")
+    ap.add_argument("--score_thres", type=float, default=0.3)
+    ap.add_argument("--warmup", type=int, default=3)
+    ap.add_argument("--repeat", type=int, default=10)
+    args = ap.parse_args()
+    img0 = cv2.imread(args.image)
+    assert img0 is not None, f"Cannot read {args.image}"
+    inp, center, scale = preprocess(img0)
+    input_size = (192, 256)
+    model = axe.InferenceSession(args.model)
+    inp_info = model.get_inputs()[0]
+    dtype_str = getattr(inp_info, "dtype", getattr(inp_info, "type", "unknown"))
+    print(f"Model input: name={inp_info.name}, shape={inp_info.shape}, dtype={dtype_str}")
+    for _ in range(args.warmup):
+        model.run(None, {inp_info.name: inp})
+    t0 = time()
+    for _ in range(args.repeat):
+        outputs = model.run(None, {inp_info.name: inp})
+    elapsed = (time() - t0) / args.repeat * 1000
+    print(f"Forward: {elapsed:.2f} ms (avg of {args.repeat} runs)")
+    simcc_x, simcc_y = outputs[0], outputs[1]
+    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
+    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")
+    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
+    keypoints = locs / SIMCC_SPLIT_RATIO
+    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
+    keypoints = keypoints[0]
+    scores = scores[0]
+    above = (scores >= args.score_thres).sum()
+    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
+    for i, ((x, y), sc) in enumerate(zip(keypoints, scores)):
+        print(f"  kp{i:02d}: ({x:6.1f}, {y:6.1f})  score={sc:.4f}")
+    draw(img0, keypoints, scores, args.score_thres)
+    cv2.imwrite(args.output, img0)
+    print(f"Saved: {args.output}")
+if __name__ == "__main__":
+    main()

ax_result.jpg ADDED Viewed

Git LFS Details

SHA256: 6a51a0c154d54b6922e83d43c3ec544eee64557e2626aaf52d34864500d69a19
Pointer size: 131 Bytes
Size of remote file: 106 kB

config.json ADDED Viewed

	@@ -0,0 +1,235 @@

+{
+  "model_type": "ONNX",
+  "npu_mode": "NPU3",
+  "input_shapes": "input:1x3x256x192",
+  "quant": {
+    "input_configs": [
+      {
+        "tensor_name": "DEFAULT",
+        "calibration_dataset": "./rtmpose_cali.tar",
+        "calibration_size": 100,
+        "calibration_mean": [
+          123.675,
+          116.28,
+          103.53
+        ],
+        "calibration_std": [
+          58.395,
+          57.12,
+          57.375
+        ]
+      }
+    ],
+    "calibration_method": "MSE",
+    "layer_configs": [
+      {
+        "layer_names": [
+          "Conv_0",
+          "Conv_3",
+          "Conv_6",
+          "Conv_9",
+          "Conv_12",
+          "Conv_15",
+          "Conv_18",
+          "Conv_21",
+          "Conv_24",
+          "Conv_28",
+          "Conv_31",
+          "Conv_34",
+          "Conv_46",
+          "Conv_55",
+          "Conv_58",
+          "Conv_65",
+          "Conv_68",
+          "Conv_75",
+          "Conv_78",
+          "Conv_85",
+          "Conv_88",
+          "Conv_103",
+          "Conv_112",
+          "Conv_115",
+          "Conv_122",
+          "Conv_125",
+          "Conv_132",
+          "Conv_135",
+          "Conv_142",
+          "Conv_145",
+          "Conv_160",
+          "Conv_179",
+          "Conv_182",
+          "Conv_188",
+          "Conv_191",
+          "Conv_205",
+          "Conv_40",
+          "Conv_43",
+          "Conv_49",
+          "Conv_52",
+          "Conv_71",
+          "Conv_81",
+          "Conv_91",
+          "Conv_97",
+          "Conv_100",
+          "Conv_106",
+          "Conv_109",
+          "Conv_118",
+          "Conv_128",
+          "Conv_138",
+          "Conv_148",
+          "Conv_154",
+          "Conv_157",
+          "Conv_163",
+          "Conv_170",
+          "Conv_173",
+          "Conv_176",
+          "Conv_185",
+          "Conv_194",
+          "Conv_199",
+          "Conv_202",
+          "GlobalAveragePool_39",
+          "GlobalAveragePool_153",
+          "GlobalAveragePool_198",
+          "Concat_152",
+          "Concat_169",
+          "op_1:onnx.Silu",
+          "op_2:onnx.Silu",
+          "op_5:onnx.Silu",
+          "op_6:onnx.Silu",
+          "op_7:onnx.Silu",
+          "op_8:onnx.Silu",
+          "op_9:onnx.Silu",
+          "op_10:onnx.Silu",
+          "op_11:onnx.Silu",
+          "op_12:onnx.Silu",
+          "op_13:onnx.Silu",
+          "op_15:onnx.Silu",
+          "op_16:onnx.Silu",
+          "op_17:onnx.Silu",
+          "op_18:onnx.Silu",
+          "op_19:onnx.Silu",
+          "op_20:onnx.Silu",
+          "op_21:onnx.Silu",
+          "op_23:onnx.Silu",
+          "op_25:onnx.Silu",
+          "op_26:onnx.Silu",
+          "op_27:onnx.Silu",
+          "op_29:onnx.Silu",
+          "op_30:onnx.Silu",
+          "op_34:onnx.Silu",
+          "op_49:onnx.Silu",
+          "op_50:onnx.Silu",
+          "op_52:onnx.Silu",
+          "op_54:onnx.Silu",
+          "op_56:onnx.Silu",
+          "op_57:onnx.Silu",
+          "op_58:onnx.Silu",
+          "Add_64",
+          "Add_74",
+          "Add_84",
+          "Add_94",
+          "Add_121",
+          "Add_131",
+          "Add_141",
+          "Add_151",
+          "MaxPool_166",
+          "MaxPool_167",
+          "MaxPool_168",
+          "Reshape_212",
+          "Split_233",
+          "Unsqueeze_234",
+          "Split_237",
+          "Squeeze_238",
+          "Squeeze_239",
+          "Transpose_240"
+        ],
+        "data_type": "U16"
+      },
+      {
+        "layer_names": [
+          "Conv_61",
+          "op_32:onnx.Silu",
+          "op_43:onnx.Silu",
+          "op_53:onnx.Silu",
+          "op_4:onnx.Silu",
+          "op_14:onnx.Silu",
+          "op_22:onnx.Silu",
+          "op_33:onnx.Silu",
+          "op_35:onnx.Silu",
+          "op_37:onnx.Silu",
+          "op_38:onnx.Silu",
+          "op_39:onnx.Silu",
+          "op_40:onnx.Silu",
+          "hs_replace_1_Mul",
+          "hs_replace_1_Add",
+          "hs_replace_1_Clip",
+          "Mul_42",
+          "Concat_95",
+          "GlobalAveragePool_96",
+          "hs_replace_2_Mul",
+          "hs_replace_2_Add",
+          "hs_replace_2_Clip",
+          "Mul_99",
+          "hs_replace_3_Mul",
+          "hs_replace_3_Add",
+          "hs_replace_3_Clip",
+          "Mul_156",
+          "hs_replace_4_Mul",
+          "hs_replace_4_Add",
+          "hs_replace_4_Clip",
+          "Mul_201",
+          "op_66:onnx.RMSNormalization",
+          "op_61:onnx.FullyConnected",
+          "Mul_249",
+          "op_67:onnx.RMSNormalization",
+          "op_62:onnx.FullyConnected",
+          "op_36:onnx.Silu",
+          "Mul_235",
+          "Add_236",
+          "MatMul_241",
+          "op_68:onnx.Mul",
+          "Relu_244",
+          "Mul_245",
+          "MatMul_246",
+          "Mul_247",
+          "op_63:onnx.FullyConnected",
+          "Add_250",
+          "op_64:onnx.FullyConnected",
+          "op_65:onnx.FullyConnected",
+          "op_48:onnx.Silu",
+          "op_41:onnx.Silu",
+          "op_42:onnx.Silu",
+          "op_44:onnx.Silu",
+          "op_45:onnx.Silu",
+          "op_46:onnx.Silu",
+          "op_47:onnx.Silu",
+          "Add_27",
+          "Add_37",
+          "op_24:onnx.Silu",
+          "Concat_38",
+          "op_28:onnx.Silu",
+          "Concat_197",
+          "op_3:onnx.Silu",
+          "op_55:onnx.Silu",
+          "op_31:onnx.Silu",
+          "op_51:onnx.Silu"
+        ],
+        "data_type": "FP32",
+        "weight_data_type": "FP32",
+        "output_data_type": "FP32"
+      }
+    ],
+    "precision_analysis": true,
+    "precision_analysis_method": "EndToEnd"
+  },
+  "input_processors": [
+    {
+      "tensor_name": "DEFAULT",
+      "tensor_format": "RGB",
+      "src_format": "BGR",
+      "src_dtype": "U8",
+      "src_layout": "NHWC"
+    }
+  ],
+  "compiler": {
+    "check": 0
+  }
+}

export_onnx.py ADDED Viewed

	@@ -0,0 +1,101 @@

+#!/usr/bin/env python3
+"""Export RTMPose-M 256x192 ONNX from official OpenMMLab pretrained model.
+Downloads the official pre-exported ONNX from OpenMMLab model zoo,
+converts opset if needed, and fixes the batch dimension to a static value (1 by default).
+Model: RTMPose-M (13.58M params)
+Input: 1x3x256x192 (RGB, float32)
+Output: simcc_x (1,17,384), simcc_y (1,17,512)
+"""
+import argparse
+import io
+import os
+import zipfile
+import numpy as np
+import onnx
+import onnx.version_converter
+import requests
+ONNX_URL = (
+    "https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/"
+    "onnx_sdk/rtmpose-m_simcc-body7_pt-body7_420e-256x192-"
+    "e48f03d0_20230504.zip"
+)
+CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".model_cache")
+def download_onnx() -> str:
+    """Download official RTMPose-M ONNX and cache locally."""
+    cache_path = os.path.join(CACHE_DIR, "rtmpose_m_official.onnx")
+    if os.path.exists(cache_path):
+        print(f"Using cached ONNX: {cache_path}")
+        return cache_path
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    print("Downloading official RTMPose-M ONNX from OpenMMLab...")
+    resp = requests.get(ONNX_URL, timeout=120)
+    resp.raise_for_status()
+    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+        for name in zf.namelist():
+            if name.endswith(".onnx"):
+                with zf.open(name) as src, open(cache_path, "wb") as dst:
+                    dst.write(src.read())
+                print(f"Cached: {cache_path}")
+                return cache_path
+    raise RuntimeError("No .onnx found in downloaded zip")
+def convert_opset(model: onnx.ModelProto, target_opset: int) -> onnx.ModelProto:
+    """Convert ONNX model to target opset version if needed."""
+    current_opset = model.opset_import[0].version
+    if current_opset == target_opset:
+        return model
+    print(f"Converting opset {current_opset} -> {target_opset}")
+    return onnx.version_converter.convert_version(model, target_opset)
+def fix_batch_dim(model: onnx.ModelProto, batch: int = 1) -> None:
+    """Replace dynamic batch dim (dim_param) with static dim_value."""
+    for tensor in list(model.graph.input) + list(model.graph.output):
+        dim0 = tensor.type.tensor_type.shape.dim[0]
+        if dim0.dim_param:
+            dim0.ClearField("dim_param")
+            dim0.dim_value = batch
+def print_model_info(model: onnx.ModelProto) -> None:
+    """Print model parameter count and IO shapes."""
+    total_params = sum(int(np.prod(init.dims)) for init in model.graph.initializer)
+    print(f"Parameters: {total_params / 1e6:.2f}M")
+    for inp in model.graph.input:
+        dims = [d.dim_value for d in inp.type.tensor_type.shape.dim]
+        print(f"  Input:  {inp.name} {dims}")
+    for out in model.graph.output:
+        dims = [d.dim_value for d in out.type.tensor_type.shape.dim]
+        print(f"  Output: {out.name} {dims}")
+def main():
+    ap = argparse.ArgumentParser(description="Export RTMPose-M 256x192 ONNX")
+    ap.add_argument("--opset", type=int, default=13, help="Target ONNX opset version")
+    ap.add_argument("--output", default="rtmpose_m_256x192.onnx", help="Output path")
+    ap.add_argument("--batch", type=int, default=1, help="Static batch size")
+    args = ap.parse_args()
+    source_path = download_onnx()
+    model = onnx.load(source_path)
+    model = convert_opset(model, args.opset)
+    fix_batch_dim(model, args.batch)
+    onnx.save(model, args.output)
+    print(f"\nExported: {args.output} ({os.path.getsize(args.output) / 1e6:.2f} MB)")
+    print_model_info(model)
+if __name__ == "__main__":
+    main()

onnx_infer.py ADDED Viewed

	@@ -0,0 +1,155 @@

+#!/usr/bin/env python3
+"""
+ONNX Runtime inference for RTMPose-M 256x192
+"""
+import argparse
+import os
+from typing import Tuple
+import cv2
+import numpy as np
+import onnxruntime as ort
+SIMCC_SPLIT_RATIO = 2.0
+NUM_KP = 17
+MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
+STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)
+COCO_SKELETON = [
+    (15, 13), (13, 11), (16, 14), (14, 12), (11, 12),
+    (5, 11), (6, 12), (5, 6), (5, 7), (6, 8),
+    (7, 9), (8, 10), (1, 2), (0, 1), (0, 2),
+    (1, 3), (2, 4), (3, 5), (4, 6),
+]
+def bbox_xyxy2cs(bbox: np.ndarray, padding: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
+    dim = bbox.ndim
+    if dim == 1:
+        bbox = bbox[None, :]
+    x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3])
+    center = np.hstack([x1 + x2, y1 + y2]) * 0.5
+    scale = np.hstack([x2 - x1, y2 - y1]) * padding
+    if dim == 1:
+        center = center[0]
+        scale = scale[0]
+    return center, scale
+def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
+    w, h = np.hsplit(bbox_scale, [1])
+    return np.where(w > h * aspect_ratio,
+                    np.hstack([w, w / aspect_ratio]),
+                    np.hstack([h * aspect_ratio, h]))
+def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
+    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+    return np.array([cs * pt[0] - sn * pt[1], sn * pt[0] + cs * pt[1]])
+def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    direction = a - b
+    return b + np.r_[-direction[1], direction[0]]
+def get_warp_matrix(center, scale, rot, output_size):
+    shift = np.array([0, 0], dtype=np.float32)
+    src_w = scale[0]
+    dst_w, dst_h = output_size
+    rot_rad = np.deg2rad(rot)
+    src_dir = _rotate_point(np.array([0.0, src_w * -0.5]), rot_rad)
+    dst_dir = np.array([0.0, dst_w * -0.5])
+    src_points = np.zeros((3, 2), dtype=np.float32)
+    src_points[0] = center + scale * shift
+    src_points[1] = center + src_dir + scale * shift
+    src_points[2] = _get_3rd_point(src_points[0], src_points[1])
+    dst_points = np.zeros((3, 2), dtype=np.float32)
+    dst_points[0] = [dst_w * 0.5, dst_h * 0.5]
+    dst_points[1] = [dst_w * 0.5, dst_h * 0.5] + dst_dir
+    dst_points[2] = _get_3rd_point(dst_points[0], dst_points[1])
+    return cv2.getAffineTransform(src_points, dst_points)
+def preprocess(img_bgr, input_size=(192, 256)):
+    h, w = img_bgr.shape[:2]
+    bbox = np.array([0, 0, w, h], dtype=np.float32)
+    center, scale = bbox_xyxy2cs(bbox, padding=1.25)
+    scale = _fix_aspect_ratio(scale.reshape(1, 2), input_size[0] / input_size[1])[0]
+    warp_mat = get_warp_matrix(center, scale, 0, input_size)
+    img_warped = cv2.warpAffine(img_bgr, warp_mat, input_size, flags=cv2.INTER_LINEAR)
+    img_rgb = cv2.cvtColor(img_warped, cv2.COLOR_BGR2RGB).astype(np.float32)
+    img_rgb = (img_rgb - MEAN) / STD
+    inp = img_rgb.transpose(2, 0, 1)[None]
+    return inp, center, scale
+def get_simcc_maximum(simcc_x, simcc_y):
+    N, K, Wx = simcc_x.shape
+    Wy = simcc_y.shape[2]
+    x_locs = np.argmax(simcc_x, axis=2)
+    y_locs = np.argmax(simcc_y, axis=2)
+    x_vals = np.take_along_axis(simcc_x, x_locs[:, :, None], axis=2).squeeze(2)
+    y_vals = np.take_along_axis(simcc_y, y_locs[:, :, None], axis=2).squeeze(2)
+    locs = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)
+    scores = np.minimum(x_vals, y_vals)
+    return locs, scores
+def draw(img, keypoints, scores, thr=0.3):
+    for i, ((x, y), s) in enumerate(zip(keypoints, scores)):
+        if s < thr:
+            continue
+        cv2.circle(img, (int(x), int(y)), 4, (0, 255, 0), -1)
+    for i, j in COCO_SKELETON:
+        if scores[i] >= thr and scores[j] >= thr:
+            pt1 = (int(keypoints[i][0]), int(keypoints[i][1]))
+            pt2 = (int(keypoints[j][0]), int(keypoints[j][1]))
+            cv2.line(img, pt1, pt2, (255, 128, 0), 2)
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-m", "--model", default="rtmpose_m_256x192.onnx")
+    ap.add_argument("-i", "--image", required=True)
+    ap.add_argument("-o", "--output", default="onnx_result.jpg")
+    ap.add_argument("--score_thres", type=float, default=0.3)
+    args = ap.parse_args()
+    img0 = cv2.imread(args.image)
+    assert img0 is not None, f"Cannot read {args.image}"
+    inp, center, scale = preprocess(img0)
+    input_size = (192, 256)
+    sess = ort.InferenceSession(args.model, providers=["CPUExecutionProvider"])
+    outputs = sess.run(None, {sess.get_inputs()[0].name: inp})
+    simcc_x, simcc_y = outputs[0], outputs[1]
+    print(f"simcc_x: shape={simcc_x.shape}, range=[{simcc_x.min():.2f}, {simcc_x.max():.2f}]")
+    print(f"simcc_y: shape={simcc_y.shape}, range=[{simcc_y.min():.2f}, {simcc_y.max():.2f}]")
+    locs, scores = get_simcc_maximum(simcc_x, simcc_y)
+    keypoints = locs / SIMCC_SPLIT_RATIO
+    keypoints = keypoints / np.array(input_size) * scale + center - scale / 2
+    keypoints = keypoints[0]
+    scores = scores[0]
+    above = (scores >= args.score_thres).sum()
+    print(f"kpts above {args.score_thres}: {above}/{NUM_KP}")
+    for i, ((x, y), s) in enumerate(zip(keypoints, scores)):
+        print(f"  kp{i:02d}: ({x:6.1f}, {y:6.1f})  score={s:.3f}")
+    draw(img0, keypoints, scores, args.score_thres)
+    cv2.imwrite(args.output, img0)
+    print(f"Saved: {args.output}")
+if __name__ == "__main__":
+    main()

replace_hardsigmoid.py ADDED Viewed

	@@ -0,0 +1,113 @@

+#!/usr/bin/env python3
+"""Replace HardSigmoid with Mul + Add + Clip in RTMPose ONNX.
+Replacing HardSigmoid with standard ops (Mul/Add/Clip)
+allows FP32 or U16 quantization on these nodes.
+Equivalent: HardSigmoid(x) = Clip(x * alpha + beta, 0, 1)
+"""
+import argparse
+import numpy as np
+import onnx
+from onnx import TensorProto, helper, numpy_helper
+def replace_hardsigmoid(model: onnx.ModelProto) -> int:
+    graph = model.graph
+    nodes = list(graph.node)
+    new_nodes = []
+    initializers_to_add = []
+    hs_count = 0
+    for n in nodes:
+        if n.op_type == "HardSigmoid":
+            hs_count += 1
+            inp = n.input[0]
+            out = n.output[0]
+            prefix = f"hs_replace_{hs_count}"
+            alpha = 0.2
+            beta = 0.5
+            for attr in n.attribute:
+                if attr.name == "alpha":
+                    alpha = attr.f
+                elif attr.name == "beta":
+                    beta = attr.f
+            alpha_name = f"{prefix}_alpha"
+            beta_name = f"{prefix}_beta"
+            min_name = f"{prefix}_min"
+            max_name = f"{prefix}_max"
+            initializers_to_add.append(
+                numpy_helper.from_array(np.array([alpha], dtype=np.float32), alpha_name)
+            )
+            initializers_to_add.append(
+                numpy_helper.from_array(np.array([beta], dtype=np.float32), beta_name)
+            )
+            initializers_to_add.append(
+                numpy_helper.from_array(np.array([0.0], dtype=np.float32), min_name)
+            )
+            initializers_to_add.append(
+                numpy_helper.from_array(np.array([1.0], dtype=np.float32), max_name)
+            )
+            mul_out = f"{prefix}_mul_out"
+            add_out = f"{prefix}_add_out"
+            mul_node = helper.make_node("Mul", [inp, alpha_name], [mul_out], name=f"{prefix}_Mul")
+            add_node = helper.make_node("Add", [mul_out, beta_name], [add_out], name=f"{prefix}_Add")
+            clip_node = helper.make_node("Clip", [add_out, min_name, max_name], [out], name=f"{prefix}_Clip")
+            new_nodes.extend([mul_node, add_node, clip_node])
+        else:
+            new_nodes.append(n)
+    del graph.node[:]
+    graph.node.extend(new_nodes)
+    for init in initializers_to_add:
+        graph.initializer.append(init)
+    return hs_count
+def fix_batch_dim(model: onnx.ModelProto):
+    for inp in model.graph.input:
+        shape = inp.type.tensor_type.shape
+        if shape and shape.dim:
+            d0 = shape.dim[0]
+            if d0.dim_param or d0.dim_value != 1:
+                d0.dim_param = ""
+                d0.dim_value = 1
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--input", default="rtmpose_m_256x192.onnx")
+    ap.add_argument("--output", default="rtmpose_m_256x192_no_hs.onnx")
+    args = ap.parse_args()
+    model = onnx.load(args.input)
+    count = replace_hardsigmoid(model)
+    print(f"Replaced {count} HardSigmoid -> Mul+Add+Clip")
+    fix_batch_dim(model)
+    print("Fixed dynamic batch dim -> 1")
+    onnx.save(model, args.output)
+    print(f"Saved: {args.output}")
+    import onnxruntime as ort
+    sess = ort.InferenceSession(args.output, providers=["CPUExecutionProvider"])
+    inp = sess.get_inputs()[0]
+    dummy = np.random.randn(*inp.shape).astype(np.float32)
+    outs = sess.run(None, {inp.name: dummy})
+    print(f"Verify OK: {[o.shape for o in outs]}")
+if __name__ == "__main__":
+    main()

result_onnx.jpg ADDED Viewed

Git LFS Details

SHA256: f9799f41d5d5077e32bc4f512f92ef4a0097dd34bc17edb2572c43764b850158
Pointer size: 131 Bytes
Size of remote file: 106 kB

rtmpose_cali.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fa0db5fe95a54b8959bc9709bdb8427dc2820b4e9362abeb871c05a0e79d3b6
+size 1259520

rtmpose_m_256x192.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f0bbe036e98aa62913e8b2b1e523cb7d614e1be3255fe9cf22df3c4a8342bd3
+size 54330887

rtmpose_m_256x192_no_hs.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87bd9ae001505183d5489065663728728ed6b0a576a11785dcc4efefbe5d4796
+size 54332145

test.jpg ADDED Viewed

Git LFS Details

SHA256: 83981537a7baeafbeb9c8cb67b3484dc26433f574b3685d021fa537e277e4726
Pointer size: 131 Bytes
Size of remote file: 134 kB