| |
| """Replace HardSigmoid with Mul + Add + Clip in RTMPose ONNX. |
| |
| Replacing HardSigmoid with standard ops (Mul/Add/Clip) |
| allows FP32 or U16 quantization on these nodes. |
| |
| Equivalent: HardSigmoid(x) = Clip(x * alpha + beta, 0, 1) |
| """ |
|
|
| import argparse |
|
|
| import numpy as np |
| import onnx |
| from onnx import TensorProto, helper, numpy_helper |
|
|
|
|
| def replace_hardsigmoid(model: onnx.ModelProto) -> int: |
| graph = model.graph |
| nodes = list(graph.node) |
|
|
| new_nodes = [] |
| initializers_to_add = [] |
| hs_count = 0 |
|
|
| for n in nodes: |
| if n.op_type == "HardSigmoid": |
| hs_count += 1 |
| inp = n.input[0] |
| out = n.output[0] |
| prefix = f"hs_replace_{hs_count}" |
|
|
| alpha = 0.2 |
| beta = 0.5 |
| for attr in n.attribute: |
| if attr.name == "alpha": |
| alpha = attr.f |
| elif attr.name == "beta": |
| beta = attr.f |
|
|
| alpha_name = f"{prefix}_alpha" |
| beta_name = f"{prefix}_beta" |
| min_name = f"{prefix}_min" |
| max_name = f"{prefix}_max" |
|
|
| initializers_to_add.append( |
| numpy_helper.from_array(np.array([alpha], dtype=np.float32), alpha_name) |
| ) |
| initializers_to_add.append( |
| numpy_helper.from_array(np.array([beta], dtype=np.float32), beta_name) |
| ) |
| initializers_to_add.append( |
| numpy_helper.from_array(np.array([0.0], dtype=np.float32), min_name) |
| ) |
| initializers_to_add.append( |
| numpy_helper.from_array(np.array([1.0], dtype=np.float32), max_name) |
| ) |
|
|
| mul_out = f"{prefix}_mul_out" |
| add_out = f"{prefix}_add_out" |
|
|
| mul_node = helper.make_node("Mul", [inp, alpha_name], [mul_out], name=f"{prefix}_Mul") |
| add_node = helper.make_node("Add", [mul_out, beta_name], [add_out], name=f"{prefix}_Add") |
| clip_node = helper.make_node("Clip", [add_out, min_name, max_name], [out], name=f"{prefix}_Clip") |
|
|
| new_nodes.extend([mul_node, add_node, clip_node]) |
| else: |
| new_nodes.append(n) |
|
|
| del graph.node[:] |
| graph.node.extend(new_nodes) |
| for init in initializers_to_add: |
| graph.initializer.append(init) |
|
|
| return hs_count |
|
|
|
|
| def fix_batch_dim(model: onnx.ModelProto): |
| for inp in model.graph.input: |
| shape = inp.type.tensor_type.shape |
| if shape and shape.dim: |
| d0 = shape.dim[0] |
| if d0.dim_param or d0.dim_value != 1: |
| d0.dim_param = "" |
| d0.dim_value = 1 |
|
|
|
|
| def main(): |
| ap = argparse.ArgumentParser() |
| ap.add_argument("--input", default="rtmpose_m_256x192.onnx") |
| ap.add_argument("--output", default="rtmpose_m_256x192_no_hs.onnx") |
| args = ap.parse_args() |
|
|
| model = onnx.load(args.input) |
| count = replace_hardsigmoid(model) |
| print(f"Replaced {count} HardSigmoid -> Mul+Add+Clip") |
|
|
| fix_batch_dim(model) |
| print("Fixed dynamic batch dim -> 1") |
|
|
| onnx.save(model, args.output) |
| print(f"Saved: {args.output}") |
|
|
| import onnxruntime as ort |
|
|
| sess = ort.InferenceSession(args.output, providers=["CPUExecutionProvider"]) |
| inp = sess.get_inputs()[0] |
| dummy = np.random.randn(*inp.shape).astype(np.float32) |
| outs = sess.run(None, {inp.name: dummy}) |
| print(f"Verify OK: {[o.shape for o in outs]}") |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|