0xiviel committed on
Commit
a640dc8
·
verified ·
1 Parent(s): 50e0517

PoC: ExecuTorch DataLoader offset+size integer overflow (CWE-190 -> CWE-125)

Browse files
Files changed (1) hide show
  1. poc_F2_dataloader_offset_overflow.py +316 -0
poc_F2_dataloader_offset_overflow.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ExecuTorch DataLoader offset+size Integer Overflow (CWE-190 -> CWE-125)
4
+ =======================================================================
5
+
6
+ Target: ExecuTorch (pytorch/executorch)
7
+ Commit: 90e6e4ca4ef369ce4288ffcd2a0210d5137117dd
8
+
9
+ Affected Files:
10
+ - runtime/executor/mmap_data_loader.cpp:163
11
+ https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/mmap_data_loader.cpp#L163
12
+ - runtime/executor/file_data_loader.cpp:150
13
+ https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_data_loader.cpp#L150
14
+ - runtime/executor/file_descriptor_data_loader.cpp:161
15
+ https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_descriptor_data_loader.cpp#L161
16
+
17
+ Safe Reference (uses overflow check):
18
+ - extension/data_loader/buffer_data_loader.h:38-41
19
+ https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/extension/data_loader/buffer_data_loader.h#L38-L41
20
+
21
+ Additionally affected — segment offset calculations:
22
+ - runtime/executor/program.cpp:96 (segment_base_offset + segment_data_size)
23
+ - runtime/executor/program.cpp:504 (segment_base_offset_ + segment->offset())
24
+ - runtime/executor/program.cpp:589 (segment_base_offset_ + segment->offset() + segment_info->segment_index())
25
+
26
+ CWE-190: Integer Overflow or Wraparound
27
+ CWE-125: Out-of-bounds Read
28
+
29
+ Description:
30
+ 3 of 4 DataLoader implementations in ExecuTorch check `offset + size <= file_size_`
31
+ to validate that a Load request stays within bounds. However, when offset and size
32
+ are both attacker-controlled 64-bit values from a malicious .pte file, their sum
33
+ can overflow past UINT64_MAX and wrap around to a small value, bypassing the check.
34
+
35
+ BufferDataLoader is the ONLY implementation that correctly uses c10::add_overflows()
36
+ to detect the wraparound before performing the comparison.
37
+
38
+ A malicious .pte model file controls these values through the FlatBuffer schema:
39
+ - DataSegment.offset and DataSegment.size in the Program flatbuffer
40
+ - These flow directly into DataLoader::load(offset, size) calls
41
+
42
+ Impact:
43
+ An attacker crafting a malicious .pte file can cause out-of-bounds memory reads
44
+ (and potentially writes via mmap) by overflowing the offset+size bounds check.
45
+ This can lead to information disclosure or code execution.
46
+ """
47
+
48
+ import struct
49
+ import sys
50
+
51
# Largest value an unsigned 64-bit integer can hold; also used as the
# modulo-2**64 wrap mask when simulating C++ uint64_t addition.
UINT64_MAX = 2**64 - 1
52
+
53
def simulate_unsafe_check(
    offset: int, size: int, file_size: int, *, bits: int = 64
) -> dict:
    """Simulate the UNSAFE bounds check used in 3 of 4 DataLoaders.

    The C++ check being modelled is::

        if (offset + size > file_size_) {
            return Error::InvalidArgument;
        }

    It is vulnerable because ``offset + size`` is evaluated in a fixed-width
    unsigned integer, so the sum wraps modulo ``2**bits`` before it is
    compared against the file size.

    Args:
        offset: Requested start offset of the load.
        size: Requested number of bytes.
        file_size: Size of the backing file.
        bits: Width of the unsigned integer type being simulated. Defaults
            to 64 (``uint64_t``); e.g. pass 32 to model 32-bit operands.

    Returns:
        A dict with the inputs (``offset``, ``size``, ``file_size``), the
        wrapped and the mathematically exact sum
        (``offset_plus_size_wrapped``, ``offset_plus_size_true``), whether
        the wrapping check lets the request through (``check_passes``) and
        whether the request really fits in the file (``is_actually_valid``).

    Raises:
        ValueError: If ``bits`` is not positive, or if any operand cannot be
            represented as an unsigned ``bits``-bit integer (such a value
            could never reach the C++ check in the first place).
    """
    if bits < 1:
        raise ValueError(f"bits must be a positive integer, got {bits}")
    limit = (1 << bits) - 1
    operands = (("offset", offset), ("size", size), ("file_size", file_size))
    for name, value in operands:
        if not 0 <= value <= limit:
            raise ValueError(
                f"{name}={value} is not representable as an unsigned "
                f"{bits}-bit integer"
            )

    # Python ints never overflow, so this is the mathematically exact sum.
    true_sum = offset + size
    # Fixed-width unsigned addition keeps only the low `bits` bits.
    wrapped_sum = true_sum & limit
    return {
        "offset": offset,
        "size": size,
        "file_size": file_size,
        "offset_plus_size_wrapped": wrapped_sum,
        "offset_plus_size_true": true_sum,
        "check_passes": wrapped_sum <= file_size,
        "is_actually_valid": true_sum <= file_size,
    }
75
+
76
+
77
def simulate_safe_check(
    offset: int, size: int, file_size: int, *, bits: int = 64
) -> dict:
    """Simulate the SAFE bounds check used in BufferDataLoader.

    The C++ check being modelled is::

        size_t total;
        if (c10::add_overflows(offset, size, &total) || total > file_size_) {
            return Error::InvalidArgument;
        }

    ``c10::add_overflows()`` reports the wraparound, so a request whose sum
    does not fit in the integer type is rejected before the comparison.

    Args:
        offset: Requested start offset of the load.
        size: Requested number of bytes.
        file_size: Size of the backing buffer/file.
        bits: Width of the unsigned integer type being simulated. Defaults
            to 64; e.g. pass 32 to model 32-bit operands.

    Returns:
        A dict with the inputs (``offset``, ``size``, ``file_size``), whether
        the addition overflowed (``overflow_detected``), whether the check
        lets the request through (``check_passes``) and whether the request
        really fits (``is_actually_valid``).

    Raises:
        ValueError: If ``bits`` is not positive, or if any operand cannot be
            represented as an unsigned ``bits``-bit integer. Without this
            guard a negative offset would slip through the "safe" check.
    """
    if bits < 1:
        raise ValueError(f"bits must be a positive integer, got {bits}")
    limit = (1 << bits) - 1
    operands = (("offset", offset), ("size", size), ("file_size", file_size))
    for name, value in operands:
        if not 0 <= value <= limit:
            raise ValueError(
                f"{name}={value} is not representable as an unsigned "
                f"{bits}-bit integer"
            )

    # Exact sum; exceeding `limit` is precisely what add_overflows() reports.
    true_sum = offset + size
    overflows = true_sum > limit
    return {
        "offset": offset,
        "size": size,
        "file_size": file_size,
        "overflow_detected": overflows,
        # An overflow is rejected outright; otherwise compare the real sum.
        "check_passes": (not overflows) and true_sum <= file_size,
        "is_actually_valid": true_sum <= file_size,
    }
102
+
103
+
104
def print_result(label: str, result: dict, safe: bool = False) -> None:
    """Print a human-readable report for one simulated bounds check.

    Args:
        label: Heading shown in square brackets above the report.
        result: Dict produced by ``simulate_unsafe_check`` (when ``safe`` is
            false) or ``simulate_safe_check`` (when ``safe`` is true).
        safe: Selects which check-specific details are printed: the
            ``overflow_detected`` flag when true, otherwise the wrapped and
            exact ``offset + size`` sums. It must match the kind of
            ``result``, because the corresponding keys are read directly.
    """
    offset = result["offset"]
    size = result["size"]
    file_size = result["file_size"]
    passes = result["check_passes"]
    within_file = result["is_actually_valid"]

    status = "PASS (allows load)" if passes else "FAIL (rejects load)"
    valid = "YES" if within_file else "NO"

    print(f" [{label}]")
    print(f" offset = 0x{offset:016X} ({offset})")
    print(f" size = 0x{size:016X} ({size})")
    print(f" file_size = 0x{file_size:016X} ({file_size})")
    if safe:
        print(f" overflow_detected = {result['overflow_detected']}")
    else:
        wrapped = result["offset_plus_size_wrapped"]
        print(f" offset+size (uint64 wrapped) = 0x{wrapped:016X} ({wrapped})")
        print(f" offset+size (true) = 0x{result['offset_plus_size_true']:X}")
    print(f" Bounds check: {status}")
    print(f" Actually within file? {valid}")
    # The check accepting a request that does not fit is the vulnerability.
    if passes and not within_file:
        print(" >>> VULNERABILITY: check passes but access is OUT OF BOUNDS <<<")
    print()
121
+
122
+
123
def _print_banner(rule_char: str, *titles: str) -> None:
    """Print the title line(s) framed by two 78-column rules, then a blank line."""
    rule = rule_char * 78
    print(rule)
    for title in titles:
        print(title)
    print(rule)
    print()


def _wrap_verdict(wrapped: int, file_size: int) -> str:
    """Render the line stating whether the wrapped sum slips past the check."""
    outcome = "YES => check passes!" if wrapped <= file_size else "NO"
    return f" {wrapped} <= {file_size}? {outcome}"


def _compare_loaders(offset: int, size: int, file_size: int) -> dict:
    """Run both simulated checks on one request, print both reports and
    return the unsafe check's result."""
    unsafe = simulate_unsafe_check(offset, size, file_size)
    safe = simulate_safe_check(offset, size, file_size)
    print_result("UNSAFE (mmap/file/fd DataLoader)", unsafe, safe=False)
    print_result("SAFE (BufferDataLoader)", safe, safe=True)
    return unsafe


def main():
    """Run the proof-of-concept walkthrough and print it to stdout.

    Three overflow scenarios and one legitimate request are fed to both the
    unsafe and the safe simulated bounds check, followed by a side-by-side
    listing of the C++ code, the related segment-offset arithmetic and a
    summary.

    Returns:
        1 if at least one overflow scenario passes the unsafe check while
        actually being out of bounds (i.e. the vulnerability was
        demonstrated), otherwise 0.
    """
    _print_banner(
        "=",
        "ExecuTorch DataLoader offset+size Integer Overflow PoC",
        "CWE-190 (Integer Overflow) -> CWE-125 (Out-of-bounds Read)",
    )

    # Unsafe-check results of the overflow scenarios (1-3); scenario 4 is a
    # sanity check and deliberately not counted.
    overflow_results = []

    # -------------------------------------------------------------------------
    # Scenario 1: Classic overflow — large offset + small size wraps to small value
    # -------------------------------------------------------------------------
    _print_banner("-", "SCENARIO 1: Classic overflow (large offset + small size)")
    print(" Attacker sets offset=0xFFFFFFFFFFFFFFF5, size=100 in malicious .pte file.")
    print(" Real file is only 1024 bytes.")
    print()

    file_size = 1024
    offset = 0xFFFFFFFFFFFFFFF5  # Very large, near UINT64_MAX
    size = 100

    # Show the math explicitly
    true_sum = offset + size
    wrapped = true_sum & UINT64_MAX
    print(" Math:")
    print(f" 0xFFFFFFFFFFFFFFF5 + 100 = 0x{true_sum:X}")
    print(f" Truncated to uint64: 0x{wrapped:016X} = {wrapped}")
    print(_wrap_verdict(wrapped, file_size))
    print()

    overflow_results.append(_compare_loaders(offset, size, file_size))

    # -------------------------------------------------------------------------
    # Scenario 2: Offset just barely overflows
    # -------------------------------------------------------------------------
    _print_banner("-", "SCENARIO 2: Minimal overflow — offset = UINT64_MAX, size = 1")

    offset2 = UINT64_MAX
    size2 = 1
    file_size2 = 4096

    true_sum2 = offset2 + size2
    wrapped2 = true_sum2 & UINT64_MAX
    print(" Math:")
    print(f" 0x{offset2:016X} + 1 = 0x{true_sum2:X}")
    print(f" Truncated to uint64: 0x{wrapped2:016X} = {wrapped2}")
    print(_wrap_verdict(wrapped2, file_size2))
    print()

    overflow_results.append(_compare_loaders(offset2, size2, file_size2))

    # -------------------------------------------------------------------------
    # Scenario 3: Both offset and size are large
    # -------------------------------------------------------------------------
    _print_banner(
        "-",
        "SCENARIO 3: Both offset and size large — read 1GB at offset near UINT64_MAX",
    )

    size3 = 1 * 1024 * 1024 * 1024  # 1 GB
    offset3 = UINT64_MAX - size3 + 2  # Wraps to exactly 1
    file_size3 = 1024 * 1024  # 1 MB file

    true_sum3 = offset3 + size3
    wrapped3 = true_sum3 & UINT64_MAX
    print(f" offset = UINT64_MAX - 1GB + 2 = 0x{offset3:016X}")
    print(f" size = 1 GB = 0x{size3:016X}")
    print(f" file = 1 MB = 0x{file_size3:016X}")
    print(" Math:")
    print(f" offset + size = 0x{true_sum3:X}")
    print(f" Truncated: 0x{wrapped3:016X} = {wrapped3}")
    print(_wrap_verdict(wrapped3, file_size3))
    print()

    overflow_results.append(_compare_loaders(offset3, size3, file_size3))

    # -------------------------------------------------------------------------
    # Scenario 4: Legitimate access (no overflow)
    # -------------------------------------------------------------------------
    _print_banner("-", "SCENARIO 4: Legitimate access (sanity check — no overflow)")
    _compare_loaders(256, 512, 1024)

    # -------------------------------------------------------------------------
    # Code comparison
    # -------------------------------------------------------------------------
    _print_banner("=", "CODE COMPARISON")
    print("VULNERABLE (mmap_data_loader.cpp:163, file_data_loader.cpp:150,")
    print(" file_descriptor_data_loader.cpp:161):")
    print()
    print(' if (offset + size > file_size_) {')
    print(' ET_LOG(Error, "offset %zu + size %zu > file_size_ %zu",')
    print(' offset, size, file_size_);')
    print(' return Error::InvalidArgument;')
    print(' }')
    print()
    print("SAFE (buffer_data_loader.h:38-41):")
    print()
    print(' size_t total;')
    print(' if (c10::add_overflows(offset, size, &total) || total > data_size_) {')
    print(' return Error::InvalidArgument;')
    print(' }')
    print()

    # -------------------------------------------------------------------------
    # Segment offset overflows in program.cpp
    # -------------------------------------------------------------------------
    _print_banner("=", "RELATED: Segment Offset Overflows in program.cpp")
    print("The same pattern appears in segment offset calculations:")
    print()

    # program.cpp:96 — segment_base_offset + segment_data_size
    print(" program.cpp:96:")
    print(" size_t segment_base_offset = program_data_size;")
    print(" // segment_base_offset + segment_data_size can overflow")
    print()
    seg_base = 0xFFFFFFFFFFFFFF00
    seg_data_size = 0x200
    wrapped_seg = (seg_base + seg_data_size) & UINT64_MAX
    print(f" segment_base_offset = 0x{seg_base:016X}")
    print(f" segment_data_size = 0x{seg_data_size:016X}")
    print(f" Sum (uint64 wrapped) = 0x{wrapped_seg:016X} = {wrapped_seg}")
    print(" >>> Overflows to small value, subsequent offset checks use wrong base")
    print()

    # program.cpp:504 — segment_base_offset_ + segment->offset()
    print(" program.cpp:504:")
    print(' const void* segment_data = static_cast<const uint8_t*>(segment_data_.data)')
    print(' + segment_base_offset_ + segment->offset();')
    print()
    seg_base2 = 0x8000000000000000
    seg_offset = 0x8000000000000001
    wrapped_ptr = (seg_base2 + seg_offset) & UINT64_MAX
    print(f" segment_base_offset_ = 0x{seg_base2:016X}")
    print(f" segment->offset() = 0x{seg_offset:016X}")
    print(f" Sum (uint64 wrapped) = 0x{wrapped_ptr:016X} = {wrapped_ptr}")
    print(" >>> Pointer arithmetic wraps, points to attacker-controlled offset")
    print()

    # -------------------------------------------------------------------------
    # Summary
    # -------------------------------------------------------------------------
    _print_banner("=", "SUMMARY")
    print(" 3 of 4 DataLoader implementations use `offset + size > file_size_`")
    print(" which is vulnerable to uint64_t overflow. The 4th (BufferDataLoader)")
    print(" correctly uses c10::add_overflows() to detect wraparound.")
    print()
    print(" Attack vector: Malicious .pte model file with crafted segment offsets")
    print(" and sizes that cause the bounds check to pass via integer overflow,")
    print(" leading to out-of-bounds memory access.")
    print()
    print(" Fix: Use c10::add_overflows() in all DataLoader implementations,")
    print(" matching the pattern already used in BufferDataLoader.")
    print()

    # Return non-zero if any vulnerability was demonstrated
    vuln_count = sum(
        1
        for r in overflow_results
        if r["check_passes"] and not r["is_actually_valid"]
    )
    print(
        f" Vulnerabilities demonstrated: {vuln_count}/{len(overflow_results)}"
        " overflow scenarios bypass check"
    )
    return 0 if vuln_count == 0 else 1
313
+
314
+
315
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status (non-zero when the overflow bypass was demonstrated).
if __name__ == "__main__":
    raise SystemExit(main())