#!/usr/bin/env python3
"""
ExecuTorch DataLoader offset+size Integer Overflow (CWE-190 -> CWE-125)
=======================================================================

Target: ExecuTorch (pytorch/executorch)
Commit: 90e6e4ca4ef369ce4288ffcd2a0210d5137117dd

Affected Files:
  - runtime/executor/mmap_data_loader.cpp:163
    https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/mmap_data_loader.cpp#L163
  - runtime/executor/file_data_loader.cpp:150
    https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_data_loader.cpp#L150
  - runtime/executor/file_descriptor_data_loader.cpp:161
    https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/runtime/executor/file_descriptor_data_loader.cpp#L161

Safe Reference (uses overflow check):
  - extension/data_loader/buffer_data_loader.h:38-41
    https://github.com/pytorch/executorch/blob/90e6e4ca4ef369ce4288ffcd2a0210d5137117dd/extension/data_loader/buffer_data_loader.h#L38-L41

Additionally affected — segment offset calculations:
  - runtime/executor/program.cpp:96   (segment_base_offset + segment_data_size)
  - runtime/executor/program.cpp:504  (segment_base_offset_ + segment->offset())
  - runtime/executor/program.cpp:589  (segment_base_offset_ + segment->offset() + segment_info->segment_index())

CWE-190: Integer Overflow or Wraparound
CWE-125: Out-of-bounds Read

Description:
  3 of 4 DataLoader implementations in ExecuTorch check `offset + size <= file_size_`
  to validate that a Load request stays within bounds. However, when offset and size
  are both attacker-controlled 64-bit values from a malicious .pte file, their sum
  can overflow past UINT64_MAX and wrap around to a small value, bypassing the check.

  BufferDataLoader is the ONLY implementation that correctly uses c10::add_overflows()
  to detect the wraparound before performing the comparison.

  A malicious .pte model file controls these values through the FlatBuffer schema:
  - DataSegment.offset and DataSegment.size in the Program flatbuffer
  - These flow directly into DataLoader::load(offset, size) calls

Impact:
  An attacker crafting a malicious .pte file can cause out-of-bounds memory reads
  (and potentially writes via mmap) by overflowing the offset+size bounds check.
  This can lead to information disclosure or code execution.
"""

import struct
import sys

# Largest value a C++ uint64_t can hold; used to emulate 64-bit wraparound.
UINT64_MAX = 2**64 - 1


def simulate_unsafe_check(offset: int, size: int, file_size: int) -> dict:
    """
    Model the overflow-prone bounds check described for 3 of the 4 DataLoaders:

        if (offset + size > file_size_) {
            return Error::InvalidArgument;
        }

    The sum is reduced modulo 2^64 before the comparison, exactly as a
    uint64_t addition would be, so a huge offset can wrap to a tiny total.

    Args:
        offset: Requested start offset of the load.
        size: Requested number of bytes.
        file_size: Size of the backing file.

    Returns:
        A dict echoing the inputs plus:
          - "offset_plus_size_wrapped": the sum truncated to 64 bits
          - "offset_plus_size_true": the exact (arbitrary precision) sum
          - "check_passes": True when the wrapped sum is <= file_size
          - "is_actually_valid": True when the exact sum is <= file_size
    """
    exact_total = offset + size
    # Keeping only the low 64 bits mirrors what uint64_t addition yields in C++.
    truncated_total = exact_total % (UINT64_MAX + 1)

    report = {"offset": offset, "size": size, "file_size": file_size}
    report["offset_plus_size_wrapped"] = truncated_total
    report["offset_plus_size_true"] = exact_total
    report["check_passes"] = truncated_total <= file_size
    # Python ints never overflow, so this is the ground truth.
    report["is_actually_valid"] = exact_total <= file_size
    return report


def simulate_safe_check(offset: int, size: int, file_size: int) -> dict:
    """
    Model the overflow-aware bounds check attributed to BufferDataLoader:

        size_t total;
        if (c10::add_overflows(offset, size, &total) || total > file_size_) {
            return Error::InvalidArgument;
        }

    A sum that exceeds UINT64_MAX is treated as an overflow and rejected
    before the comparison against the file size is even considered.

    Args:
        offset: Requested start offset of the load.
        size: Requested number of bytes.
        file_size: Size of the backing file.

    Returns:
        A dict echoing the inputs plus:
          - "overflow_detected": True when offset + size exceeds UINT64_MAX
          - "check_passes": True only when no overflow occurred and the
            sum is <= file_size
          - "is_actually_valid": True when the exact sum is <= file_size
    """
    exact_total = offset + size
    wraps_around = exact_total > UINT64_MAX
    fits_in_file = exact_total <= file_size

    return {
        "offset": offset,
        "size": size,
        "file_size": file_size,
        "overflow_detected": wraps_around,
        # An overflowing request is refused outright, regardless of the comparison.
        "check_passes": (not wraps_around) and fits_in_file,
        "is_actually_valid": fits_in_file,
    }


def print_result(label: str, result: dict, safe: bool = False):
    """
    Print a human-readable report for one simulated bounds check.

    Args:
        label: Heading shown in square brackets above the report.
        result: Dict produced by simulate_unsafe_check (when safe is False)
            or simulate_safe_check (when safe is True).
        safe: Selects which check-specific fields are printed: the
            overflow flag for the safe check, or the wrapped/true sums
            for the unsafe check.
    """
    passed = result["check_passes"]
    in_bounds = result["is_actually_valid"]

    print(f"  [{label}]")
    # Field names are left-padded to a common width so the '=' signs line up.
    for field in ("offset", "size", "file_size"):
        value = result[field]
        print(f"    {field:<11}= 0x{value:016X} ({value})")

    if safe:
        print(f"    overflow_detected = {result['overflow_detected']}")
    else:
        wrapped = result["offset_plus_size_wrapped"]
        print(f"    offset+size (uint64 wrapped) = 0x{wrapped:016X} ({wrapped})")
        print(f"    offset+size (true)           = 0x{result['offset_plus_size_true']:X}")

    verdict = "PASS (allows load)" if passed else "FAIL (rejects load)"
    print(f"    Bounds check: {verdict}")
    answer = "YES" if in_bounds else "NO"
    print(f"    Actually within file? {answer}")

    # The interesting case: the check accepted a request that is really out of range.
    if passed and not in_bounds:
        print("    >>> VULNERABILITY: check passes but access is OUT OF BOUNDS <<<")
    print()


def _print_lines(*lines: str) -> None:
    """Print each argument on its own line; an empty string yields a blank line."""
    for line in lines:
        print(line)


def _print_banner(rule_char: str, *titles: str, width: int = 78) -> None:
    """Print the title line(s) framed by two rules of rule_char, then a blank line."""
    rule = rule_char * width
    _print_lines(rule, *titles, rule, "")


def _explain_wraparound(sum_line: str, truncated_line: str,
                        wrapped: int, file_size: int) -> None:
    """Print the 'Math:' walkthrough: the two given lines plus the comparison verdict."""
    verdict = "YES => check passes!" if wrapped <= file_size else "NO"
    _print_lines(
        "  Math:",
        sum_line,
        truncated_line,
        f"    {wrapped} <= {file_size}?  {verdict}",
        "",
    )


def _compare_loaders(offset: int, size: int, file_size: int) -> dict:
    """Run both simulated checks, print both reports, and return the unsafe result."""
    unsafe_result = simulate_unsafe_check(offset, size, file_size)
    safe_result = simulate_safe_check(offset, size, file_size)
    print_result("UNSAFE (mmap/file/fd DataLoader)", unsafe_result, safe=False)
    print_result("SAFE (BufferDataLoader)", safe_result, safe=True)
    return unsafe_result


def main():
    """
    Walk through the overflow scenarios and print the full PoC report.

    Returns:
        1 if at least one of the three overflow scenarios passes the unsafe
        check while actually being out of bounds, otherwise 0.
    """
    _print_banner(
        "=",
        "ExecuTorch DataLoader offset+size Integer Overflow PoC",
        "CWE-190 (Integer Overflow) -> CWE-125 (Out-of-bounds Read)",
    )

    # --- Scenario 1: large offset + small size wraps to a small value --------
    _print_banner("-", "SCENARIO 1: Classic overflow (large offset + small size)")
    _print_lines(
        "  Attacker sets offset=0xFFFFFFFFFFFFFFF5, size=100 in malicious .pte file.",
        "  Real file is only 1024 bytes.",
        "",
    )
    file_size = 1024
    offset = 0xFFFFFFFFFFFFFFF5  # 11 below 2^64, so adding 100 wraps around
    size = 100
    true_sum = offset + size
    wrapped = true_sum & UINT64_MAX
    _explain_wraparound(
        f"    0xFFFFFFFFFFFFFFF5 + 100 = 0x{true_sum:X}",
        f"    Truncated to uint64:       0x{wrapped:016X} = {wrapped}",
        wrapped,
        file_size,
    )
    unsafe = _compare_loaders(offset, size, file_size)

    # --- Scenario 2: the smallest possible overflow ---------------------------
    _print_banner("-", "SCENARIO 2: Minimal overflow — offset = UINT64_MAX, size = 1")
    offset2 = UINT64_MAX
    size2 = 1
    file_size2 = 4096
    wrapped2 = (offset2 + size2) & UINT64_MAX
    _explain_wraparound(
        f"    0x{offset2:016X} + 1 = 0x{(offset2+size2):X}",
        f"    Truncated to uint64: 0x{wrapped2:016X} = {wrapped2}",
        wrapped2,
        file_size2,
    )
    unsafe2 = _compare_loaders(offset2, size2, file_size2)

    # --- Scenario 3: both operands large --------------------------------------
    _print_banner(
        "-",
        "SCENARIO 3: Both offset and size large — read 1GB at offset near UINT64_MAX",
    )
    size3 = 1 * 1024 * 1024 * 1024  # 1 GB
    offset3 = UINT64_MAX - size3 + 2  # chosen so the 64-bit sum wraps to exactly 1
    file_size3 = 1024 * 1024  # 1 MB file
    wrapped3 = (offset3 + size3) & UINT64_MAX
    _print_lines(
        f"  offset = UINT64_MAX - 1GB + 2 = 0x{offset3:016X}",
        f"  size   = 1 GB = 0x{size3:016X}",
        f"  file   = 1 MB = 0x{file_size3:016X}",
    )
    _explain_wraparound(
        f"    offset + size = 0x{(offset3 + size3):X}",
        f"    Truncated:      0x{wrapped3:016X} = {wrapped3}",
        wrapped3,
        file_size3,
    )
    unsafe3 = _compare_loaders(offset3, size3, file_size3)

    # --- Scenario 4: in-range request, both checks should agree ---------------
    _print_banner("-", "SCENARIO 4: Legitimate access (sanity check — no overflow)")
    _compare_loaders(256, 512, 1024)

    # --- Side-by-side C++ snippets --------------------------------------------
    _print_banner("=", "CODE COMPARISON")
    _print_lines(
        "VULNERABLE (mmap_data_loader.cpp:163, file_data_loader.cpp:150,",
        "            file_descriptor_data_loader.cpp:161):",
        "",
        '    if (offset + size > file_size_) {',
        '        ET_LOG(Error, "offset %zu + size %zu > file_size_ %zu",',
        '               offset, size, file_size_);',
        '        return Error::InvalidArgument;',
        '    }',
        "",
        "SAFE (buffer_data_loader.h:38-41):",
        "",
        '    size_t total;',
        '    if (c10::add_overflows(offset, size, &total) || total > data_size_) {',
        '        return Error::InvalidArgument;',
        '    }',
        "",
    )

    # --- Same wraparound pattern in the segment offset arithmetic -------------
    _print_banner("=", "RELATED: Segment Offset Overflows in program.cpp")
    _print_lines(
        "The same pattern appears in segment offset calculations:",
        "",
    )

    # program.cpp:96: segment_base_offset + segment_data_size
    seg_base = 0xFFFFFFFFFFFFFF00
    seg_data_size = 0x200
    wrapped_seg = (seg_base + seg_data_size) & UINT64_MAX
    _print_lines(
        "  program.cpp:96:",
        "    size_t segment_base_offset = program_data_size;",
        "    // segment_base_offset + segment_data_size can overflow",
        "",
        f"    segment_base_offset = 0x{seg_base:016X}",
        f"    segment_data_size   = 0x{seg_data_size:016X}",
        f"    Sum (uint64 wrapped) = 0x{wrapped_seg:016X} = {wrapped_seg}",
        "    >>> Overflows to small value, subsequent offset checks use wrong base",
        "",
    )

    # program.cpp:504: segment_base_offset_ + segment->offset()
    seg_base2 = 0x8000000000000000
    seg_offset = 0x8000000000000001
    wrapped_ptr = (seg_base2 + seg_offset) & UINT64_MAX
    _print_lines(
        "  program.cpp:504:",
        '    const void* segment_data = static_cast<const uint8_t*>(segment_data_.data)',
        '        + segment_base_offset_ + segment->offset();',
        "",
        f"    segment_base_offset_ = 0x{seg_base2:016X}",
        f"    segment->offset()    = 0x{seg_offset:016X}",
        f"    Sum (uint64 wrapped) = 0x{wrapped_ptr:016X} = {wrapped_ptr}",
        "    >>> Pointer arithmetic wraps, points to attacker-controlled offset",
        "",
    )

    # --- Summary --------------------------------------------------------------
    _print_banner("=", "SUMMARY")
    _print_lines(
        "  3 of 4 DataLoader implementations use `offset + size > file_size_`",
        "  which is vulnerable to uint64_t overflow. The 4th (BufferDataLoader)",
        "  correctly uses c10::add_overflows() to detect wraparound.",
        "",
        "  Attack vector: Malicious .pte model file with crafted segment offsets",
        "  and sizes that cause the bounds check to pass via integer overflow,",
        "  leading to out-of-bounds memory access.",
        "",
        "  Fix: Use c10::add_overflows() in all DataLoader implementations,",
        "  matching the pattern already used in BufferDataLoader.",
        "",
    )

    # A scenario counts as a demonstrated vulnerability when the unsafe check
    # accepted a request that is really outside the file.
    bypassed = [
        outcome
        for outcome in (unsafe, unsafe2, unsafe3)
        if outcome["check_passes"] and not outcome["is_actually_valid"]
    ]
    vuln_count = len(bypassed)
    print(f"  Vulnerabilities demonstrated: {vuln_count}/3 overflow scenarios bypass check")
    return 1 if vuln_count else 0


# Script entry point: main()'s return value becomes the process exit status
# (1 when at least one overflow scenario bypasses the unsafe check, else 0).
if __name__ == "__main__":
    sys.exit(main())