"""
PoC: Apache ORC String Length Integer Overflow
===============================================

Root cause: ColumnReader.cc:697
    totalLength += static_cast<size_t>(lengths[i]);
    // lengths[i] is an int64_t filled by the unsigned RLE decoder — NO negative check

String lengths are decoded by an unsigned RLE decoder (isSigned=false, line 645)
but stored as int64_t, so values >= 2^63 become negative. static_cast<size_t>
on a negative int64_t produces a huge size_t, so the running sum either
requests an enormous allocation (OOM) or wraps around to zero.
"""
import struct
|
# size_t is modelled as a 64-bit unsigned integer, so sums wrap modulo 2**64.
_UINT64_MODULUS = 2**64


def _as_int64(value):
    """Reinterpret the low 64 bits of *value* as a signed 64-bit integer.

    This models an unsigned decoder result being stored into an ``int64_t``:
    any value >= 2**63 comes back negative.
    """
    # "<Q"/"<q" use struct's standard sizes (always 8 bytes) instead of the
    # native-mode codes, whose size follows the platform's C ABI.
    return struct.unpack("<q", struct.pack("<Q", value % _UINT64_MODULUS))[0]


def _consequence_lines(total):
    """Return the report lines describing what a wrapped total of *total* causes.

    Totals from 1000 up to and including 2**40 yield no lines; none of the
    built-in cases land in that range.
    """
    if total == 0:
        return [
            " -> blob.resize(0) -> EMPTY BLOB",
            " -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read",
        ]
    if total < 1000:
        return [
            f" -> blob.resize({total}) -> UNDERSIZED BUFFER",
            " -> actual data > blob size -> HEAP CORRUPTION",
        ]
    if total > 2**40:
        return [f" -> blob.resize({total}) -> ALLOCATION FAILURE -> OOM CRASH"]
    return []


def show_overflow():
    """Print a walkthrough of the ORC string-length integer overflow.

    The report has three parts: a summary of the vulnerable summation loop,
    a set of worked cases showing how crafted unsigned lengths wrap the
    64-bit ``totalLength`` accumulator, and closing notes on a related
    stripe-offset overflow.

    Returns:
        None. Everything is written to standard output.
    """
    print("ORC StringDirectColumnReader::computeSize() Integer Overflow")
    print("=" * 60)
    print()
    print("Vulnerable code (ColumnReader.cc:691-706):")
    print(" size_t totalLength = 0;")
    print(" for (i = 0; i < numValues; ++i)")
    print(" totalLength += static_cast<size_t>(lengths[i]); // NO CHECK!")
    print()
    print("How negative lengths occur:")
    print(" 1. RLE decoder is unsigned (isSigned=false, line 645)")
    print(" 2. Output stored in int64_t* (line 715-718)")
    print(" 3. uint64 values >= 2^63 become negative int64_t")
    print(" 4. static_cast<size_t>(negative) = huge positive")
    print()

    # Each case is (description, lengths as emitted by the unsigned decoder).
    test_cases = [
        ("OOM DoS: single huge length", [2**63]),
        ("Wrap to 0: two lengths", [2**64 - 1, 1]),
        ("Wrap to 1: crafted pair", [2**64 - 100, 101]),
    ]

    for desc, uint_lengths in test_cases:
        print(f"Case: {desc}")
        total = 0
        for ul in uint_lengths:
            # Reduce once and reuse for both the cast and the running sum.
            as_size_t = ul % _UINT64_MODULUS
            as_int64 = _as_int64(as_size_t)
            total = (total + as_size_t) % _UINT64_MODULUS
            print(f" RLE uint64={ul:#018x} -> int64={as_int64} -> size_t={as_size_t:#018x}")
        print(f" totalLength = {total} ({total:#018x})")
        for line in _consequence_lines(total):
            print(line)
        print()

    print("Additional: Stripe offset overflow (Reader.cc:591)")
    print(" stripeFooterStart = offset + index_length + data_length")
    print(" All uint64 from protobuf, no overflow check")
    print(" -> wraps around -> reads wrong file location")
    print()
    print("ORC C++ has ZERO safe arithmetic in core parsing code")
    print(" Reader.cc: 0 overflow checks in 1830 lines")
    print(" ColumnReader.cc: 0 overflow checks in 1793 lines")
|
# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    show_overflow()
|