File size: 7,016 Bytes
0513a39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/**
 * Empirical proof: uint32_t integer overflow in llama_hparams::n_embd_s()
 * and llama_hparams::n_embd_r()
 *
 * Tests 1-4 directly instantiate llama_hparams from llama.cpp and call
 * n_embd_s() / n_embd_r() with overflow-inducing values, exercising the
 * actual production code (not a simulation). Test 5 is a standalone
 * simulation of the uint32_t multiplication at the allocation site.
 *
 * Build:
 *   cd llama.cpp
 *   g++ -std=c++17 -I src -I include -I ggml/include \
 *       ../gguf_poc/test_hparams_overflow.cpp src/llama-hparams.cpp \
 *       -o ../gguf_poc/test_hparams_overflow
 *
 * Run:
 *   ./test_hparams_overflow
 */

#include "llama-hparams.h"
#include <cstdio>
#include <cstdint>
#include <cstring>

// Prints the "OVERFLOW:" line comparing a 32-bit result with its 64-bit
// reference value. The "Nx too small" ratio is only computed when `result`
// is non-zero, because an integer division by zero is undefined behaviour.
static void print_overflow_line(uint32_t result, uint64_t correct) {
    if (result == 0) {
        printf("  OVERFLOW: %llu -> 0 (wraps to ZERO, size ratio undefined)\n",
               (unsigned long long)correct);
        return;
    }
    printf("  OVERFLOW: %llu -> %u (buffer is %llux too small!)\n",
           (unsigned long long)correct, result,
           (unsigned long long)(correct / result));
}

// Prints the "Status:" line for one test case.
// Returns true when the 32-bit result is smaller than the 64-bit reference,
// i.e. when the 32-bit computation lost high bits.
static bool print_status(uint32_t result, uint64_t correct, const char * vulnerable_label) {
    const bool wrapped = result < correct;
    printf("  Status: %s\n\n", wrapped ? vulnerable_label : "SAFE");
    return wrapped;
}

// Runs five overflow checks and prints a human-readable report to stdout.
//
// Tests 1-4 fill a zeroed llama_hparams with chosen field values, call
// n_embd_s() or n_embd_r(), and compare the returned uint32_t against the
// same expression evaluated in uint64_t. Test 5 multiplies two local
// uint32_t values without touching llama_hparams.
//
// The closing banner reflects how many of Tests 1-4 actually reported an
// overflow. The process exit code is always 0.
int main() {
    printf("================================================================\n");
    printf("EMPIRICAL PROOF: uint32_t overflow in llama_hparams::n_embd_s()\n");
    printf("================================================================\n\n");

    // Number of llama_hparams checks (Tests 1-4) and how many of them wrapped.
    const int n_paths = 4;
    int n_paths_vulnerable = 0;

    // --- Test 1: Mamba path (ssm_d_state * ssm_d_inner) ---
    {
        printf("--- Test 1: Mamba path (line 169: return ssm_d_state * ssm_d_inner) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;

        // Set values that overflow when multiplied as uint32_t
        hp.ssm_d_state = 1073741825;  // (2^30 + 1)
        hp.ssm_d_inner = 4;

        // Ensure we hit the Mamba path (wkv_head_size=0, n_embd_head_kda=0)
        hp.wkv_head_size = 0;
        hp.n_embd_head_kda = 0;

        uint32_t result = hp.n_embd_s();

        // Correct value in 64-bit: 1073741825 * 4 = 4,294,967,300
        uint64_t correct = (uint64_t)hp.ssm_d_state * hp.ssm_d_inner;

        printf("  ssm_d_state      = %u\n", hp.ssm_d_state);
        printf("  ssm_d_inner      = %u\n", hp.ssm_d_inner);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        print_overflow_line(result, correct);
        if (print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_paths_vulnerable++;
        }
    }

    // --- Test 2: RWKV path (n_embd * wkv_head_size) ---
    {
        printf("--- Test 2: RWKV6 path (line 158: return n_embd * wkv_head_size) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;

        hp.n_embd = 65537;
        hp.wkv_head_size = 65537;

        uint32_t result = hp.n_embd_s();
        uint64_t correct = (uint64_t)hp.n_embd * hp.wkv_head_size;

        printf("  n_embd           = %u\n", hp.n_embd);
        printf("  wkv_head_size    = %u\n", hp.wkv_head_size);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        print_overflow_line(result, correct);
        if (print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_paths_vulnerable++;
        }
    }

    // --- Test 3: Kimi KDA path (n_embd_head_kda^2 * n_head) ---
    {
        printf("--- Test 3: Kimi KDA path (line 165: return n_embd_head_kda * n_embd_head_kda * n_head()) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;
        hp.wkv_head_size = 0;

        hp.n_embd_head_kda = 65536;  // 65536^2 = 2^32, wraps to 0
        // n_head() reads n_head_arr[0], must be set
        hp.n_head_arr[0] = 1;

        uint32_t result = hp.n_embd_s();
        uint64_t correct = (uint64_t)hp.n_embd_head_kda * hp.n_embd_head_kda * hp.n_head_arr[0];

        printf("  n_embd_head_kda  = %u\n", hp.n_embd_head_kda);
        printf("  n_head           = %u\n", hp.n_head_arr[0]);
        printf("  n_embd_s() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        if (result == 0) {
            printf("  OVERFLOW: wraps to ZERO! ggml_new_tensor_1d allocates 0 bytes!\n");
        } else {
            printf("  OVERFLOW: %llu -> %u\n", (unsigned long long)correct, result);
        }
        if (print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_paths_vulnerable++;
        }
    }

    // --- Test 4: n_embd_r() Mamba path ---
    {
        printf("--- Test 4: n_embd_r() Mamba path (line 152) ---\n");

        llama_hparams hp = {};
        memset(&hp, 0, sizeof(hp));
        hp.n_layer = 1;
        hp.wkv_head_size = 0;
        hp.n_shortconv_l_cache = 0;
        hp.n_embd_head_kda = 0;

        hp.ssm_d_conv = 4;
        hp.ssm_d_inner = 4;
        hp.ssm_n_group = 1;
        hp.ssm_d_state = 715827883;  // chosen so (d_conv-1)*(d_inner + 2*1*d_state) overflows

        uint32_t result = hp.n_embd_r();
        // (4-1) * (4 + 2*1*715827883) = 3 * (4 + 1431655766) = 3 * 1431655770 = 4294967310
        uint64_t sub = (uint64_t)hp.ssm_d_inner + 2ULL * hp.ssm_n_group * hp.ssm_d_state;
        uint64_t correct = ((uint64_t)hp.ssm_d_conv - 1) * sub;

        printf("  ssm_d_conv   = %u\n", hp.ssm_d_conv);
        printf("  ssm_d_inner  = %u\n", hp.ssm_d_inner);
        printf("  ssm_n_group  = %u\n", hp.ssm_n_group);
        printf("  ssm_d_state  = %u\n", hp.ssm_d_state);
        printf("  n_embd_r() returned: %u\n", result);
        printf("  Correct value:       %llu\n", (unsigned long long)correct);
        if (print_status(result, correct, "VULNERABLE (overflow confirmed)")) {
            n_paths_vulnerable++;
        }
    }

    // --- Test 5: Double overflow at allocation site ---
    // Pure local arithmetic: it does not call into llama_hparams, so it is
    // not counted among the n_embd_s()/n_embd_r() paths summarised below.
    {
        printf("--- Test 5: Double overflow simulation (allocation site lines 94-95) ---\n");
        printf("  Code: hparams.n_embd_s() * mem_size\n");
        printf("  Both are uint32_t, multiplication overflows before widening to int64_t\n\n");

        uint32_t n_embd_s_val = 65536;  // legitimate n_embd_s() return value
        uint32_t mem_size = 65537;       // batch/sequence size

        uint32_t result = n_embd_s_val * mem_size;  // uint32_t * uint32_t
        uint64_t correct = (uint64_t)n_embd_s_val * mem_size;

        printf("  n_embd_s()   = %u\n", n_embd_s_val);
        printf("  mem_size     = %u\n", mem_size);
        printf("  uint32 mul   = %u\n", result);
        printf("  Correct mul  = %llu\n", (unsigned long long)correct);
        print_status(result, correct, "VULNERABLE (double overflow)");
    }

    printf("================================================================\n");
    if (n_paths_vulnerable == n_paths) {
        printf("CONCLUSION: All overflow paths in n_embd_s()/n_embd_r() are\n");
        printf("confirmed vulnerable. An attacker crafting GGUF metadata with\n");
        printf("these values causes undersized buffer allocation in\n");
        printf("llama_memory_recurrent (lines 94-95), leading to heap overflow\n");
        printf("when recurrent state data is written during inference.\n");
    } else {
        // Do not claim a confirmed vulnerability the measurements above
        // did not show.
        printf("CONCLUSION: %d of %d n_embd_s()/n_embd_r() paths overflowed.\n",
               n_paths_vulnerable, n_paths);
        printf("See the per-test Status lines above for details.\n");
    }
    printf("================================================================\n");

    return 0;
}