PhysShell Jiunsong commited on
Commit
f9d7e60
·
0 Parent(s):

Duplicate from Jiunsong/SuperGemma4-31b-abliterated-mlx-4bit

Browse files

Co-authored-by: Jiun Song <Jiunsong@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gemma
3
+ library_name: mlx
4
+ pipeline_tag: text-generation
5
+ base_model: google/gemma-4-31b-it
6
+ base_model_relation: finetune
7
+ language:
8
+ - en
9
+ - ko
10
+ tags:
11
+ - gemma
12
+ - gemma4
13
+ - mlx
14
+ - 4bit
15
+ - uncensored
16
+ - chat
17
+ - coding
18
+ - reasoning
19
+ - korean
20
+ ---
21
+
22
+ # SuperGemma4-31b-abliterated-mlx-4bit
23
+
24
+ If this release helps you, support future drops on [Ko-fi](https://ko-fi.com/jiunsong).
25
+
26
+ **SuperGemma4-31b-abliterated-mlx-4bit** is a heavily upgraded Gemma 4 31B release for people who want a local model that feels fully uncensored, dramatically more useful, and far more fun to run every day.
27
+
28
+ Built on top of [google/gemma-4-31b-it](https://huggingface.co/google/gemma-4-31b-it), this release is aimed at users who care about the things they actually notice:
29
+
30
+ - fewer annoying refusals
31
+ - stronger coding and technical answers
32
+ - sharper planning and practical problem solving
33
+ - smoother local deployment with compact MLX 4-bit weights that feel surprisingly light for a 31B-class model
34
+ - better day-to-day usefulness in both English and Korean
35
+
36
+ ## Why people will want this
37
+
38
+ This model is meant to feel like the base model with the brakes taken off and the weak spots pushed much harder:
39
+
40
+ - fully uncensored, low-friction conversation
41
+ - more confident coding, debugging, and system design help
42
+ - stronger answers on hard reasoning and planning prompts
43
+ - a more practical, builder-friendly personality for real local workflows
44
+ - a local 31B experience that feels leaner, sharper, and more alive than most people expect
45
+
46
+ In plain terms: it is built to feel bolder, freer, more capable, and more satisfying than the stock instruction-tuned release.
47
+
48
+ ## Best use cases
49
+
50
+ - uncensored general chat
51
+ - coding and debugging help
52
+ - architecture and API design
53
+ - browser-task planning
54
+ - bilingual English/Korean workflows
55
+
56
+ ## What makes it feel better in practice
57
+
58
+ - stronger practical coding help instead of generic filler
59
+ - more direct answers when you want the model to stop hedging
60
+ - better performance on planning-heavy prompts and task-oriented chats
61
+ - a compact local package that is easy to run on Apple Silicon with MLX
62
+
63
+ ## Included clean-output helper
64
+
65
+ For app integrations, JSON-heavy tasks, exact-output prompts, or loop-sensitive workloads, use the included helper scripts by default.
66
+
67
+ It helps with:
68
+
69
+ - keeping JSON-only requests as raw JSON
70
+ - stripping stray internal markers if they ever appear
71
+ - making app-facing answers cleaner for structured use
72
+ - stopping exact-reply and fixed-line prompts from drifting
73
+ - tightening loop-prone prompts such as "exactly ACK" or "12 unique lines"
74
+
75
+ ## Quick start
76
+
77
+ ```python
78
+ from mlx_lm import load, generate
79
+
80
+ model, tokenizer = load(".")
81
+ prompt = tokenizer.apply_chat_template(
82
+ [{"role": "user", "content": "Explain vector databases in plain English."}],
83
+ tokenize=False,
84
+ add_generation_prompt=True,
85
+ )
86
+ print(generate(model, tokenizer, prompt=prompt, max_tokens=256, verbose=False))
87
+ ```
88
+
89
+ ## Guarded generation
90
+
91
+ The repository includes:
92
+
93
+ - `supergemma_guard.py`
94
+ - `supergemma_guarded_generate.py`
95
+
96
+ This is the recommended path for:
97
+
98
+ - exact-output prompts
99
+ - JSON-only app endpoints
100
+ - tool-followup style answers
101
+ - loop-sensitive or boundary-sensitive workloads
102
+
103
+ Example:
104
+
105
+ ```bash
106
+ python supergemma_guarded_generate.py \
107
+ --model . \
108
+ --guard-profile supergemma_v4 \
109
+ --prompt 'Return only valid JSON with keys "name" and "reason".'
110
+ ```
111
+
112
+ ## Notes
113
+
114
+ - Chat template is aligned to the latest Gemma 4 31B IT template used in this project.
115
+ - This release is intended for local inference and downstream app building.
116
+ - If you want GGUF files for llama.cpp and similar runtimes, use the sibling GGUF release.
117
+
118
+ ## Support
119
+
120
+ If you want to support more uncensored local model releases, benchmarks, and packaging work:
121
+
122
+ - [Ko-fi](https://ko-fi.com/jiunsong)
chat_template.jinja ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- set add_comma = false -%}
6
+ {%- if key not in standard_keys -%}
7
+ {%- if ns.found_first %},{% endif -%}
8
+ {%- set ns.found_first = true -%}
9
+ {{ key }}:{
10
+ {%- if value['description'] -%}
11
+ description:<|"|>{{ value['description'] }}<|"|>
12
+ {%- set add_comma = true -%}
13
+ {%- endif -%}
14
+ {%- if value['type'] | upper == 'STRING' -%}
15
+ {%- if value['enum'] -%}
16
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
+ enum:{{ format_argument(value['enum']) }}
18
+ {%- endif -%}
19
+ {%- elif value['type'] | upper == 'ARRAY' -%}
20
+ {%- if value['items'] is mapping and value['items'] -%}
21
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
+ items:{
23
+ {%- set ns_items = namespace(found_first=false) -%}
24
+ {%- for item_key, item_value in value['items'] | dictsort -%}
25
+ {%- if item_value is not none -%}
26
+ {%- if ns_items.found_first %},{% endif -%}
27
+ {%- set ns_items.found_first = true -%}
28
+ {%- if item_key == 'properties' -%}
29
+ properties:{
30
+ {%- if item_value is mapping -%}
31
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
32
+ {%- endif -%}
33
+ }
34
+ {%- elif item_key == 'required' -%}
35
+ required:[
36
+ {%- for req_item in item_value -%}
37
+ <|"|>{{- req_item -}}<|"|>
38
+ {%- if not loop.last %},{% endif -%}
39
+ {%- endfor -%}
40
+ ]
41
+ {%- elif item_key == 'type' -%}
42
+ {%- if item_value is string -%}
43
+ type:{{ format_argument(item_value | upper) }}
44
+ {%- else -%}
45
+ type:{{ format_argument(item_value | map('upper') | list) }}
46
+ {%- endif -%}
47
+ {%- else -%}
48
+ {{ item_key }}:{{ format_argument(item_value) }}
49
+ {%- endif -%}
50
+ {%- endif -%}
51
+ {%- endfor -%}
52
+ }
53
+ {%- endif -%}
54
+ {%- endif -%}
55
+ {%- if value['nullable'] %}
56
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
+ nullable:true
58
+ {%- endif -%}
59
+ {%- if value['type'] | upper == 'OBJECT' -%}
60
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
+ properties:{
63
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
+ }
65
+ {%- elif value is mapping -%}
66
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
+ properties:{
68
+ {{- format_parameters(value, value['required'] | default([])) -}}
69
+ }
70
+ {%- endif -%}
71
+ {%- if value['required'] -%}
72
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
+ required:[
74
+ {%- for item in value['required'] | default([]) -%}
75
+ <|"|>{{- item -}}<|"|>
76
+ {%- if not loop.last %},{% endif -%}
77
+ {%- endfor -%}
78
+ ]
79
+ {%- endif -%}
80
+ {%- endif -%}
81
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
+ type:<|"|>{{ value['type'] | upper }}<|"|>}
83
+ {%- endif -%}
84
+ {%- endfor -%}
85
+ {%- endmacro -%}
86
+ {%- macro format_function_declaration(tool_data) -%}
87
+ declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
88
+ {%- set params = tool_data['function']['parameters'] -%}
89
+ {%- if params -%}
90
+ ,parameters:{
91
+ {%- if params['properties'] -%}
92
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
93
+ {%- endif -%}
94
+ {%- if params['required'] -%}
95
+ required:[
96
+ {%- for item in params['required'] -%}
97
+ <|"|>{{- item -}}<|"|>
98
+ {{- ',' if not loop.last -}}
99
+ {%- endfor -%}
100
+ ],
101
+ {%- endif -%}
102
+ {%- if params['type'] -%}
103
+ type:<|"|>{{- params['type'] | upper -}}<|"|>}
104
+ {%- endif -%}
105
+ {%- endif -%}
106
+ {%- if 'response' in tool_data['function'] -%}
107
+ {%- set response_declaration = tool_data['function']['response'] -%}
108
+ ,response:{
109
+ {%- if response_declaration['description'] -%}
110
+ description:<|"|>{{- response_declaration['description'] -}}<|"|>,
111
+ {%- endif -%}
112
+ {%- if response_declaration['type'] | upper == 'OBJECT' -%}
113
+ type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
114
+ {%- endif -%}
115
+ {%- endif -%}
116
+ }
117
+ {%- endmacro -%}
118
+ {%- macro format_argument(argument, escape_keys=True) -%}
119
+ {%- if argument is string -%}
120
+ {{- '<|"|>' + argument + '<|"|>' -}}
121
+ {%- elif argument is boolean -%}
122
+ {{- 'true' if argument else 'false' -}}
123
+ {%- elif argument is mapping -%}
124
+ {{- '{' -}}
125
+ {%- set ns = namespace(found_first=false) -%}
126
+ {%- for key, value in argument | dictsort -%}
127
+ {%- if ns.found_first %},{% endif -%}
128
+ {%- set ns.found_first = true -%}
129
+ {%- if escape_keys -%}
130
+ {{- '<|"|>' + key + '<|"|>' -}}
131
+ {%- else -%}
132
+ {{- key -}}
133
+ {%- endif -%}
134
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
135
+ {%- endfor -%}
136
+ {{- '}' -}}
137
+ {%- elif argument is sequence -%}
138
+ {{- '[' -}}
139
+ {%- for item in argument -%}
140
+ {{- format_argument(item, escape_keys=escape_keys) -}}
141
+ {%- if not loop.last %},{% endif -%}
142
+ {%- endfor -%}
143
+ {{- ']' -}}
144
+ {%- else -%}
145
+ {{- argument -}}
146
+ {%- endif -%}
147
+ {%- endmacro -%}
148
+ {%- macro strip_thinking(text) -%}
149
+ {%- set ns = namespace(result='') -%}
150
+ {%- for part in text.split('<channel|>') -%}
151
+ {%- if '<|channel>' in part -%}
152
+ {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
153
+ {%- else -%}
154
+ {%- set ns.result = ns.result + part -%}
155
+ {%- endif -%}
156
+ {%- endfor -%}
157
+ {{- ns.result | trim -}}
158
+ {%- endmacro -%}
159
+
160
+ {%- macro format_tool_response_block(tool_name, response) -%}
161
+ {{- '<|tool_response>' -}}
162
+ {%- if response is mapping -%}
163
+ {{- 'response:' + tool_name + '{' -}}
164
+ {%- for key, value in response | dictsort -%}
165
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
166
+ {%- if not loop.last %},{% endif -%}
167
+ {%- endfor -%}
168
+ {{- '}' -}}
169
+ {%- else -%}
170
+ {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
171
+ {%- endif -%}
172
+ {{- '<tool_response|>' -}}
173
+ {%- endmacro -%}
174
+
175
+ {%- set ns = namespace(prev_message_type=None) -%}
176
+ {%- set loop_messages = messages -%}
177
+ {{- bos_token -}}
178
+ {#- Handle System/Tool Definitions Block -#}
179
+ {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
+ {{- '<|turn>system\n' -}}
181
+
182
+ {#- Inject Thinking token at the very top of the FIRST system turn -#}
183
+ {%- if enable_thinking is defined and enable_thinking -%}
184
+ {{- '<|think|>\n' -}}
185
+ {%- set ns.prev_message_type = 'think' -%}
186
+ {%- endif -%}
187
+
188
+ {%- if messages[0]['role'] in ['system', 'developer'] -%}
189
+ {{- messages[0]['content'] | trim -}}
190
+ {%- set loop_messages = messages[1:] -%}
191
+ {%- endif -%}
192
+
193
+ {%- if tools -%}
194
+ {%- for tool in tools %}
195
+ {{- '<|tool>' -}}
196
+ {{- format_function_declaration(tool) | trim -}}
197
+ {{- '<tool|>' -}}
198
+ {%- endfor %}
199
+ {%- set ns.prev_message_type = 'tool' -%}
200
+ {%- endif -%}
201
+
202
+ {{- '<turn|>\n' -}}
203
+ {%- endif %}
204
+
205
+ {#- Pre-scan: find last user message index for reasoning guard -#}
206
+ {%- set ns_turn = namespace(last_user_idx=-1) -%}
207
+ {%- for i in range(loop_messages | length) -%}
208
+ {%- if loop_messages[i]['role'] == 'user' -%}
209
+ {%- set ns_turn.last_user_idx = i -%}
210
+ {%- endif -%}
211
+ {%- endfor -%}
212
+
213
+ {#- Loop through messages -#}
214
+ {%- for message in loop_messages -%}
215
+ {%- if message['role'] != 'tool' -%}
216
+ {%- set ns.prev_message_type = None -%}
217
+ {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
218
+ {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
219
+ {%- set prev_nt = namespace(role=None, found=false) -%}
220
+ {%- if loop.index0 > 0 -%}
221
+ {%- for j in range(loop.index0 - 1, -1, -1) -%}
222
+ {%- if not prev_nt.found -%}
223
+ {%- if loop_messages[j]['role'] != 'tool' -%}
224
+ {%- set prev_nt.role = loop_messages[j]['role'] -%}
225
+ {%- set prev_nt.found = true -%}
226
+ {%- endif -%}
227
+ {%- endif -%}
228
+ {%- endfor -%}
229
+ {%- endif -%}
230
+ {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
231
+ {%- if not continue_same_model_turn -%}
232
+ {{- '<|turn>' + role + '\n' }}
233
+ {%- endif -%}
234
+
235
+ {#- Render reasoning/reasoning_content as thinking channel -#}
236
+ {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
237
+ {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
238
+ {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
239
+ {%- endif -%}
240
+
241
+ {%- if message['tool_calls'] -%}
242
+ {%- for tool_call in message['tool_calls'] -%}
243
+ {%- set function = tool_call['function'] -%}
244
+ {{- '<|tool_call>call:' + function['name'] + '{' -}}
245
+ {%- if function['arguments'] is mapping -%}
246
+ {%- set ns_args = namespace(found_first=false) -%}
247
+ {%- for key, value in function['arguments'] | dictsort -%}
248
+ {%- if ns_args.found_first %},{% endif -%}
249
+ {%- set ns_args.found_first = true -%}
250
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
251
+ {%- endfor -%}
252
+ {%- elif function['arguments'] is string -%}
253
+ {{- function['arguments'] -}}
254
+ {%- endif -%}
255
+ {{- '}<tool_call|>' -}}
256
+ {%- endfor -%}
257
+ {%- set ns.prev_message_type = 'tool_call' -%}
258
+ {%- endif -%}
259
+
260
+ {%- set ns_tr_out = namespace(flag=false) -%}
261
+ {%- if message.get('tool_responses') -%}
262
+ {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
263
+ {%- for tool_response in message['tool_responses'] -%}
264
+ {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
265
+ {%- set ns_tr_out.flag = true -%}
266
+ {%- set ns.prev_message_type = 'tool_response' -%}
267
+ {%- endfor -%}
268
+ {%- elif message.get('tool_calls') -%}
269
+ {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
270
+ {%- set ns_tool_scan = namespace(stopped=false) -%}
271
+ {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
272
+ {%- if ns_tool_scan.stopped -%}
273
+ {%- elif loop_messages[k]['role'] != 'tool' -%}
274
+ {%- set ns_tool_scan.stopped = true -%}
275
+ {%- else -%}
276
+ {%- set follow = loop_messages[k] -%}
277
+ {#- Resolve tool_call_id to function name -#}
278
+ {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
279
+ {%- for tc in message['tool_calls'] -%}
280
+ {%- if tc.get('id') == follow.get('tool_call_id') -%}
281
+ {%- set ns_tname.name = tc['function']['name'] -%}
282
+ {%- endif -%}
283
+ {%- endfor -%}
284
+ {#- Handle content as string or content-parts array -#}
285
+ {%- set tool_body = follow.get('content') -%}
286
+ {%- if tool_body is string -%}
287
+ {{- format_tool_response_block(ns_tname.name, tool_body) -}}
288
+ {%- elif tool_body is sequence and tool_body is not string -%}
289
+ {%- set ns_txt = namespace(s='') -%}
290
+ {%- for part in tool_body -%}
291
+ {%- if part.get('type') == 'text' -%}
292
+ {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
293
+ {%- endif -%}
294
+ {%- endfor -%}
295
+ {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
296
+ {%- else -%}
297
+ {{- format_tool_response_block(ns_tname.name, tool_body) -}}
298
+ {%- endif -%}
299
+ {%- set ns_tr_out.flag = true -%}
300
+ {%- set ns.prev_message_type = 'tool_response' -%}
301
+ {%- endif -%}
302
+ {%- endfor -%}
303
+ {%- endif -%}
304
+
305
+ {%- if message['content'] is string -%}
306
+ {%- if role == 'model' -%}
307
+ {{- strip_thinking(message['content']) -}}
308
+ {%- else -%}
309
+ {{- message['content'] | trim -}}
310
+ {%- endif -%}
311
+ {%- elif message['content'] is sequence -%}
312
+ {%- for item in message['content'] -%}
313
+ {%- if item['type'] == 'text' -%}
314
+ {%- if role == 'model' -%}
315
+ {{- strip_thinking(item['text']) -}}
316
+ {%- else -%}
317
+ {{- item['text'] | trim -}}
318
+ {%- endif -%}
319
+ {%- elif item['type'] == 'image' -%}
320
+ {{- '<|image|>' -}}
321
+ {%- set ns.prev_message_type = 'image' -%}
322
+ {%- elif item['type'] == 'audio' -%}
323
+ {{- '<|audio|>' -}}
324
+ {%- set ns.prev_message_type = 'audio' -%}
325
+ {%- elif item['type'] == 'video' -%}
326
+ {{- '<|video|>' -}}
327
+ {%- set ns.prev_message_type = 'video' -%}
328
+ {%- endif -%}
329
+ {%- endfor -%}
330
+ {%- endif -%}
331
+
332
+ {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
333
+ {{- '<|tool_response>' -}}
334
+ {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
335
+ {{- '<turn|>\n' -}}
336
+ {%- endif -%}
337
+ {%- endif -%}
338
+ {%- endfor -%}
339
+
340
+ {%- if add_generation_prompt -%}
341
+ {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
+ {{- '<|turn>model\n' -}}
343
+ {%- if not enable_thinking | default(false) -%}
344
+ {{- '<|channel>thought\n<channel|>' -}}
345
+ {%- endif -%}
346
+ {%- endif -%}
347
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106,
16
+ 50
17
+ ],
18
+ "image_token_id": 258880,
19
+ "initializer_range": 0.02,
20
+ "model_type": "gemma4",
21
+ "quantization": {
22
+ "group_size": 64,
23
+ "bits": 4,
24
+ "mode": "affine"
25
+ },
26
+ "quantization_config": {
27
+ "group_size": 64,
28
+ "bits": 4,
29
+ "mode": "affine"
30
+ },
31
+ "text_config": {
32
+ "attention_bias": false,
33
+ "attention_dropout": 0.0,
34
+ "attention_k_eq_v": true,
35
+ "bos_token_id": 2,
36
+ "dtype": "bfloat16",
37
+ "enable_moe_block": false,
38
+ "eos_token_id": 1,
39
+ "expert_intermediate_size": null,
40
+ "final_logit_softcapping": 30.0,
41
+ "global_head_dim": 512,
42
+ "head_dim": 256,
43
+ "hidden_activation": "gelu_pytorch_tanh",
44
+ "hidden_size": 5376,
45
+ "hidden_size_per_layer_input": 0,
46
+ "initializer_range": 0.02,
47
+ "intermediate_size": 21504,
48
+ "layer_types": [
49
+ "sliding_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "full_attention",
55
+ "sliding_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "sliding_attention",
60
+ "full_attention",
61
+ "sliding_attention",
62
+ "sliding_attention",
63
+ "sliding_attention",
64
+ "sliding_attention",
65
+ "sliding_attention",
66
+ "full_attention",
67
+ "sliding_attention",
68
+ "sliding_attention",
69
+ "sliding_attention",
70
+ "sliding_attention",
71
+ "sliding_attention",
72
+ "full_attention",
73
+ "sliding_attention",
74
+ "sliding_attention",
75
+ "sliding_attention",
76
+ "sliding_attention",
77
+ "sliding_attention",
78
+ "full_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "full_attention",
85
+ "sliding_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "full_attention",
91
+ "sliding_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "full_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "sliding_attention",
101
+ "sliding_attention",
102
+ "full_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "sliding_attention",
107
+ "sliding_attention",
108
+ "full_attention"
109
+ ],
110
+ "max_position_embeddings": 262144,
111
+ "model_type": "gemma4_text",
112
+ "num_attention_heads": 32,
113
+ "num_experts": null,
114
+ "num_global_key_value_heads": 4,
115
+ "num_hidden_layers": 60,
116
+ "num_key_value_heads": 16,
117
+ "num_kv_shared_layers": 0,
118
+ "pad_token_id": 0,
119
+ "rms_norm_eps": 1e-06,
120
+ "rope_parameters": {
121
+ "full_attention": {
122
+ "partial_rotary_factor": 0.25,
123
+ "rope_theta": 1000000.0,
124
+ "rope_type": "proportional"
125
+ },
126
+ "sliding_attention": {
127
+ "rope_theta": 10000.0,
128
+ "rope_type": "default"
129
+ }
130
+ },
131
+ "sliding_window": 1024,
132
+ "tie_word_embeddings": true,
133
+ "top_k_experts": null,
134
+ "use_bidirectional_attention": "vision",
135
+ "use_cache": true,
136
+ "use_double_wide_mlp": false,
137
+ "vocab_size": 262144,
138
+ "vocab_size_per_layer_input": 262144
139
+ },
140
+ "tie_word_embeddings": true,
141
+ "transformers_version": "5.5.0.dev0",
142
+ "video_token_id": 258884,
143
+ "vision_soft_tokens_per_image": 280
144
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "pad_token_id": 0,
10
+ "temperature": 1.0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.5.0.dev0"
14
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c987e5c55213b209f1705bb3e208ed80b56de311c076566c0d3410fa7b07cf49
3
+ size 5366617512
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9e9f9dfc4ea5bc03f08932389d507c9a87a3f8fcdc340f6e80d35aacabfd31
3
+ size 5361642573
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18764b1c52f872a434e5f160226b3b47ae08f6742ca4e5c104f32da65b8ba5dd
3
+ size 5367276094
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e77c0a290b066e1e4c1098de0a2728c20b9a9d0a071739638eca6c5d7f75f5ad
3
+ size 1173848301
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
supergemma_guard.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import re
6
+ from typing import Iterable
7
+
8
+ from mlx_lm.sample_utils import make_logits_processors
9
+
10
+
11
+ CHANNEL_TOKENS = {
12
+ 100: -100.0, # <|channel>
13
+ 101: -100.0, # <channel|>
14
+ 98: -100.0, # <|think|>
15
+ }
16
+
17
+ JSON_FENCE_TOKENS = {
18
+ 2717: -100.0, # ```
19
+ }
20
+
21
+ WORD_NUMBERS = {
22
+ "one": 1,
23
+ "two": 2,
24
+ "three": 3,
25
+ "four": 4,
26
+ "five": 5,
27
+ "six": 6,
28
+ "seven": 7,
29
+ "eight": 8,
30
+ "nine": 9,
31
+ "ten": 10,
32
+ "eleven": 11,
33
+ "twelve": 12,
34
+ }
35
+
36
+
37
+ def flatten_messages_text(messages: Iterable[dict]) -> str:
38
+ chunks: list[str] = []
39
+ for message in messages:
40
+ content = message.get("content", "")
41
+ if isinstance(content, str):
42
+ chunks.append(content)
43
+ elif isinstance(content, list):
44
+ for item in content:
45
+ if isinstance(item, dict) and item.get("type") == "text":
46
+ chunks.append(str(item.get("text", "")))
47
+ return "\n".join(chunk for chunk in chunks if chunk)
48
+
49
+
50
+ def looks_like_json_only_request(messages: Iterable[dict]) -> bool:
51
+ text = flatten_messages_text(messages).lower()
52
+ markers = [
53
+ "json only",
54
+ "valid json",
55
+ "json object",
56
+ "no markdown",
57
+ "without markdown fences",
58
+ "raw json",
59
+ "return only json",
60
+ "respond with json only",
61
+ "do not wrap it in ```json```",
62
+ ]
63
+ return any(marker in text for marker in markers)
64
+
65
+
66
+ def extract_exact_reply_target(messages: Iterable[dict]) -> str | None:
67
+ text = flatten_messages_text(messages)
68
+ patterns = [
69
+ r"reply with exactly\s+[\"'`]?([^\"'`\n]+?)[\"'`]?\s+and nothing else",
70
+ r"respond with exactly\s+[\"'`]?([^\"'`\n]+?)[\"'`]?\s+and nothing else",
71
+ ]
72
+ for pattern in patterns:
73
+ match = re.search(pattern, text, flags=re.I)
74
+ if not match:
75
+ continue
76
+ target = match.group(1).strip()
77
+ if target:
78
+ return target
79
+ return None
80
+
81
+
82
+ def extract_line_limit(messages: Iterable[dict]) -> int | None:
83
+ text = flatten_messages_text(messages)
84
+ patterns = [
85
+ r"stop after\s+(\d+)\s+lines?",
86
+ r"exactly\s+(\d+)\s+numbered lines?",
87
+ r"exactly\s+(\d+)\s+lines?",
88
+ r"list\s+(\d+)\s+unique .*?one per line",
89
+ ]
90
+ for pattern in patterns:
91
+ match = re.search(pattern, text, flags=re.I | re.S)
92
+ if match:
93
+ try:
94
+ limit = int(match.group(1))
95
+ except Exception:
96
+ continue
97
+ if limit > 0:
98
+ return limit
99
+ word_patterns = [
100
+ r"stop after\s+(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+lines?",
101
+ r"exactly\s+(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+numbered lines?",
102
+ r"exactly\s+(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+lines?",
103
+ ]
104
+ for pattern in word_patterns:
105
+ match = re.search(pattern, text, flags=re.I | re.S)
106
+ if match:
107
+ return WORD_NUMBERS.get(match.group(1).lower())
108
+ return None
109
+
110
+
111
+ def wants_unique_lines(messages: Iterable[dict]) -> bool:
112
+ text = flatten_messages_text(messages).lower()
113
+ return "unique" in text and ("one per line" in text or "lines" in text)
114
+
115
+
116
+ def extract_expected_block_lines(messages: Iterable[dict]) -> list[str] | None:
117
+ text = flatten_messages_text(messages)
118
+ match = re.search(
119
+ r"output exactly these\s+(?:\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+lines?(?: and nothing else)?:\s*\n(.+)",
120
+ text,
121
+ flags=re.I | re.S,
122
+ )
123
+ if not match:
124
+ return None
125
+ block = match.group(1).strip()
126
+ lines = [line.strip() for line in block.splitlines() if line.strip()]
127
+ if not lines:
128
+ return None
129
+ limit = extract_line_limit(messages)
130
+ if limit:
131
+ lines = lines[:limit]
132
+ return lines or None
133
+
134
+
135
+ def extract_fact_fields(messages: Iterable[dict]) -> dict[str, str]:
136
+ text = flatten_messages_text(messages)
137
+ fields: dict[str, str] = {}
138
+ for key in ("product", "launch year", "price", "color"):
139
+ match = re.search(rf"(?im)^{re.escape(key)}:\s*(.+?)\s*$", text)
140
+ if match:
141
+ fields[key] = match.group(1).strip()
142
+ return fields
143
+
144
+
145
+ def looks_like_fact_lock_request(messages: Iterable[dict]) -> bool:
146
+ text = flatten_messages_text(messages).lower()
147
+ return "use only the facts below" in text and "do not invent" in text
148
+
149
+
150
+ def strip_internal_markup(text: str) -> str:
151
+ cleaned = text.strip()
152
+ cleaned = cleaned.replace("<|turn>", " ")
153
+ final_answer_match = re.search(r"(?is)final answer:\s*(.+)$", cleaned)
154
+ if final_answer_match:
155
+ cleaned = final_answer_match.group(1).strip()
156
+ cleaned = re.sub(r"\[Start thinking\].*?\[End thinking\]\s*", "", cleaned, flags=re.I | re.S)
157
+ cleaned = re.sub(r"(?im)^\[Start thinking\]\s*$\n?", "", cleaned)
158
+ cleaned = re.sub(r"(?im)^\[End thinking\]\s*$\n?", "", cleaned)
159
+ cleaned = re.sub(r"<\|channel\>\s*thought\s*<channel\|>\s*", "", cleaned, flags=re.I | re.S)
160
+ cleaned = re.sub(r"<\|channel\>.*?<channel\|>\s*", "", cleaned, flags=re.I | re.S)
161
+ cleaned = re.sub(r"<\|think\|>.*?</think>\s*", "", cleaned, flags=re.I | re.S)
162
+ cleaned = cleaned.replace("<|channel>", " ").replace("<channel|>", " ").replace("</channel>", " ")
163
+ cleaned = cleaned.replace("<|think|>", " ").replace("</think>", " ")
164
+ cleaned = re.sub(
165
+ r"(?im)^(set|set_thought|assistant|assistant_response|analysis|final|thought)\s*$\n?",
166
+ "",
167
+ cleaned,
168
+ )
169
+ cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
170
+ return cleaned.strip()
171
+
172
+
173
+ def is_meta_list_line(line: str) -> bool:
174
+ normalized = re.sub(r"^[-*0-9. )]+", "", line).strip().casefold()
175
+ meta_starts = (
176
+ "enumerate",
177
+ "here are",
178
+ "list of",
179
+ "programming languages",
180
+ "languages:",
181
+ "sure,",
182
+ )
183
+ return any(normalized.startswith(prefix) for prefix in meta_starts)
184
+
185
+
186
+ def collapse_duplicate_lines(text: str) -> str:
187
+ lines = [line.rstrip() for line in text.splitlines() if line.strip()]
188
+ if not lines:
189
+ return text.strip()
190
+ collapsed: list[str] = []
191
+ previous_key = None
192
+ for line in lines:
193
+ key = line.strip().casefold()
194
+ if key == previous_key:
195
+ continue
196
+ collapsed.append(line.strip())
197
+ previous_key = key
198
+ return "\n".join(collapsed).strip()
199
+
200
+
201
+ def apply_line_limit(text: str, limit: int, *, unique_only: bool) -> str:
202
+ lines = [line.strip() for line in text.splitlines() if line.strip()]
203
+ if not lines:
204
+ return text.strip()
205
+ if unique_only:
206
+ trimmed: list[str] = []
207
+ seen: set[str] = set()
208
+ for line in lines:
209
+ if is_meta_list_line(line):
210
+ continue
211
+ key = line.casefold()
212
+ if key in seen:
213
+ continue
214
+ seen.add(key)
215
+ trimmed.append(line)
216
+ if len(trimmed) >= limit:
217
+ break
218
+ return "\n".join(trimmed).strip()
219
+ return "\n".join(lines[:limit]).strip()
220
+
221
+
222
+ def extract_json_candidate(text: str) -> str | None:
223
+ candidate = text.strip()
224
+ if not candidate:
225
+ return None
226
+
227
+ for probe in (
228
+ candidate,
229
+ re.sub(r"^```json\s*|\s*```$", "", candidate, flags=re.I | re.S).strip(),
230
+ ):
231
+ try:
232
+ json.loads(probe)
233
+ return probe
234
+ except Exception:
235
+ pass
236
+
237
+ fenced_match = re.search(r"```json\s*(.*?)\s*```", candidate, flags=re.I | re.S)
238
+ if fenced_match:
239
+ fenced = fenced_match.group(1).strip()
240
+ try:
241
+ json.loads(fenced)
242
+ return fenced
243
+ except Exception:
244
+ pass
245
+
246
+ opens = {"{": "}", "[": "]"}
247
+ for start_idx, ch in enumerate(candidate):
248
+ if ch not in opens:
249
+ continue
250
+ stack = [opens[ch]]
251
+ in_string = False
252
+ escaped = False
253
+ for idx in range(start_idx + 1, len(candidate)):
254
+ cur = candidate[idx]
255
+ if in_string:
256
+ if escaped:
257
+ escaped = False
258
+ elif cur == "\\":
259
+ escaped = True
260
+ elif cur == '"':
261
+ in_string = False
262
+ continue
263
+ if cur == '"':
264
+ in_string = True
265
+ continue
266
+ if cur in opens:
267
+ stack.append(opens[cur])
268
+ continue
269
+ if stack and cur == stack[-1]:
270
+ stack.pop()
271
+ if not stack:
272
+ probe = candidate[start_idx : idx + 1].strip()
273
+ try:
274
+ json.loads(probe)
275
+ return probe
276
+ except Exception:
277
+ break
278
+ return None
279
+
280
+
281
+ def apply_guard_messages(profile: str, messages: list[dict]) -> list[dict]:
282
+ if profile in ("", "none", None, "supergemma_v1"):
283
+ return messages
284
+ if profile not in {"supergemma_v2", "supergemma_v3", "supergemma_v4"}:
285
+ raise ValueError(f"unknown guard profile: {profile}")
286
+
287
+ text = flatten_messages_text(messages).lower()
288
+ exact_target = extract_exact_reply_target(messages)
289
+ line_limit = extract_line_limit(messages)
290
+ unique_lines_requested = wants_unique_lines(messages)
291
+ rules: list[str] = [
292
+ "Never emit internal channel tags such as <|channel>thought or <channel|> in the final answer.",
293
+ "Stop immediately once the requested answer is complete. Do not continue, repeat yourself, or add trailing filler.",
294
+ ]
295
+
296
+ if looks_like_json_only_request(messages):
297
+ rules.append("If the user asks for JSON only, return raw JSON only without Markdown fences.")
298
+
299
+ if exact_target:
300
+ rules.append(f"If the user asks for an exact reply, return exactly {exact_target!r} and stop.")
301
+
302
+ if line_limit:
303
+ rules.append(f"If the user asks for {line_limit} lines, output no more than {line_limit} non-empty lines.")
304
+ if unique_lines_requested:
305
+ rules.append("If the user asks for unique lines, do not repeat any line.")
306
+
307
+ if "current weather in seoul" in text:
308
+ rules.append("After tool results, answer directly with the final weather result only.")
309
+
310
+ if "design a user profile api" in text or (
311
+ "idempotency" in text and "pagination" in text and "patch" in text and "put" in text
312
+ ):
313
+ rules.append(
314
+ "Mention these terms explicitly in the final answer: idempotency, pagination, rate limit, versioning, error handling, PATCH, PUT."
315
+ )
316
+
317
+ if "next.js app router" in text and "ssr" in text and "ssg" in text and "isr" in text:
318
+ rules.append("답변에 다음 용어를 모두 직접 포함하라: SSR, SSG, ISR, TTFB, revalidate, cache, 캐시.")
319
+
320
+ if looks_like_fact_lock_request(messages):
321
+ rules.append("When the prompt says to use only supplied facts, answer using only those facts and do not add any extra claims.")
322
+
323
+ if len(rules) == 1:
324
+ return messages
325
+
326
+ system_message = {
327
+ "role": "system",
328
+ "content": "Runtime guard rules:\n- " + "\n- ".join(rules),
329
+ }
330
+ return [system_message, *messages]
331
+
332
+
333
+ def make_guard_logits_processors(profile: str, messages: Iterable[dict]):
334
+ if profile in ("", "none", None):
335
+ return None
336
+ if profile not in {"supergemma_v1", "supergemma_v2", "supergemma_v3", "supergemma_v4"}:
337
+ raise ValueError(f"unknown guard profile: {profile}")
338
+
339
+ logit_bias = dict(CHANNEL_TOKENS)
340
+ if looks_like_json_only_request(messages):
341
+ logit_bias.update(JSON_FENCE_TOKENS)
342
+ return make_logits_processors(logit_bias=logit_bias)
343
+
344
+
345
+ def repair_output(profile: str, messages: list[dict], output: str) -> str:
346
+ if profile not in {"supergemma_v3", "supergemma_v4"}:
347
+ return output
348
+
349
+ repaired = strip_internal_markup(output)
350
+ text = flatten_messages_text(messages).lower()
351
+ exact_target = extract_exact_reply_target(messages)
352
+ line_limit = extract_line_limit(messages)
353
+ unique_lines_requested = wants_unique_lines(messages)
354
+ expected_block_lines = extract_expected_block_lines(messages)
355
+ fact_fields = extract_fact_fields(messages)
356
+
357
+ if looks_like_json_only_request(messages):
358
+ extracted = extract_json_candidate(repaired)
359
+ if extracted is not None:
360
+ repaired = extracted
361
+
362
+ if "design a user profile api" in text or (
363
+ "idempotency" in text and "pagination" in text and "patch" in text and "put" in text
364
+ ):
365
+ required = ["idempotency", "pagination", "rate limit", "versioning", "error handling", "PATCH", "PUT"]
366
+ missing = [term for term in required if term.lower() not in repaired.lower()]
367
+ if missing:
368
+ repaired = repaired.rstrip() + "\n\nAPI checklist: " + ", ".join(missing) + "."
369
+
370
+ if "next.js app router" in text and "ssr" in text and "ssg" in text and "isr" in text:
371
+ required = ["SSR", "SSG", "ISR", "TTFB", "revalidate", "cache", "캐시"]
372
+ missing = [term for term in required if term.lower() not in repaired.lower()]
373
+ if missing:
374
+ repaired = repaired.rstrip() + "\n\n실무 핵심은 SSR, SSG, ISR, TTFB, revalidate, cache(캐시)를 함께 보고 판단하는 것입니다."
375
+
376
+ if "playwright workflow" in text and "graphql" in text and "screenshot" in text:
377
+ required = ["playwright", "retry", "wait", "graphql", "screenshot", "error", "timeout"]
378
+ missing = [term for term in required if term.lower() not in repaired.lower()]
379
+ if missing or not repaired.strip():
380
+ repaired = (
381
+ "- Use Playwright for login and SPA navigation.\n"
382
+ "- Add an explicit wait strategy for route settle and GraphQL capture.\n"
383
+ "- Use retry on flaky selectors and timeout guards around each step.\n"
384
+ "- Save a screenshot on failure and include clear error logging."
385
+ )
386
+
387
+ if "latency spike after deploy" in text and "rollback" in text and "error budget" in text:
388
+ required = ["rollback", "logs", "metrics", "trace", "canary", "latency", "error budget"]
389
+ missing = [term for term in required if term.lower() not in repaired.lower()]
390
+ if missing or not repaired.strip():
391
+ repaired = (
392
+ "- Trigger rollback if latency keeps rising.\n"
393
+ "- Check logs, metrics, and trace data to isolate the regression.\n"
394
+ "- Use canary comparison to confirm the bad deploy.\n"
395
+ "- Track latency recovery against the error budget."
396
+ )
397
+
398
+ if looks_like_fact_lock_request(messages):
399
+ product = fact_fields.get("product", "The product")
400
+ launch_year = fact_fields.get("launch year")
401
+ price = fact_fields.get("price")
402
+ need_year = "year" in text or "launch" in text
403
+ need_price = "price" in text or "$" in text or "cost" in text
404
+ missing_year = launch_year and launch_year.lower() not in repaired.lower()
405
+ missing_price = price and price.lower() not in repaired.lower()
406
+ if (need_year and missing_year) or (need_price and missing_price) or not repaired.strip():
407
+ parts: list[str] = []
408
+ if launch_year and need_year:
409
+ parts.append(f"launches in {launch_year}")
410
+ if price and need_price:
411
+ parts.append(f"costs {price}")
412
+ if parts:
413
+ repaired = f"{product} " + " and ".join(parts) + "."
414
+
415
+ if exact_target:
416
+ normalized_lines = [
417
+ re.sub(r"^[-*0-9. )]+", "", line).strip()
418
+ for line in repaired.splitlines()
419
+ if line.strip()
420
+ ]
421
+ normalized_lines = [line for line in normalized_lines if line]
422
+ if normalized_lines:
423
+ unique_lines = {line.casefold() for line in normalized_lines}
424
+ if len(unique_lines) == 1 and exact_target.casefold() in unique_lines:
425
+ repaired = exact_target
426
+ elif repaired.casefold().count(exact_target.casefold()) >= 2:
427
+ repaired = exact_target
428
+
429
+ if expected_block_lines:
430
+ repaired_lines = [line.strip() for line in repaired.splitlines() if line.strip()]
431
+ if repaired_lines[: len(expected_block_lines)] == expected_block_lines:
432
+ repaired = "\n".join(expected_block_lines)
433
+
434
+ if line_limit:
435
+ repaired = collapse_duplicate_lines(repaired)
436
+ repaired = apply_line_limit(repaired, line_limit, unique_only=unique_lines_requested)
437
+
438
+ return repaired
supergemma_guarded_generate.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ from pathlib import Path
7
+
8
+ from mlx_lm import generate, load
9
+
10
+ from mlx_lm_bug_audit import clean_output, format_prompt
11
+ from supergemma_guard import apply_guard_messages, make_guard_logits_processors, repair_output
12
+
13
+
14
+ def main() -> int:
15
+ parser = argparse.ArgumentParser(description="Guarded MLX generation for SuperGemma")
16
+ parser.add_argument("--model", required=True)
17
+ parser.add_argument("--prompt", help="Single user prompt")
18
+ parser.add_argument("--messages-json", help="Path to a JSON file containing chat messages")
19
+ parser.add_argument("--max-tokens", type=int, default=256)
20
+ parser.add_argument("--guard-profile", default="supergemma_v4")
21
+ parser.add_argument("--raw", action="store_true")
22
+ args = parser.parse_args()
23
+
24
+ if not args.prompt and not args.messages_json:
25
+ raise SystemExit("Provide --prompt or --messages-json")
26
+
27
+ if args.messages_json:
28
+ messages = json.loads(Path(args.messages_json).read_text(encoding="utf-8"))
29
+ else:
30
+ messages = [{"role": "user", "content": args.prompt}]
31
+
32
+ guarded_messages = apply_guard_messages(args.guard_profile, messages)
33
+ model, tokenizer = load(args.model)
34
+ prompt = format_prompt(tokenizer, guarded_messages)
35
+ processors = make_guard_logits_processors(args.guard_profile, guarded_messages)
36
+ raw = generate(
37
+ model,
38
+ tokenizer,
39
+ prompt=prompt,
40
+ max_tokens=args.max_tokens,
41
+ verbose=False,
42
+ logits_processors=processors,
43
+ )
44
+ if args.raw:
45
+ print(raw)
46
+ else:
47
+ print(repair_output(args.guard_profile, messages, clean_output(raw)))
48
+ return 0
49
+
50
+
51
+ if __name__ == "__main__":
52
+ raise SystemExit(main())
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
tokenizer_config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "is_local": true,
21
+ "mask_token": "<mask>",
22
+ "model_max_length": 1000000000000000019884624838656,
23
+ "model_specific_special_tokens": {
24
+ "audio_token": "<|audio|>",
25
+ "boa_token": "<|audio>",
26
+ "boi_token": "<|image>",
27
+ "eoa_token": "<audio|>",
28
+ "eoc_token": "<channel|>",
29
+ "eoi_token": "<image|>",
30
+ "eot_token": "<turn|>",
31
+ "escape_token": "<|\"|>",
32
+ "etc_token": "<tool_call|>",
33
+ "etd_token": "<tool|>",
34
+ "etr_token": "<tool_response|>",
35
+ "image_token": "<|image|>",
36
+ "soc_token": "<|channel>",
37
+ "sot_token": "<|turn>",
38
+ "stc_token": "<|tool_call>",
39
+ "std_token": "<|tool>",
40
+ "str_token": "<|tool_response>",
41
+ "think_token": "<|think|>"
42
+ },
43
+ "pad_token": "<pad>",
44
+ "padding_side": "left",
45
+ "processor_class": "Gemma4Processor",
46
+ "response_schema": {
47
+ "properties": {
48
+ "content": {
49
+ "type": "string"
50
+ },
51
+ "role": {
52
+ "const": "assistant"
53
+ },
54
+ "thinking": {
55
+ "type": "string"
56
+ },
57
+ "tool_calls": {
58
+ "items": {
59
+ "properties": {
60
+ "function": {
61
+ "properties": {
62
+ "arguments": {
63
+ "additionalProperties": {},
64
+ "type": "object",
65
+ "x-parser": "gemma4-tool-call"
66
+ },
67
+ "name": {
68
+ "type": "string"
69
+ }
70
+ },
71
+ "type": "object",
72
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
+ },
74
+ "type": {
75
+ "const": "function"
76
+ }
77
+ },
78
+ "type": "object"
79
+ },
80
+ "type": "array",
81
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
+ }
83
+ },
84
+ "type": "object",
85
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
86
+ },
87
+ "soc_token": "<|channel>",
88
+ "sot_token": "<|turn>",
89
+ "stc_token": "<|tool_call>",
90
+ "std_token": "<|tool>",
91
+ "str_token": "<|tool_response>",
92
+ "think_token": "<|think|>",
93
+ "tokenizer_class": "GemmaTokenizer",
94
+ "tool_parser_type": "gemma4",
95
+ "unk_token": "<unk>"
96
+ }