Snider Virgil committed on
Commit
39a4bd4
·
1 Parent(s): 27e241e

feat: merge LEK into lemrd weights

Browse files

LEK-2 LoRA merged into Gemma 4 31B Dense attention projections.
Converged at loss 0.0002 in 278 steps via patience-based early stopping (best checkpoint at step 248).
Gradient checkpointing enabled. KV-shared layers restored from base.

Co-Authored-By: Virgil <virgil@lethean.io>

README.md CHANGED
@@ -1,9 +1,7 @@
1
  ---
2
- library_name: mlx
3
- license: apache-2.0
4
- license_link: https://ai.google.dev/gemma/docs/gemma_4_license
5
- pipeline_tag: text-generation
6
- base_model: google/gemma-4-31b-it
7
  tags:
8
  - mlx
 
 
9
  ---
 
1
  ---
2
+ language: en
 
 
 
 
3
  tags:
4
  - mlx
5
+ pipeline_tag: image-text-to-text
6
+ library_name: mlx
7
  ---
chat_template.jinja CHANGED
@@ -11,15 +11,34 @@
11
  description:<|"|>{{ value['description'] }}<|"|>
12
  {%- set add_comma = true -%}
13
  {%- endif -%}
 
 
 
 
14
  {%- if value['type'] | upper == 'STRING' -%}
15
  {%- if value['enum'] -%}
16
  {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
  enum:{{ format_argument(value['enum']) }}
18
  {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  {%- elif value['type'] | upper == 'ARRAY' -%}
20
  {%- if value['items'] is mapping and value['items'] -%}
21
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
- items:{
23
  {%- set ns_items = namespace(found_first=false) -%}
24
  {%- for item_key, item_value in value['items'] | dictsort -%}
25
  {%- if item_value is not none -%}
@@ -52,32 +71,6 @@
52
  }
53
  {%- endif -%}
54
  {%- endif -%}
55
- {%- if value['nullable'] %}
56
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
- nullable:true
58
- {%- endif -%}
59
- {%- if value['type'] | upper == 'OBJECT' -%}
60
- {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
- properties:{
63
- {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
- }
65
- {%- elif value is mapping -%}
66
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
- properties:{
68
- {{- format_parameters(value, value['required'] | default([])) -}}
69
- }
70
- {%- endif -%}
71
- {%- if value['required'] -%}
72
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
- required:[
74
- {%- for item in value['required'] | default([]) -%}
75
- <|"|>{{- item -}}<|"|>
76
- {%- if not loop.last %},{% endif -%}
77
- {%- endfor -%}
78
- ]
79
- {%- endif -%}
80
- {%- endif -%}
81
  {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
  type:<|"|>{{ value['type'] | upper }}<|"|>}
83
  {%- endif -%}
@@ -157,31 +150,16 @@
157
  {{- ns.result | trim -}}
158
  {%- endmacro -%}
159
 
160
- {%- macro format_tool_response_block(tool_name, response) -%}
161
- {{- '<|tool_response>' -}}
162
- {%- if response is mapping -%}
163
- {{- 'response:' + tool_name + '{' -}}
164
- {%- for key, value in response | dictsort -%}
165
- {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
166
- {%- if not loop.last %},{% endif -%}
167
- {%- endfor -%}
168
- {{- '}' -}}
169
- {%- else -%}
170
- {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
171
- {%- endif -%}
172
- {{- '<tool_response|>' -}}
173
- {%- endmacro -%}
174
-
175
  {%- set ns = namespace(prev_message_type=None) -%}
176
  {%- set loop_messages = messages -%}
177
- {{- bos_token -}}
178
  {#- Handle System/Tool Definitions Block -#}
179
  {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
  {{- '<|turn>system\n' -}}
181
 
182
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
183
  {%- if enable_thinking is defined and enable_thinking -%}
184
- {{- '<|think|>\n' -}}
185
  {%- set ns.prev_message_type = 'think' -%}
186
  {%- endif -%}
187
 
@@ -202,41 +180,11 @@
202
  {{- '<turn|>\n' -}}
203
  {%- endif %}
204
 
205
- {#- Pre-scan: find last user message index for reasoning guard -#}
206
- {%- set ns_turn = namespace(last_user_idx=-1) -%}
207
- {%- for i in range(loop_messages | length) -%}
208
- {%- if loop_messages[i]['role'] == 'user' -%}
209
- {%- set ns_turn.last_user_idx = i -%}
210
- {%- endif -%}
211
- {%- endfor -%}
212
-
213
  {#- Loop through messages -#}
214
  {%- for message in loop_messages -%}
215
- {%- if message['role'] != 'tool' -%}
216
  {%- set ns.prev_message_type = None -%}
217
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
218
- {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
219
- {%- set prev_nt = namespace(role=None, found=false) -%}
220
- {%- if loop.index0 > 0 -%}
221
- {%- for j in range(loop.index0 - 1, -1, -1) -%}
222
- {%- if not prev_nt.found -%}
223
- {%- if loop_messages[j]['role'] != 'tool' -%}
224
- {%- set prev_nt.role = loop_messages[j]['role'] -%}
225
- {%- set prev_nt.found = true -%}
226
- {%- endif -%}
227
- {%- endif -%}
228
- {%- endfor -%}
229
- {%- endif -%}
230
- {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
231
- {%- if not continue_same_model_turn -%}
232
  {{- '<|turn>' + role + '\n' }}
233
- {%- endif -%}
234
-
235
- {#- Render reasoning/reasoning_content as thinking channel -#}
236
- {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
237
- {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
238
- {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
239
- {%- endif -%}
240
 
241
  {%- if message['tool_calls'] -%}
242
  {%- for tool_call in message['tool_calls'] -%}
@@ -257,49 +205,23 @@
257
  {%- set ns.prev_message_type = 'tool_call' -%}
258
  {%- endif -%}
259
 
260
- {%- set ns_tr_out = namespace(flag=false) -%}
261
- {%- if message.get('tool_responses') -%}
262
- {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
263
  {%- for tool_response in message['tool_responses'] -%}
264
- {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
265
- {%- set ns_tr_out.flag = true -%}
266
- {%- set ns.prev_message_type = 'tool_response' -%}
267
- {%- endfor -%}
268
- {%- elif message.get('tool_calls') -%}
269
- {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
270
- {%- set ns_tool_scan = namespace(stopped=false) -%}
271
- {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
272
- {%- if ns_tool_scan.stopped -%}
273
- {%- elif loop_messages[k]['role'] != 'tool' -%}
274
- {%- set ns_tool_scan.stopped = true -%}
275
- {%- else -%}
276
- {%- set follow = loop_messages[k] -%}
277
- {#- Resolve tool_call_id to function name -#}
278
- {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
279
- {%- for tc in message['tool_calls'] -%}
280
- {%- if tc.get('id') == follow.get('tool_call_id') -%}
281
- {%- set ns_tname.name = tc['function']['name'] -%}
282
- {%- endif -%}
283
  {%- endfor -%}
284
- {#- Handle content as string or content-parts array -#}
285
- {%- set tool_body = follow.get('content') -%}
286
- {%- if tool_body is string -%}
287
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
288
- {%- elif tool_body is sequence and tool_body is not string -%}
289
- {%- set ns_txt = namespace(s='') -%}
290
- {%- for part in tool_body -%}
291
- {%- if part.get('type') == 'text' -%}
292
- {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
293
- {%- endif -%}
294
- {%- endfor -%}
295
- {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
296
- {%- else -%}
297
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
298
- {%- endif -%}
299
- {%- set ns_tr_out.flag = true -%}
300
- {%- set ns.prev_message_type = 'tool_response' -%}
301
  {%- endif -%}
 
302
  {%- endfor -%}
 
303
  {%- endif -%}
304
 
305
  {%- if message['content'] is string -%}
@@ -317,31 +239,28 @@
317
  {{- item['text'] | trim -}}
318
  {%- endif -%}
319
  {%- elif item['type'] == 'image' -%}
320
- {{- '<|image|>' -}}
321
  {%- set ns.prev_message_type = 'image' -%}
322
  {%- elif item['type'] == 'audio' -%}
323
  {{- '<|audio|>' -}}
324
  {%- set ns.prev_message_type = 'audio' -%}
325
  {%- elif item['type'] == 'video' -%}
326
- {{- '<|video|>' -}}
327
  {%- set ns.prev_message_type = 'video' -%}
328
  {%- endif -%}
329
  {%- endfor -%}
330
  {%- endif -%}
331
 
332
- {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
333
- {{- '<|tool_response>' -}}
334
- {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
335
  {{- '<turn|>\n' -}}
336
  {%- endif -%}
337
- {%- endif -%}
338
  {%- endfor -%}
339
 
340
  {%- if add_generation_prompt -%}
341
- {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
  {{- '<|turn>model\n' -}}
343
- {%- if not enable_thinking | default(false) -%}
344
- {{- '<|channel>thought\n<channel|>' -}}
345
- {%- endif -%}
346
  {%- endif -%}
347
  {%- endif -%}
 
11
  description:<|"|>{{ value['description'] }}<|"|>
12
  {%- set add_comma = true -%}
13
  {%- endif -%}
14
+ {%- if value['nullable'] %}
15
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
16
+ nullable:true
17
+ {%- endif -%}
18
  {%- if value['type'] | upper == 'STRING' -%}
19
  {%- if value['enum'] -%}
20
  {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
21
  enum:{{ format_argument(value['enum']) }}
22
  {%- endif -%}
23
+ {%- elif value['type'] | upper == 'OBJECT' -%}
24
+ ,properties:{
25
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
26
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
27
+ {%- elif value is mapping -%}
28
+ {{- format_parameters(value, value['required'] | default([])) -}}
29
+ {%- endif -%}
30
+ }
31
+ {%- if value['required'] -%}
32
+ ,required:[
33
+ {%- for item in value['required'] | default([]) -%}
34
+ <|"|>{{- item -}}<|"|>
35
+ {%- if not loop.last %},{% endif -%}
36
+ {%- endfor -%}
37
+ ]
38
+ {%- endif -%}
39
  {%- elif value['type'] | upper == 'ARRAY' -%}
40
  {%- if value['items'] is mapping and value['items'] -%}
41
+ ,items:{
 
42
  {%- set ns_items = namespace(found_first=false) -%}
43
  {%- for item_key, item_value in value['items'] | dictsort -%}
44
  {%- if item_value is not none -%}
 
71
  }
72
  {%- endif -%}
73
  {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
75
  type:<|"|>{{ value['type'] | upper }}<|"|>}
76
  {%- endif -%}
 
150
  {{- ns.result | trim -}}
151
  {%- endmacro -%}
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  {%- set ns = namespace(prev_message_type=None) -%}
154
  {%- set loop_messages = messages -%}
155
+ {{ bos_token }}
156
  {#- Handle System/Tool Definitions Block -#}
157
  {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
158
  {{- '<|turn>system\n' -}}
159
 
160
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
161
  {%- if enable_thinking is defined and enable_thinking -%}
162
+ {{- '<|think|>' -}}
163
  {%- set ns.prev_message_type = 'think' -%}
164
  {%- endif -%}
165
 
 
180
  {{- '<turn|>\n' -}}
181
  {%- endif %}
182
 
 
 
 
 
 
 
 
 
183
  {#- Loop through messages -#}
184
  {%- for message in loop_messages -%}
 
185
  {%- set ns.prev_message_type = None -%}
186
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  {{- '<|turn>' + role + '\n' }}
 
 
 
 
 
 
 
188
 
189
  {%- if message['tool_calls'] -%}
190
  {%- for tool_call in message['tool_calls'] -%}
 
205
  {%- set ns.prev_message_type = 'tool_call' -%}
206
  {%- endif -%}
207
 
208
+ {%- if message['tool_responses'] -%}
209
+ {#- Tool Response handling -#}
 
210
  {%- for tool_response in message['tool_responses'] -%}
211
+ {{- '<|tool_response>' -}}
212
+ {%- if tool_response['response'] is mapping -%}
213
+ {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}
214
+ {%- for key, value in tool_response['response'] | dictsort -%}
215
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
216
+ {%- if not loop.last %},{% endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  {%- endfor -%}
218
+ {{- '}' -}}
219
+ {%- else -%}
220
+ {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  {%- endif -%}
222
+ {{- '<tool_response|>' -}}
223
  {%- endfor -%}
224
+ {%- set ns.prev_message_type = 'tool_response' -%}
225
  {%- endif -%}
226
 
227
  {%- if message['content'] is string -%}
 
239
  {{- item['text'] | trim -}}
240
  {%- endif -%}
241
  {%- elif item['type'] == 'image' -%}
242
+ {{- '\n\n<|image|>\n\n' -}}
243
  {%- set ns.prev_message_type = 'image' -%}
244
  {%- elif item['type'] == 'audio' -%}
245
  {{- '<|audio|>' -}}
246
  {%- set ns.prev_message_type = 'audio' -%}
247
  {%- elif item['type'] == 'video' -%}
248
+ {{- '\n\n<|video|>\n\n' -}}
249
  {%- set ns.prev_message_type = 'video' -%}
250
  {%- endif -%}
251
  {%- endfor -%}
252
  {%- endif -%}
253
 
254
+ {%- if not (message['tool_responses'] and not message['content']) -%}
 
 
255
  {{- '<turn|>\n' -}}
256
  {%- endif -%}
 
257
  {%- endfor -%}
258
 
259
  {%- if add_generation_prompt -%}
260
+ {%- if ns.prev_message_type != 'tool_response' -%}
261
  {{- '<|turn>model\n' -}}
262
+ {%- endif -%}
263
+ {%- if not enable_thinking | default(false) -%}
264
+ {{- '<|channel>thought\n<channel|>' -}}
265
  {%- endif -%}
266
  {%- endif -%}
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors → model-00001-of-00006.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988e2b1fd41d93b62b8c432f52f632c43b8cb7f86df4b957db36a3cc0dab40ca
3
- size 5366617512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a93ebff49222bf837fec630cca73782ff2662da0db93e74549fa743085d36dd
3
+ size 5326590887
model-00002-of-00004.safetensors → model-00002-of-00006.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a496a96fbd39cd11a9871f91d026013d91069dcac15f89ca861b93976f3857cf
3
- size 5361642573
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60c6d7570b9ceb1bcfd22091888121311882505d5bf7758db3fabe769f2d5bb
3
+ size 5351454850
model-00003-of-00004.safetensors → model-00003-of-00006.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afa555ff0e1bc458c5b08aeef1f4499dce63e2bfb5d3a2aac716e47c0a5672c1
3
- size 5367276094
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f889e77a2debd1346a4df0661e3daa0ff8c12fe0332a29f54d1d71e2ebf8a00
3
+ size 5363842431
model-00004-of-00004.safetensors → model-00004-of-00006.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bde02793a3d2ed3f29c3d1629ea9fbeb9c25720bc42db0eca9720abe094235a
3
- size 1173848301
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b367bddf857b2a78f72d2c079d331e9a4d62354bfb05fc4b83cb20a724c3a5c
3
+ size 5363842477
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8d313c5486671508be95e040fcf429cf1184f859c27cc800507402a867c3f8
3
+ size 5351456119
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd7362728304f102b1c1c11118ee7b2e3baa2182f9e718364abf29d78cc827c
3
+ size 2059325268
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
processor_config.json CHANGED
@@ -28,5 +28,15 @@
28
  }
29
  },
30
  "image_seq_length": 280,
31
- "processor_class": "Gemma4Processor"
32
- }
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "image_seq_length": 280,
31
+ "processor_class": "Gemma4Processor",
32
+ "feature_extractor": {
33
+ "feature_extractor_type": "Gemma4AudioFeatureExtractor",
34
+ "sampling_rate": 16000,
35
+ "num_mel_filters": 128,
36
+ "fft_length": 512,
37
+ "hop_length": 160,
38
+ "chunk_duration": 8.0,
39
+ "overlap_duration": 1.0
40
+ },
41
+ "audio_ms_per_token": 40
42
+ }
tokenizer_config.json CHANGED
@@ -13,6 +13,9 @@
13
  "etc_token": "<tool_call|>",
14
  "etd_token": "<tool|>",
15
  "etr_token": "<tool_response|>",
 
 
 
16
  "image_token": "<|image|>",
17
  "is_local": true,
18
  "mask_token": "<mask>",
@@ -35,8 +38,7 @@
35
  "stc_token": "<|tool_call>",
36
  "std_token": "<|tool>",
37
  "str_token": "<|tool_response>",
38
- "think_token": "<|think|>",
39
- "video_token": "<|video|>"
40
  },
41
  "pad_token": "<pad>",
42
  "padding_side": "left",
@@ -89,6 +91,5 @@
89
  "str_token": "<|tool_response>",
90
  "think_token": "<|think|>",
91
  "tokenizer_class": "GemmaTokenizer",
92
- "unk_token": "<unk>",
93
- "video_token": "<|video|>"
94
  }
 
13
  "etc_token": "<tool_call|>",
14
  "etd_token": "<tool|>",
15
  "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
  "image_token": "<|image|>",
20
  "is_local": true,
21
  "mask_token": "<mask>",
 
38
  "stc_token": "<|tool_call>",
39
  "std_token": "<|tool>",
40
  "str_token": "<|tool_response>",
41
+ "think_token": "<|think|>"
 
42
  },
43
  "pad_token": "<pad>",
44
  "padding_side": "left",
 
91
  "str_token": "<|tool_response>",
92
  "think_token": "<|think|>",
93
  "tokenizer_class": "GemmaTokenizer",
94
+ "unk_token": "<unk>"
 
95
  }