danielhanchen commited on
Commit
125ba9e
·
verified ·
1 Parent(s): 17ae814

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -1,4 +1,5 @@
1
- {# Unsloth template fixes #}[gMASK]<sop>
 
2
  {%- if tools -%}
3
  <|system|>
4
  # Tools
@@ -43,25 +44,19 @@ For each function call, output the function name and arguments within the follow
43
  {%- endfor %}
44
  {% for m in messages %}
45
  {%- if m.role == 'user' -%}<|user|>
46
- {%- set content = visible_text(m.content)|string %}{{ content }}
47
- {{- '/nothink' if (enable_thinking is defined and not enable_thinking and not content.endswith("/nothink")) else '' -}}
48
  {%- elif m.role == 'assistant' -%}
49
  <|assistant|>
50
  {%- set reasoning_content = '' %}
51
- {%- set content = visible_text(m.content)|string %}
52
- {%- if m.reasoning_content is defined and m.reasoning_content is string %}
53
  {%- set reasoning_content = m.reasoning_content %}
54
  {%- else %}
55
- {# Unsloth template fixes - must change to for loop since llama.cpp will error out if not #}
56
- {%- set parts = content.split('</think>') %}
57
- {% for part in parts %}
58
- {%- if loop.index0 == 0 -%}
59
- {%- set reasoning_content = (part.split("<think>")|last) %}
60
- {%- set reasoning_content = reasoning_content.lstrip('\n').rstrip('\n') -%}
61
- {%- else -%}
62
- {%- set content = part.lstrip('\n') %}
63
- {%- endif %}
64
- {%- endfor %}
65
  {%- endif %}
66
  {%- if loop.index0 > ns.last_user_index and reasoning_content -%}
67
  {{ '\n<think>' + reasoning_content.strip() + '</think>'}}
@@ -77,14 +72,11 @@ For each function call, output the function name and arguments within the follow
77
  {%- set tc = tc.function %}
78
  {%- endif %}
79
  {{ '\n<tool_call>' + tc.name }}
80
- {% set _args = tc.arguments %}
81
- {%- if _args is not mapping -%}
82
- {%- set _args = {} %}
83
- {%- endif -%}
84
  {% for k, v in _args|items %}
85
  <arg_key>{{ k }}</arg_key>
86
  <arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
87
- {% endfor %}
88
  </tool_call>{% endfor %}
89
  {% endif %}
90
  {%- elif m.role == 'tool' -%}
@@ -104,9 +96,10 @@ For each function call, output the function name and arguments within the follow
104
  {% endif -%}
105
  {%- elif m.role == 'system' -%}
106
  <|system|>
107
- {{ visible_text(m.content)|string }}
108
  {%- endif -%}
109
  {%- endfor -%}
110
  {%- if add_generation_prompt -%}
111
  <|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
112
- {%- endif -%}{# Copyright 2025-present Unsloth. Apache 2.0 License. #}
 
 
1
+ {# Unsloth template fixes #}
2
+ [gMASK]<sop>
3
  {%- if tools -%}
4
  <|system|>
5
  # Tools
 
44
  {%- endfor %}
45
  {% for m in messages %}
46
  {%- if m.role == 'user' -%}<|user|>
47
+ {{ visible_text(m.content) }}
48
+ {{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
49
  {%- elif m.role == 'assistant' -%}
50
  <|assistant|>
51
  {%- set reasoning_content = '' %}
52
+ {%- set content = visible_text(m.content) %}
53
+ {%- if m.reasoning_content is string %}
54
  {%- set reasoning_content = m.reasoning_content %}
55
  {%- else %}
56
+ {%- if '</think>' in content %}
57
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
58
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
59
+ {%- endif %}
 
 
 
 
 
 
60
  {%- endif %}
61
  {%- if loop.index0 > ns.last_user_index and reasoning_content -%}
62
  {{ '\n<think>' + reasoning_content.strip() + '</think>'}}
 
72
  {%- set tc = tc.function %}
73
  {%- endif %}
74
  {{ '\n<tool_call>' + tc.name }}
75
+ {% set _args = tc.arguments %}{%- if _args is mapping %}
 
 
 
76
  {% for k, v in _args|items %}
77
  <arg_key>{{ k }}</arg_key>
78
  <arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
79
+ {% endfor %}{%- endif %}
80
  </tool_call>{% endfor %}
81
  {% endif %}
82
  {%- elif m.role == 'tool' -%}
 
96
  {% endif -%}
97
  {%- elif m.role == 'system' -%}
98
  <|system|>
99
+ {{ visible_text(m.content) }}
100
  {%- endif -%}
101
  {%- endfor -%}
102
  {%- if add_generation_prompt -%}
103
  <|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
104
+ {%- endif -%}
105
+ {# Copyright 2025-present Unsloth. Apache 2.0 License. #}
config.json CHANGED
@@ -36,7 +36,7 @@
36
  "routed_scaling_factor": 2.5,
37
  "tie_word_embeddings": false,
38
  "topk_group": 1,
39
- "transformers_version": "4.56.2",
40
  "unsloth_fixed": true,
41
  "use_cache": true,
42
  "use_qk_norm": true,
 
36
  "routed_scaling_factor": 2.5,
37
  "tie_word_embeddings": false,
38
  "topk_group": 1,
39
+ "transformers_version": "4.57.3",
40
  "unsloth_fixed": true,
41
  "use_cache": true,
42
  "use_qk_norm": true,
model.safetensors.index.json CHANGED
@@ -44192,505 +44192,505 @@
44192
  "model.layers.91.self_attn.v_proj.bias": "model-00092-of-00092.safetensors",
44193
  "model.layers.91.self_attn.v_proj.weight": "model-00092-of-00092.safetensors",
44194
  "model.norm.weight": "model-00092-of-00092.safetensors",
44195
- "model.layers.92.eh_proj.weight": "model-mtp.safetensors",
44196
- "model.layers.92.enorm.weight": "model-mtp.safetensors",
44197
- "model.layers.92.hnorm.weight": "model-mtp.safetensors",
44198
- "model.layers.92.input_layernorm.weight": "model-mtp.safetensors",
44199
- "model.layers.92.mlp.experts.0.down_proj.weight": "model-mtp.safetensors",
44200
- "model.layers.92.mlp.experts.0.gate_proj.weight": "model-mtp.safetensors",
44201
- "model.layers.92.mlp.experts.0.up_proj.weight": "model-mtp.safetensors",
44202
- "model.layers.92.mlp.experts.1.down_proj.weight": "model-mtp.safetensors",
44203
- "model.layers.92.mlp.experts.1.gate_proj.weight": "model-mtp.safetensors",
44204
- "model.layers.92.mlp.experts.1.up_proj.weight": "model-mtp.safetensors",
44205
- "model.layers.92.mlp.experts.10.down_proj.weight": "model-mtp.safetensors",
44206
- "model.layers.92.mlp.experts.10.gate_proj.weight": "model-mtp.safetensors",
44207
- "model.layers.92.mlp.experts.10.up_proj.weight": "model-mtp.safetensors",
44208
- "model.layers.92.mlp.experts.100.down_proj.weight": "model-mtp.safetensors",
44209
- "model.layers.92.mlp.experts.100.gate_proj.weight": "model-mtp.safetensors",
44210
- "model.layers.92.mlp.experts.100.up_proj.weight": "model-mtp.safetensors",
44211
- "model.layers.92.mlp.experts.101.down_proj.weight": "model-mtp.safetensors",
44212
- "model.layers.92.mlp.experts.101.gate_proj.weight": "model-mtp.safetensors",
44213
- "model.layers.92.mlp.experts.101.up_proj.weight": "model-mtp.safetensors",
44214
- "model.layers.92.mlp.experts.102.down_proj.weight": "model-mtp.safetensors",
44215
- "model.layers.92.mlp.experts.102.gate_proj.weight": "model-mtp.safetensors",
44216
- "model.layers.92.mlp.experts.102.up_proj.weight": "model-mtp.safetensors",
44217
- "model.layers.92.mlp.experts.103.down_proj.weight": "model-mtp.safetensors",
44218
- "model.layers.92.mlp.experts.103.gate_proj.weight": "model-mtp.safetensors",
44219
- "model.layers.92.mlp.experts.103.up_proj.weight": "model-mtp.safetensors",
44220
- "model.layers.92.mlp.experts.104.down_proj.weight": "model-mtp.safetensors",
44221
- "model.layers.92.mlp.experts.104.gate_proj.weight": "model-mtp.safetensors",
44222
- "model.layers.92.mlp.experts.104.up_proj.weight": "model-mtp.safetensors",
44223
- "model.layers.92.mlp.experts.105.down_proj.weight": "model-mtp.safetensors",
44224
- "model.layers.92.mlp.experts.105.gate_proj.weight": "model-mtp.safetensors",
44225
- "model.layers.92.mlp.experts.105.up_proj.weight": "model-mtp.safetensors",
44226
- "model.layers.92.mlp.experts.106.down_proj.weight": "model-mtp.safetensors",
44227
- "model.layers.92.mlp.experts.106.gate_proj.weight": "model-mtp.safetensors",
44228
- "model.layers.92.mlp.experts.106.up_proj.weight": "model-mtp.safetensors",
44229
- "model.layers.92.mlp.experts.107.down_proj.weight": "model-mtp.safetensors",
44230
- "model.layers.92.mlp.experts.107.gate_proj.weight": "model-mtp.safetensors",
44231
- "model.layers.92.mlp.experts.107.up_proj.weight": "model-mtp.safetensors",
44232
- "model.layers.92.mlp.experts.108.down_proj.weight": "model-mtp.safetensors",
44233
- "model.layers.92.mlp.experts.108.gate_proj.weight": "model-mtp.safetensors",
44234
- "model.layers.92.mlp.experts.108.up_proj.weight": "model-mtp.safetensors",
44235
- "model.layers.92.mlp.experts.109.down_proj.weight": "model-mtp.safetensors",
44236
- "model.layers.92.mlp.experts.109.gate_proj.weight": "model-mtp.safetensors",
44237
- "model.layers.92.mlp.experts.109.up_proj.weight": "model-mtp.safetensors",
44238
- "model.layers.92.mlp.experts.11.down_proj.weight": "model-mtp.safetensors",
44239
- "model.layers.92.mlp.experts.11.gate_proj.weight": "model-mtp.safetensors",
44240
- "model.layers.92.mlp.experts.11.up_proj.weight": "model-mtp.safetensors",
44241
- "model.layers.92.mlp.experts.110.down_proj.weight": "model-mtp.safetensors",
44242
- "model.layers.92.mlp.experts.110.gate_proj.weight": "model-mtp.safetensors",
44243
- "model.layers.92.mlp.experts.110.up_proj.weight": "model-mtp.safetensors",
44244
- "model.layers.92.mlp.experts.111.down_proj.weight": "model-mtp.safetensors",
44245
- "model.layers.92.mlp.experts.111.gate_proj.weight": "model-mtp.safetensors",
44246
- "model.layers.92.mlp.experts.111.up_proj.weight": "model-mtp.safetensors",
44247
- "model.layers.92.mlp.experts.112.down_proj.weight": "model-mtp.safetensors",
44248
- "model.layers.92.mlp.experts.112.gate_proj.weight": "model-mtp.safetensors",
44249
- "model.layers.92.mlp.experts.112.up_proj.weight": "model-mtp.safetensors",
44250
- "model.layers.92.mlp.experts.113.down_proj.weight": "model-mtp.safetensors",
44251
- "model.layers.92.mlp.experts.113.gate_proj.weight": "model-mtp.safetensors",
44252
- "model.layers.92.mlp.experts.113.up_proj.weight": "model-mtp.safetensors",
44253
- "model.layers.92.mlp.experts.114.down_proj.weight": "model-mtp.safetensors",
44254
- "model.layers.92.mlp.experts.114.gate_proj.weight": "model-mtp.safetensors",
44255
- "model.layers.92.mlp.experts.114.up_proj.weight": "model-mtp.safetensors",
44256
- "model.layers.92.mlp.experts.115.down_proj.weight": "model-mtp.safetensors",
44257
- "model.layers.92.mlp.experts.115.gate_proj.weight": "model-mtp.safetensors",
44258
- "model.layers.92.mlp.experts.115.up_proj.weight": "model-mtp.safetensors",
44259
- "model.layers.92.mlp.experts.116.down_proj.weight": "model-mtp.safetensors",
44260
- "model.layers.92.mlp.experts.116.gate_proj.weight": "model-mtp.safetensors",
44261
- "model.layers.92.mlp.experts.116.up_proj.weight": "model-mtp.safetensors",
44262
- "model.layers.92.mlp.experts.117.down_proj.weight": "model-mtp.safetensors",
44263
- "model.layers.92.mlp.experts.117.gate_proj.weight": "model-mtp.safetensors",
44264
- "model.layers.92.mlp.experts.117.up_proj.weight": "model-mtp.safetensors",
44265
- "model.layers.92.mlp.experts.118.down_proj.weight": "model-mtp.safetensors",
44266
- "model.layers.92.mlp.experts.118.gate_proj.weight": "model-mtp.safetensors",
44267
- "model.layers.92.mlp.experts.118.up_proj.weight": "model-mtp.safetensors",
44268
- "model.layers.92.mlp.experts.119.down_proj.weight": "model-mtp.safetensors",
44269
- "model.layers.92.mlp.experts.119.gate_proj.weight": "model-mtp.safetensors",
44270
- "model.layers.92.mlp.experts.119.up_proj.weight": "model-mtp.safetensors",
44271
- "model.layers.92.mlp.experts.12.down_proj.weight": "model-mtp.safetensors",
44272
- "model.layers.92.mlp.experts.12.gate_proj.weight": "model-mtp.safetensors",
44273
- "model.layers.92.mlp.experts.12.up_proj.weight": "model-mtp.safetensors",
44274
- "model.layers.92.mlp.experts.120.down_proj.weight": "model-mtp.safetensors",
44275
- "model.layers.92.mlp.experts.120.gate_proj.weight": "model-mtp.safetensors",
44276
- "model.layers.92.mlp.experts.120.up_proj.weight": "model-mtp.safetensors",
44277
- "model.layers.92.mlp.experts.121.down_proj.weight": "model-mtp.safetensors",
44278
- "model.layers.92.mlp.experts.121.gate_proj.weight": "model-mtp.safetensors",
44279
- "model.layers.92.mlp.experts.121.up_proj.weight": "model-mtp.safetensors",
44280
- "model.layers.92.mlp.experts.122.down_proj.weight": "model-mtp.safetensors",
44281
- "model.layers.92.mlp.experts.122.gate_proj.weight": "model-mtp.safetensors",
44282
- "model.layers.92.mlp.experts.122.up_proj.weight": "model-mtp.safetensors",
44283
- "model.layers.92.mlp.experts.123.down_proj.weight": "model-mtp.safetensors",
44284
- "model.layers.92.mlp.experts.123.gate_proj.weight": "model-mtp.safetensors",
44285
- "model.layers.92.mlp.experts.123.up_proj.weight": "model-mtp.safetensors",
44286
- "model.layers.92.mlp.experts.124.down_proj.weight": "model-mtp.safetensors",
44287
- "model.layers.92.mlp.experts.124.gate_proj.weight": "model-mtp.safetensors",
44288
- "model.layers.92.mlp.experts.124.up_proj.weight": "model-mtp.safetensors",
44289
- "model.layers.92.mlp.experts.125.down_proj.weight": "model-mtp.safetensors",
44290
- "model.layers.92.mlp.experts.125.gate_proj.weight": "model-mtp.safetensors",
44291
- "model.layers.92.mlp.experts.125.up_proj.weight": "model-mtp.safetensors",
44292
- "model.layers.92.mlp.experts.126.down_proj.weight": "model-mtp.safetensors",
44293
- "model.layers.92.mlp.experts.126.gate_proj.weight": "model-mtp.safetensors",
44294
- "model.layers.92.mlp.experts.126.up_proj.weight": "model-mtp.safetensors",
44295
- "model.layers.92.mlp.experts.127.down_proj.weight": "model-mtp.safetensors",
44296
- "model.layers.92.mlp.experts.127.gate_proj.weight": "model-mtp.safetensors",
44297
- "model.layers.92.mlp.experts.127.up_proj.weight": "model-mtp.safetensors",
44298
- "model.layers.92.mlp.experts.128.down_proj.weight": "model-mtp.safetensors",
44299
- "model.layers.92.mlp.experts.128.gate_proj.weight": "model-mtp.safetensors",
44300
- "model.layers.92.mlp.experts.128.up_proj.weight": "model-mtp.safetensors",
44301
- "model.layers.92.mlp.experts.129.down_proj.weight": "model-mtp.safetensors",
44302
- "model.layers.92.mlp.experts.129.gate_proj.weight": "model-mtp.safetensors",
44303
- "model.layers.92.mlp.experts.129.up_proj.weight": "model-mtp.safetensors",
44304
- "model.layers.92.mlp.experts.13.down_proj.weight": "model-mtp.safetensors",
44305
- "model.layers.92.mlp.experts.13.gate_proj.weight": "model-mtp.safetensors",
44306
- "model.layers.92.mlp.experts.13.up_proj.weight": "model-mtp.safetensors",
44307
- "model.layers.92.mlp.experts.130.down_proj.weight": "model-mtp.safetensors",
44308
- "model.layers.92.mlp.experts.130.gate_proj.weight": "model-mtp.safetensors",
44309
- "model.layers.92.mlp.experts.130.up_proj.weight": "model-mtp.safetensors",
44310
- "model.layers.92.mlp.experts.131.down_proj.weight": "model-mtp.safetensors",
44311
- "model.layers.92.mlp.experts.131.gate_proj.weight": "model-mtp.safetensors",
44312
- "model.layers.92.mlp.experts.131.up_proj.weight": "model-mtp.safetensors",
44313
- "model.layers.92.mlp.experts.132.down_proj.weight": "model-mtp.safetensors",
44314
- "model.layers.92.mlp.experts.132.gate_proj.weight": "model-mtp.safetensors",
44315
- "model.layers.92.mlp.experts.132.up_proj.weight": "model-mtp.safetensors",
44316
- "model.layers.92.mlp.experts.133.down_proj.weight": "model-mtp.safetensors",
44317
- "model.layers.92.mlp.experts.133.gate_proj.weight": "model-mtp.safetensors",
44318
- "model.layers.92.mlp.experts.133.up_proj.weight": "model-mtp.safetensors",
44319
- "model.layers.92.mlp.experts.134.down_proj.weight": "model-mtp.safetensors",
44320
- "model.layers.92.mlp.experts.134.gate_proj.weight": "model-mtp.safetensors",
44321
- "model.layers.92.mlp.experts.134.up_proj.weight": "model-mtp.safetensors",
44322
- "model.layers.92.mlp.experts.135.down_proj.weight": "model-mtp.safetensors",
44323
- "model.layers.92.mlp.experts.135.gate_proj.weight": "model-mtp.safetensors",
44324
- "model.layers.92.mlp.experts.135.up_proj.weight": "model-mtp.safetensors",
44325
- "model.layers.92.mlp.experts.136.down_proj.weight": "model-mtp.safetensors",
44326
- "model.layers.92.mlp.experts.136.gate_proj.weight": "model-mtp.safetensors",
44327
- "model.layers.92.mlp.experts.136.up_proj.weight": "model-mtp.safetensors",
44328
- "model.layers.92.mlp.experts.137.down_proj.weight": "model-mtp.safetensors",
44329
- "model.layers.92.mlp.experts.137.gate_proj.weight": "model-mtp.safetensors",
44330
- "model.layers.92.mlp.experts.137.up_proj.weight": "model-mtp.safetensors",
44331
- "model.layers.92.mlp.experts.138.down_proj.weight": "model-mtp.safetensors",
44332
- "model.layers.92.mlp.experts.138.gate_proj.weight": "model-mtp.safetensors",
44333
- "model.layers.92.mlp.experts.138.up_proj.weight": "model-mtp.safetensors",
44334
- "model.layers.92.mlp.experts.139.down_proj.weight": "model-mtp.safetensors",
44335
- "model.layers.92.mlp.experts.139.gate_proj.weight": "model-mtp.safetensors",
44336
- "model.layers.92.mlp.experts.139.up_proj.weight": "model-mtp.safetensors",
44337
- "model.layers.92.mlp.experts.14.down_proj.weight": "model-mtp.safetensors",
44338
- "model.layers.92.mlp.experts.14.gate_proj.weight": "model-mtp.safetensors",
44339
- "model.layers.92.mlp.experts.14.up_proj.weight": "model-mtp.safetensors",
44340
- "model.layers.92.mlp.experts.140.down_proj.weight": "model-mtp.safetensors",
44341
- "model.layers.92.mlp.experts.140.gate_proj.weight": "model-mtp.safetensors",
44342
- "model.layers.92.mlp.experts.140.up_proj.weight": "model-mtp.safetensors",
44343
- "model.layers.92.mlp.experts.141.down_proj.weight": "model-mtp.safetensors",
44344
- "model.layers.92.mlp.experts.141.gate_proj.weight": "model-mtp.safetensors",
44345
- "model.layers.92.mlp.experts.141.up_proj.weight": "model-mtp.safetensors",
44346
- "model.layers.92.mlp.experts.142.down_proj.weight": "model-mtp.safetensors",
44347
- "model.layers.92.mlp.experts.142.gate_proj.weight": "model-mtp.safetensors",
44348
- "model.layers.92.mlp.experts.142.up_proj.weight": "model-mtp.safetensors",
44349
- "model.layers.92.mlp.experts.143.down_proj.weight": "model-mtp.safetensors",
44350
- "model.layers.92.mlp.experts.143.gate_proj.weight": "model-mtp.safetensors",
44351
- "model.layers.92.mlp.experts.143.up_proj.weight": "model-mtp.safetensors",
44352
- "model.layers.92.mlp.experts.144.down_proj.weight": "model-mtp.safetensors",
44353
- "model.layers.92.mlp.experts.144.gate_proj.weight": "model-mtp.safetensors",
44354
- "model.layers.92.mlp.experts.144.up_proj.weight": "model-mtp.safetensors",
44355
- "model.layers.92.mlp.experts.145.down_proj.weight": "model-mtp.safetensors",
44356
- "model.layers.92.mlp.experts.145.gate_proj.weight": "model-mtp.safetensors",
44357
- "model.layers.92.mlp.experts.145.up_proj.weight": "model-mtp.safetensors",
44358
- "model.layers.92.mlp.experts.146.down_proj.weight": "model-mtp.safetensors",
44359
- "model.layers.92.mlp.experts.146.gate_proj.weight": "model-mtp.safetensors",
44360
- "model.layers.92.mlp.experts.146.up_proj.weight": "model-mtp.safetensors",
44361
- "model.layers.92.mlp.experts.147.down_proj.weight": "model-mtp.safetensors",
44362
- "model.layers.92.mlp.experts.147.gate_proj.weight": "model-mtp.safetensors",
44363
- "model.layers.92.mlp.experts.147.up_proj.weight": "model-mtp.safetensors",
44364
- "model.layers.92.mlp.experts.148.down_proj.weight": "model-mtp.safetensors",
44365
- "model.layers.92.mlp.experts.148.gate_proj.weight": "model-mtp.safetensors",
44366
- "model.layers.92.mlp.experts.148.up_proj.weight": "model-mtp.safetensors",
44367
- "model.layers.92.mlp.experts.149.down_proj.weight": "model-mtp.safetensors",
44368
- "model.layers.92.mlp.experts.149.gate_proj.weight": "model-mtp.safetensors",
44369
- "model.layers.92.mlp.experts.149.up_proj.weight": "model-mtp.safetensors",
44370
- "model.layers.92.mlp.experts.15.down_proj.weight": "model-mtp.safetensors",
44371
- "model.layers.92.mlp.experts.15.gate_proj.weight": "model-mtp.safetensors",
44372
- "model.layers.92.mlp.experts.15.up_proj.weight": "model-mtp.safetensors",
44373
- "model.layers.92.mlp.experts.150.down_proj.weight": "model-mtp.safetensors",
44374
- "model.layers.92.mlp.experts.150.gate_proj.weight": "model-mtp.safetensors",
44375
- "model.layers.92.mlp.experts.150.up_proj.weight": "model-mtp.safetensors",
44376
- "model.layers.92.mlp.experts.151.down_proj.weight": "model-mtp.safetensors",
44377
- "model.layers.92.mlp.experts.151.gate_proj.weight": "model-mtp.safetensors",
44378
- "model.layers.92.mlp.experts.151.up_proj.weight": "model-mtp.safetensors",
44379
- "model.layers.92.mlp.experts.152.down_proj.weight": "model-mtp.safetensors",
44380
- "model.layers.92.mlp.experts.152.gate_proj.weight": "model-mtp.safetensors",
44381
- "model.layers.92.mlp.experts.152.up_proj.weight": "model-mtp.safetensors",
44382
- "model.layers.92.mlp.experts.153.down_proj.weight": "model-mtp.safetensors",
44383
- "model.layers.92.mlp.experts.153.gate_proj.weight": "model-mtp.safetensors",
44384
- "model.layers.92.mlp.experts.153.up_proj.weight": "model-mtp.safetensors",
44385
- "model.layers.92.mlp.experts.154.down_proj.weight": "model-mtp.safetensors",
44386
- "model.layers.92.mlp.experts.154.gate_proj.weight": "model-mtp.safetensors",
44387
- "model.layers.92.mlp.experts.154.up_proj.weight": "model-mtp.safetensors",
44388
- "model.layers.92.mlp.experts.155.down_proj.weight": "model-mtp.safetensors",
44389
- "model.layers.92.mlp.experts.155.gate_proj.weight": "model-mtp.safetensors",
44390
- "model.layers.92.mlp.experts.155.up_proj.weight": "model-mtp.safetensors",
44391
- "model.layers.92.mlp.experts.156.down_proj.weight": "model-mtp.safetensors",
44392
- "model.layers.92.mlp.experts.156.gate_proj.weight": "model-mtp.safetensors",
44393
- "model.layers.92.mlp.experts.156.up_proj.weight": "model-mtp.safetensors",
44394
- "model.layers.92.mlp.experts.157.down_proj.weight": "model-mtp.safetensors",
44395
- "model.layers.92.mlp.experts.157.gate_proj.weight": "model-mtp.safetensors",
44396
- "model.layers.92.mlp.experts.157.up_proj.weight": "model-mtp.safetensors",
44397
- "model.layers.92.mlp.experts.158.down_proj.weight": "model-mtp.safetensors",
44398
- "model.layers.92.mlp.experts.158.gate_proj.weight": "model-mtp.safetensors",
44399
- "model.layers.92.mlp.experts.158.up_proj.weight": "model-mtp.safetensors",
44400
- "model.layers.92.mlp.experts.159.down_proj.weight": "model-mtp.safetensors",
44401
- "model.layers.92.mlp.experts.159.gate_proj.weight": "model-mtp.safetensors",
44402
- "model.layers.92.mlp.experts.159.up_proj.weight": "model-mtp.safetensors",
44403
- "model.layers.92.mlp.experts.16.down_proj.weight": "model-mtp.safetensors",
44404
- "model.layers.92.mlp.experts.16.gate_proj.weight": "model-mtp.safetensors",
44405
- "model.layers.92.mlp.experts.16.up_proj.weight": "model-mtp.safetensors",
44406
- "model.layers.92.mlp.experts.17.down_proj.weight": "model-mtp.safetensors",
44407
- "model.layers.92.mlp.experts.17.gate_proj.weight": "model-mtp.safetensors",
44408
- "model.layers.92.mlp.experts.17.up_proj.weight": "model-mtp.safetensors",
44409
- "model.layers.92.mlp.experts.18.down_proj.weight": "model-mtp.safetensors",
44410
- "model.layers.92.mlp.experts.18.gate_proj.weight": "model-mtp.safetensors",
44411
- "model.layers.92.mlp.experts.18.up_proj.weight": "model-mtp.safetensors",
44412
- "model.layers.92.mlp.experts.19.down_proj.weight": "model-mtp.safetensors",
44413
- "model.layers.92.mlp.experts.19.gate_proj.weight": "model-mtp.safetensors",
44414
- "model.layers.92.mlp.experts.19.up_proj.weight": "model-mtp.safetensors",
44415
- "model.layers.92.mlp.experts.2.down_proj.weight": "model-mtp.safetensors",
44416
- "model.layers.92.mlp.experts.2.gate_proj.weight": "model-mtp.safetensors",
44417
- "model.layers.92.mlp.experts.2.up_proj.weight": "model-mtp.safetensors",
44418
- "model.layers.92.mlp.experts.20.down_proj.weight": "model-mtp.safetensors",
44419
- "model.layers.92.mlp.experts.20.gate_proj.weight": "model-mtp.safetensors",
44420
- "model.layers.92.mlp.experts.20.up_proj.weight": "model-mtp.safetensors",
44421
- "model.layers.92.mlp.experts.21.down_proj.weight": "model-mtp.safetensors",
44422
- "model.layers.92.mlp.experts.21.gate_proj.weight": "model-mtp.safetensors",
44423
- "model.layers.92.mlp.experts.21.up_proj.weight": "model-mtp.safetensors",
44424
- "model.layers.92.mlp.experts.22.down_proj.weight": "model-mtp.safetensors",
44425
- "model.layers.92.mlp.experts.22.gate_proj.weight": "model-mtp.safetensors",
44426
- "model.layers.92.mlp.experts.22.up_proj.weight": "model-mtp.safetensors",
44427
- "model.layers.92.mlp.experts.23.down_proj.weight": "model-mtp.safetensors",
44428
- "model.layers.92.mlp.experts.23.gate_proj.weight": "model-mtp.safetensors",
44429
- "model.layers.92.mlp.experts.23.up_proj.weight": "model-mtp.safetensors",
44430
- "model.layers.92.mlp.experts.24.down_proj.weight": "model-mtp.safetensors",
44431
- "model.layers.92.mlp.experts.24.gate_proj.weight": "model-mtp.safetensors",
44432
- "model.layers.92.mlp.experts.24.up_proj.weight": "model-mtp.safetensors",
44433
- "model.layers.92.mlp.experts.25.down_proj.weight": "model-mtp.safetensors",
44434
- "model.layers.92.mlp.experts.25.gate_proj.weight": "model-mtp.safetensors",
44435
- "model.layers.92.mlp.experts.25.up_proj.weight": "model-mtp.safetensors",
44436
- "model.layers.92.mlp.experts.26.down_proj.weight": "model-mtp.safetensors",
44437
- "model.layers.92.mlp.experts.26.gate_proj.weight": "model-mtp.safetensors",
44438
- "model.layers.92.mlp.experts.26.up_proj.weight": "model-mtp.safetensors",
44439
- "model.layers.92.mlp.experts.27.down_proj.weight": "model-mtp.safetensors",
44440
- "model.layers.92.mlp.experts.27.gate_proj.weight": "model-mtp.safetensors",
44441
- "model.layers.92.mlp.experts.27.up_proj.weight": "model-mtp.safetensors",
44442
- "model.layers.92.mlp.experts.28.down_proj.weight": "model-mtp.safetensors",
44443
- "model.layers.92.mlp.experts.28.gate_proj.weight": "model-mtp.safetensors",
44444
- "model.layers.92.mlp.experts.28.up_proj.weight": "model-mtp.safetensors",
44445
- "model.layers.92.mlp.experts.29.down_proj.weight": "model-mtp.safetensors",
44446
- "model.layers.92.mlp.experts.29.gate_proj.weight": "model-mtp.safetensors",
44447
- "model.layers.92.mlp.experts.29.up_proj.weight": "model-mtp.safetensors",
44448
- "model.layers.92.mlp.experts.3.down_proj.weight": "model-mtp.safetensors",
44449
- "model.layers.92.mlp.experts.3.gate_proj.weight": "model-mtp.safetensors",
44450
- "model.layers.92.mlp.experts.3.up_proj.weight": "model-mtp.safetensors",
44451
- "model.layers.92.mlp.experts.30.down_proj.weight": "model-mtp.safetensors",
44452
- "model.layers.92.mlp.experts.30.gate_proj.weight": "model-mtp.safetensors",
44453
- "model.layers.92.mlp.experts.30.up_proj.weight": "model-mtp.safetensors",
44454
- "model.layers.92.mlp.experts.31.down_proj.weight": "model-mtp.safetensors",
44455
- "model.layers.92.mlp.experts.31.gate_proj.weight": "model-mtp.safetensors",
44456
- "model.layers.92.mlp.experts.31.up_proj.weight": "model-mtp.safetensors",
44457
- "model.layers.92.mlp.experts.32.down_proj.weight": "model-mtp.safetensors",
44458
- "model.layers.92.mlp.experts.32.gate_proj.weight": "model-mtp.safetensors",
44459
- "model.layers.92.mlp.experts.32.up_proj.weight": "model-mtp.safetensors",
44460
- "model.layers.92.mlp.experts.33.down_proj.weight": "model-mtp.safetensors",
44461
- "model.layers.92.mlp.experts.33.gate_proj.weight": "model-mtp.safetensors",
44462
- "model.layers.92.mlp.experts.33.up_proj.weight": "model-mtp.safetensors",
44463
- "model.layers.92.mlp.experts.34.down_proj.weight": "model-mtp.safetensors",
44464
- "model.layers.92.mlp.experts.34.gate_proj.weight": "model-mtp.safetensors",
44465
- "model.layers.92.mlp.experts.34.up_proj.weight": "model-mtp.safetensors",
44466
- "model.layers.92.mlp.experts.35.down_proj.weight": "model-mtp.safetensors",
44467
- "model.layers.92.mlp.experts.35.gate_proj.weight": "model-mtp.safetensors",
44468
- "model.layers.92.mlp.experts.35.up_proj.weight": "model-mtp.safetensors",
44469
- "model.layers.92.mlp.experts.36.down_proj.weight": "model-mtp.safetensors",
44470
- "model.layers.92.mlp.experts.36.gate_proj.weight": "model-mtp.safetensors",
44471
- "model.layers.92.mlp.experts.36.up_proj.weight": "model-mtp.safetensors",
44472
- "model.layers.92.mlp.experts.37.down_proj.weight": "model-mtp.safetensors",
44473
- "model.layers.92.mlp.experts.37.gate_proj.weight": "model-mtp.safetensors",
44474
- "model.layers.92.mlp.experts.37.up_proj.weight": "model-mtp.safetensors",
44475
- "model.layers.92.mlp.experts.38.down_proj.weight": "model-mtp.safetensors",
44476
- "model.layers.92.mlp.experts.38.gate_proj.weight": "model-mtp.safetensors",
44477
- "model.layers.92.mlp.experts.38.up_proj.weight": "model-mtp.safetensors",
44478
- "model.layers.92.mlp.experts.39.down_proj.weight": "model-mtp.safetensors",
44479
- "model.layers.92.mlp.experts.39.gate_proj.weight": "model-mtp.safetensors",
44480
- "model.layers.92.mlp.experts.39.up_proj.weight": "model-mtp.safetensors",
44481
- "model.layers.92.mlp.experts.4.down_proj.weight": "model-mtp.safetensors",
44482
- "model.layers.92.mlp.experts.4.gate_proj.weight": "model-mtp.safetensors",
44483
- "model.layers.92.mlp.experts.4.up_proj.weight": "model-mtp.safetensors",
44484
- "model.layers.92.mlp.experts.40.down_proj.weight": "model-mtp.safetensors",
44485
- "model.layers.92.mlp.experts.40.gate_proj.weight": "model-mtp.safetensors",
44486
- "model.layers.92.mlp.experts.40.up_proj.weight": "model-mtp.safetensors",
44487
- "model.layers.92.mlp.experts.41.down_proj.weight": "model-mtp.safetensors",
44488
- "model.layers.92.mlp.experts.41.gate_proj.weight": "model-mtp.safetensors",
44489
- "model.layers.92.mlp.experts.41.up_proj.weight": "model-mtp.safetensors",
44490
- "model.layers.92.mlp.experts.42.down_proj.weight": "model-mtp.safetensors",
44491
- "model.layers.92.mlp.experts.42.gate_proj.weight": "model-mtp.safetensors",
44492
- "model.layers.92.mlp.experts.42.up_proj.weight": "model-mtp.safetensors",
44493
- "model.layers.92.mlp.experts.43.down_proj.weight": "model-mtp.safetensors",
44494
- "model.layers.92.mlp.experts.43.gate_proj.weight": "model-mtp.safetensors",
44495
- "model.layers.92.mlp.experts.43.up_proj.weight": "model-mtp.safetensors",
44496
- "model.layers.92.mlp.experts.44.down_proj.weight": "model-mtp.safetensors",
44497
- "model.layers.92.mlp.experts.44.gate_proj.weight": "model-mtp.safetensors",
44498
- "model.layers.92.mlp.experts.44.up_proj.weight": "model-mtp.safetensors",
44499
- "model.layers.92.mlp.experts.45.down_proj.weight": "model-mtp.safetensors",
44500
- "model.layers.92.mlp.experts.45.gate_proj.weight": "model-mtp.safetensors",
44501
- "model.layers.92.mlp.experts.45.up_proj.weight": "model-mtp.safetensors",
44502
- "model.layers.92.mlp.experts.46.down_proj.weight": "model-mtp.safetensors",
44503
- "model.layers.92.mlp.experts.46.gate_proj.weight": "model-mtp.safetensors",
44504
- "model.layers.92.mlp.experts.46.up_proj.weight": "model-mtp.safetensors",
44505
- "model.layers.92.mlp.experts.47.down_proj.weight": "model-mtp.safetensors",
44506
- "model.layers.92.mlp.experts.47.gate_proj.weight": "model-mtp.safetensors",
44507
- "model.layers.92.mlp.experts.47.up_proj.weight": "model-mtp.safetensors",
44508
- "model.layers.92.mlp.experts.48.down_proj.weight": "model-mtp.safetensors",
44509
- "model.layers.92.mlp.experts.48.gate_proj.weight": "model-mtp.safetensors",
44510
- "model.layers.92.mlp.experts.48.up_proj.weight": "model-mtp.safetensors",
44511
- "model.layers.92.mlp.experts.49.down_proj.weight": "model-mtp.safetensors",
44512
- "model.layers.92.mlp.experts.49.gate_proj.weight": "model-mtp.safetensors",
44513
- "model.layers.92.mlp.experts.49.up_proj.weight": "model-mtp.safetensors",
44514
- "model.layers.92.mlp.experts.5.down_proj.weight": "model-mtp.safetensors",
44515
- "model.layers.92.mlp.experts.5.gate_proj.weight": "model-mtp.safetensors",
44516
- "model.layers.92.mlp.experts.5.up_proj.weight": "model-mtp.safetensors",
44517
- "model.layers.92.mlp.experts.50.down_proj.weight": "model-mtp.safetensors",
44518
- "model.layers.92.mlp.experts.50.gate_proj.weight": "model-mtp.safetensors",
44519
- "model.layers.92.mlp.experts.50.up_proj.weight": "model-mtp.safetensors",
44520
- "model.layers.92.mlp.experts.51.down_proj.weight": "model-mtp.safetensors",
44521
- "model.layers.92.mlp.experts.51.gate_proj.weight": "model-mtp.safetensors",
44522
- "model.layers.92.mlp.experts.51.up_proj.weight": "model-mtp.safetensors",
44523
- "model.layers.92.mlp.experts.52.down_proj.weight": "model-mtp.safetensors",
44524
- "model.layers.92.mlp.experts.52.gate_proj.weight": "model-mtp.safetensors",
44525
- "model.layers.92.mlp.experts.52.up_proj.weight": "model-mtp.safetensors",
44526
- "model.layers.92.mlp.experts.53.down_proj.weight": "model-mtp.safetensors",
44527
- "model.layers.92.mlp.experts.53.gate_proj.weight": "model-mtp.safetensors",
44528
- "model.layers.92.mlp.experts.53.up_proj.weight": "model-mtp.safetensors",
44529
- "model.layers.92.mlp.experts.54.down_proj.weight": "model-mtp.safetensors",
44530
- "model.layers.92.mlp.experts.54.gate_proj.weight": "model-mtp.safetensors",
44531
- "model.layers.92.mlp.experts.54.up_proj.weight": "model-mtp.safetensors",
44532
- "model.layers.92.mlp.experts.55.down_proj.weight": "model-mtp.safetensors",
44533
- "model.layers.92.mlp.experts.55.gate_proj.weight": "model-mtp.safetensors",
44534
- "model.layers.92.mlp.experts.55.up_proj.weight": "model-mtp.safetensors",
44535
- "model.layers.92.mlp.experts.56.down_proj.weight": "model-mtp.safetensors",
44536
- "model.layers.92.mlp.experts.56.gate_proj.weight": "model-mtp.safetensors",
44537
- "model.layers.92.mlp.experts.56.up_proj.weight": "model-mtp.safetensors",
44538
- "model.layers.92.mlp.experts.57.down_proj.weight": "model-mtp.safetensors",
44539
- "model.layers.92.mlp.experts.57.gate_proj.weight": "model-mtp.safetensors",
44540
- "model.layers.92.mlp.experts.57.up_proj.weight": "model-mtp.safetensors",
44541
- "model.layers.92.mlp.experts.58.down_proj.weight": "model-mtp.safetensors",
44542
- "model.layers.92.mlp.experts.58.gate_proj.weight": "model-mtp.safetensors",
44543
- "model.layers.92.mlp.experts.58.up_proj.weight": "model-mtp.safetensors",
44544
- "model.layers.92.mlp.experts.59.down_proj.weight": "model-mtp.safetensors",
44545
- "model.layers.92.mlp.experts.59.gate_proj.weight": "model-mtp.safetensors",
44546
- "model.layers.92.mlp.experts.59.up_proj.weight": "model-mtp.safetensors",
44547
- "model.layers.92.mlp.experts.6.down_proj.weight": "model-mtp.safetensors",
44548
- "model.layers.92.mlp.experts.6.gate_proj.weight": "model-mtp.safetensors",
44549
- "model.layers.92.mlp.experts.6.up_proj.weight": "model-mtp.safetensors",
44550
- "model.layers.92.mlp.experts.60.down_proj.weight": "model-mtp.safetensors",
44551
- "model.layers.92.mlp.experts.60.gate_proj.weight": "model-mtp.safetensors",
44552
- "model.layers.92.mlp.experts.60.up_proj.weight": "model-mtp.safetensors",
44553
- "model.layers.92.mlp.experts.61.down_proj.weight": "model-mtp.safetensors",
44554
- "model.layers.92.mlp.experts.61.gate_proj.weight": "model-mtp.safetensors",
44555
- "model.layers.92.mlp.experts.61.up_proj.weight": "model-mtp.safetensors",
44556
- "model.layers.92.mlp.experts.62.down_proj.weight": "model-mtp.safetensors",
44557
- "model.layers.92.mlp.experts.62.gate_proj.weight": "model-mtp.safetensors",
44558
- "model.layers.92.mlp.experts.62.up_proj.weight": "model-mtp.safetensors",
44559
- "model.layers.92.mlp.experts.63.down_proj.weight": "model-mtp.safetensors",
44560
- "model.layers.92.mlp.experts.63.gate_proj.weight": "model-mtp.safetensors",
44561
- "model.layers.92.mlp.experts.63.up_proj.weight": "model-mtp.safetensors",
44562
- "model.layers.92.mlp.experts.64.down_proj.weight": "model-mtp.safetensors",
44563
- "model.layers.92.mlp.experts.64.gate_proj.weight": "model-mtp.safetensors",
44564
- "model.layers.92.mlp.experts.64.up_proj.weight": "model-mtp.safetensors",
44565
- "model.layers.92.mlp.experts.65.down_proj.weight": "model-mtp.safetensors",
44566
- "model.layers.92.mlp.experts.65.gate_proj.weight": "model-mtp.safetensors",
44567
- "model.layers.92.mlp.experts.65.up_proj.weight": "model-mtp.safetensors",
44568
- "model.layers.92.mlp.experts.66.down_proj.weight": "model-mtp.safetensors",
44569
- "model.layers.92.mlp.experts.66.gate_proj.weight": "model-mtp.safetensors",
44570
- "model.layers.92.mlp.experts.66.up_proj.weight": "model-mtp.safetensors",
44571
- "model.layers.92.mlp.experts.67.down_proj.weight": "model-mtp.safetensors",
44572
- "model.layers.92.mlp.experts.67.gate_proj.weight": "model-mtp.safetensors",
44573
- "model.layers.92.mlp.experts.67.up_proj.weight": "model-mtp.safetensors",
44574
- "model.layers.92.mlp.experts.68.down_proj.weight": "model-mtp.safetensors",
44575
- "model.layers.92.mlp.experts.68.gate_proj.weight": "model-mtp.safetensors",
44576
- "model.layers.92.mlp.experts.68.up_proj.weight": "model-mtp.safetensors",
44577
- "model.layers.92.mlp.experts.69.down_proj.weight": "model-mtp.safetensors",
44578
- "model.layers.92.mlp.experts.69.gate_proj.weight": "model-mtp.safetensors",
44579
- "model.layers.92.mlp.experts.69.up_proj.weight": "model-mtp.safetensors",
44580
- "model.layers.92.mlp.experts.7.down_proj.weight": "model-mtp.safetensors",
44581
- "model.layers.92.mlp.experts.7.gate_proj.weight": "model-mtp.safetensors",
44582
- "model.layers.92.mlp.experts.7.up_proj.weight": "model-mtp.safetensors",
44583
- "model.layers.92.mlp.experts.70.down_proj.weight": "model-mtp.safetensors",
44584
- "model.layers.92.mlp.experts.70.gate_proj.weight": "model-mtp.safetensors",
44585
- "model.layers.92.mlp.experts.70.up_proj.weight": "model-mtp.safetensors",
44586
- "model.layers.92.mlp.experts.71.down_proj.weight": "model-mtp.safetensors",
44587
- "model.layers.92.mlp.experts.71.gate_proj.weight": "model-mtp.safetensors",
44588
- "model.layers.92.mlp.experts.71.up_proj.weight": "model-mtp.safetensors",
44589
- "model.layers.92.mlp.experts.72.down_proj.weight": "model-mtp.safetensors",
44590
- "model.layers.92.mlp.experts.72.gate_proj.weight": "model-mtp.safetensors",
44591
- "model.layers.92.mlp.experts.72.up_proj.weight": "model-mtp.safetensors",
44592
- "model.layers.92.mlp.experts.73.down_proj.weight": "model-mtp.safetensors",
44593
- "model.layers.92.mlp.experts.73.gate_proj.weight": "model-mtp.safetensors",
44594
- "model.layers.92.mlp.experts.73.up_proj.weight": "model-mtp.safetensors",
44595
- "model.layers.92.mlp.experts.74.down_proj.weight": "model-mtp.safetensors",
44596
- "model.layers.92.mlp.experts.74.gate_proj.weight": "model-mtp.safetensors",
44597
- "model.layers.92.mlp.experts.74.up_proj.weight": "model-mtp.safetensors",
44598
- "model.layers.92.mlp.experts.75.down_proj.weight": "model-mtp.safetensors",
44599
- "model.layers.92.mlp.experts.75.gate_proj.weight": "model-mtp.safetensors",
44600
- "model.layers.92.mlp.experts.75.up_proj.weight": "model-mtp.safetensors",
44601
- "model.layers.92.mlp.experts.76.down_proj.weight": "model-mtp.safetensors",
44602
- "model.layers.92.mlp.experts.76.gate_proj.weight": "model-mtp.safetensors",
44603
- "model.layers.92.mlp.experts.76.up_proj.weight": "model-mtp.safetensors",
44604
- "model.layers.92.mlp.experts.77.down_proj.weight": "model-mtp.safetensors",
44605
- "model.layers.92.mlp.experts.77.gate_proj.weight": "model-mtp.safetensors",
44606
- "model.layers.92.mlp.experts.77.up_proj.weight": "model-mtp.safetensors",
44607
- "model.layers.92.mlp.experts.78.down_proj.weight": "model-mtp.safetensors",
44608
- "model.layers.92.mlp.experts.78.gate_proj.weight": "model-mtp.safetensors",
44609
- "model.layers.92.mlp.experts.78.up_proj.weight": "model-mtp.safetensors",
44610
- "model.layers.92.mlp.experts.79.down_proj.weight": "model-mtp.safetensors",
44611
- "model.layers.92.mlp.experts.79.gate_proj.weight": "model-mtp.safetensors",
44612
- "model.layers.92.mlp.experts.79.up_proj.weight": "model-mtp.safetensors",
44613
- "model.layers.92.mlp.experts.8.down_proj.weight": "model-mtp.safetensors",
44614
- "model.layers.92.mlp.experts.8.gate_proj.weight": "model-mtp.safetensors",
44615
- "model.layers.92.mlp.experts.8.up_proj.weight": "model-mtp.safetensors",
44616
- "model.layers.92.mlp.experts.80.down_proj.weight": "model-mtp.safetensors",
44617
- "model.layers.92.mlp.experts.80.gate_proj.weight": "model-mtp.safetensors",
44618
- "model.layers.92.mlp.experts.80.up_proj.weight": "model-mtp.safetensors",
44619
- "model.layers.92.mlp.experts.81.down_proj.weight": "model-mtp.safetensors",
44620
- "model.layers.92.mlp.experts.81.gate_proj.weight": "model-mtp.safetensors",
44621
- "model.layers.92.mlp.experts.81.up_proj.weight": "model-mtp.safetensors",
44622
- "model.layers.92.mlp.experts.82.down_proj.weight": "model-mtp.safetensors",
44623
- "model.layers.92.mlp.experts.82.gate_proj.weight": "model-mtp.safetensors",
44624
- "model.layers.92.mlp.experts.82.up_proj.weight": "model-mtp.safetensors",
44625
- "model.layers.92.mlp.experts.83.down_proj.weight": "model-mtp.safetensors",
44626
- "model.layers.92.mlp.experts.83.gate_proj.weight": "model-mtp.safetensors",
44627
- "model.layers.92.mlp.experts.83.up_proj.weight": "model-mtp.safetensors",
44628
- "model.layers.92.mlp.experts.84.down_proj.weight": "model-mtp.safetensors",
44629
- "model.layers.92.mlp.experts.84.gate_proj.weight": "model-mtp.safetensors",
44630
- "model.layers.92.mlp.experts.84.up_proj.weight": "model-mtp.safetensors",
44631
- "model.layers.92.mlp.experts.85.down_proj.weight": "model-mtp.safetensors",
44632
- "model.layers.92.mlp.experts.85.gate_proj.weight": "model-mtp.safetensors",
44633
- "model.layers.92.mlp.experts.85.up_proj.weight": "model-mtp.safetensors",
44634
- "model.layers.92.mlp.experts.86.down_proj.weight": "model-mtp.safetensors",
44635
- "model.layers.92.mlp.experts.86.gate_proj.weight": "model-mtp.safetensors",
44636
- "model.layers.92.mlp.experts.86.up_proj.weight": "model-mtp.safetensors",
44637
- "model.layers.92.mlp.experts.87.down_proj.weight": "model-mtp.safetensors",
44638
- "model.layers.92.mlp.experts.87.gate_proj.weight": "model-mtp.safetensors",
44639
- "model.layers.92.mlp.experts.87.up_proj.weight": "model-mtp.safetensors",
44640
- "model.layers.92.mlp.experts.88.down_proj.weight": "model-mtp.safetensors",
44641
- "model.layers.92.mlp.experts.88.gate_proj.weight": "model-mtp.safetensors",
44642
- "model.layers.92.mlp.experts.88.up_proj.weight": "model-mtp.safetensors",
44643
- "model.layers.92.mlp.experts.89.down_proj.weight": "model-mtp.safetensors",
44644
- "model.layers.92.mlp.experts.89.gate_proj.weight": "model-mtp.safetensors",
44645
- "model.layers.92.mlp.experts.89.up_proj.weight": "model-mtp.safetensors",
44646
- "model.layers.92.mlp.experts.9.down_proj.weight": "model-mtp.safetensors",
44647
- "model.layers.92.mlp.experts.9.gate_proj.weight": "model-mtp.safetensors",
44648
- "model.layers.92.mlp.experts.9.up_proj.weight": "model-mtp.safetensors",
44649
- "model.layers.92.mlp.experts.90.down_proj.weight": "model-mtp.safetensors",
44650
- "model.layers.92.mlp.experts.90.gate_proj.weight": "model-mtp.safetensors",
44651
- "model.layers.92.mlp.experts.90.up_proj.weight": "model-mtp.safetensors",
44652
- "model.layers.92.mlp.experts.91.down_proj.weight": "model-mtp.safetensors",
44653
- "model.layers.92.mlp.experts.91.gate_proj.weight": "model-mtp.safetensors",
44654
- "model.layers.92.mlp.experts.91.up_proj.weight": "model-mtp.safetensors",
44655
- "model.layers.92.mlp.experts.92.down_proj.weight": "model-mtp.safetensors",
44656
- "model.layers.92.mlp.experts.92.gate_proj.weight": "model-mtp.safetensors",
44657
- "model.layers.92.mlp.experts.92.up_proj.weight": "model-mtp.safetensors",
44658
- "model.layers.92.mlp.experts.93.down_proj.weight": "model-mtp.safetensors",
44659
- "model.layers.92.mlp.experts.93.gate_proj.weight": "model-mtp.safetensors",
44660
- "model.layers.92.mlp.experts.93.up_proj.weight": "model-mtp.safetensors",
44661
- "model.layers.92.mlp.experts.94.down_proj.weight": "model-mtp.safetensors",
44662
- "model.layers.92.mlp.experts.94.gate_proj.weight": "model-mtp.safetensors",
44663
- "model.layers.92.mlp.experts.94.up_proj.weight": "model-mtp.safetensors",
44664
- "model.layers.92.mlp.experts.95.down_proj.weight": "model-mtp.safetensors",
44665
- "model.layers.92.mlp.experts.95.gate_proj.weight": "model-mtp.safetensors",
44666
- "model.layers.92.mlp.experts.95.up_proj.weight": "model-mtp.safetensors",
44667
- "model.layers.92.mlp.experts.96.down_proj.weight": "model-mtp.safetensors",
44668
- "model.layers.92.mlp.experts.96.gate_proj.weight": "model-mtp.safetensors",
44669
- "model.layers.92.mlp.experts.96.up_proj.weight": "model-mtp.safetensors",
44670
- "model.layers.92.mlp.experts.97.down_proj.weight": "model-mtp.safetensors",
44671
- "model.layers.92.mlp.experts.97.gate_proj.weight": "model-mtp.safetensors",
44672
- "model.layers.92.mlp.experts.97.up_proj.weight": "model-mtp.safetensors",
44673
- "model.layers.92.mlp.experts.98.down_proj.weight": "model-mtp.safetensors",
44674
- "model.layers.92.mlp.experts.98.gate_proj.weight": "model-mtp.safetensors",
44675
- "model.layers.92.mlp.experts.98.up_proj.weight": "model-mtp.safetensors",
44676
- "model.layers.92.mlp.experts.99.down_proj.weight": "model-mtp.safetensors",
44677
- "model.layers.92.mlp.experts.99.gate_proj.weight": "model-mtp.safetensors",
44678
- "model.layers.92.mlp.experts.99.up_proj.weight": "model-mtp.safetensors",
44679
- "model.layers.92.mlp.gate.e_score_correction_bias": "model-mtp.safetensors",
44680
- "model.layers.92.mlp.gate.weight": "model-mtp.safetensors",
44681
- "model.layers.92.mlp.shared_experts.down_proj.weight": "model-mtp.safetensors",
44682
- "model.layers.92.mlp.shared_experts.gate_proj.weight": "model-mtp.safetensors",
44683
- "model.layers.92.mlp.shared_experts.up_proj.weight": "model-mtp.safetensors",
44684
- "model.layers.92.post_attention_layernorm.weight": "model-mtp.safetensors",
44685
- "model.layers.92.self_attn.k_norm.weight": "model-mtp.safetensors",
44686
- "model.layers.92.self_attn.k_proj.bias": "model-mtp.safetensors",
44687
- "model.layers.92.self_attn.k_proj.weight": "model-mtp.safetensors",
44688
- "model.layers.92.self_attn.o_proj.weight": "model-mtp.safetensors",
44689
- "model.layers.92.self_attn.q_norm.weight": "model-mtp.safetensors",
44690
- "model.layers.92.self_attn.q_proj.bias": "model-mtp.safetensors",
44691
- "model.layers.92.self_attn.q_proj.weight": "model-mtp.safetensors",
44692
- "model.layers.92.self_attn.v_proj.bias": "model-mtp.safetensors",
44693
- "model.layers.92.self_attn.v_proj.weight": "model-mtp.safetensors",
44694
- "model.layers.92.shared_head.norm.weight": "model-mtp.safetensors"
44695
  }
44696
  }
 
44192
  "model.layers.91.self_attn.v_proj.bias": "model-00092-of-00092.safetensors",
44193
  "model.layers.91.self_attn.v_proj.weight": "model-00092-of-00092.safetensors",
44194
  "model.norm.weight": "model-00092-of-00092.safetensors",
44195
+ "model.layers.92.eh_proj.weight": "mtp.safetensors",
44196
+ "model.layers.92.enorm.weight": "mtp.safetensors",
44197
+ "model.layers.92.hnorm.weight": "mtp.safetensors",
44198
+ "model.layers.92.input_layernorm.weight": "mtp.safetensors",
44199
+ "model.layers.92.mlp.experts.0.down_proj.weight": "mtp.safetensors",
44200
+ "model.layers.92.mlp.experts.0.gate_proj.weight": "mtp.safetensors",
44201
+ "model.layers.92.mlp.experts.0.up_proj.weight": "mtp.safetensors",
44202
+ "model.layers.92.mlp.experts.1.down_proj.weight": "mtp.safetensors",
44203
+ "model.layers.92.mlp.experts.1.gate_proj.weight": "mtp.safetensors",
44204
+ "model.layers.92.mlp.experts.1.up_proj.weight": "mtp.safetensors",
44205
+ "model.layers.92.mlp.experts.10.down_proj.weight": "mtp.safetensors",
44206
+ "model.layers.92.mlp.experts.10.gate_proj.weight": "mtp.safetensors",
44207
+ "model.layers.92.mlp.experts.10.up_proj.weight": "mtp.safetensors",
44208
+ "model.layers.92.mlp.experts.100.down_proj.weight": "mtp.safetensors",
44209
+ "model.layers.92.mlp.experts.100.gate_proj.weight": "mtp.safetensors",
44210
+ "model.layers.92.mlp.experts.100.up_proj.weight": "mtp.safetensors",
44211
+ "model.layers.92.mlp.experts.101.down_proj.weight": "mtp.safetensors",
44212
+ "model.layers.92.mlp.experts.101.gate_proj.weight": "mtp.safetensors",
44213
+ "model.layers.92.mlp.experts.101.up_proj.weight": "mtp.safetensors",
44214
+ "model.layers.92.mlp.experts.102.down_proj.weight": "mtp.safetensors",
44215
+ "model.layers.92.mlp.experts.102.gate_proj.weight": "mtp.safetensors",
44216
+ "model.layers.92.mlp.experts.102.up_proj.weight": "mtp.safetensors",
44217
+ "model.layers.92.mlp.experts.103.down_proj.weight": "mtp.safetensors",
44218
+ "model.layers.92.mlp.experts.103.gate_proj.weight": "mtp.safetensors",
44219
+ "model.layers.92.mlp.experts.103.up_proj.weight": "mtp.safetensors",
44220
+ "model.layers.92.mlp.experts.104.down_proj.weight": "mtp.safetensors",
44221
+ "model.layers.92.mlp.experts.104.gate_proj.weight": "mtp.safetensors",
44222
+ "model.layers.92.mlp.experts.104.up_proj.weight": "mtp.safetensors",
44223
+ "model.layers.92.mlp.experts.105.down_proj.weight": "mtp.safetensors",
44224
+ "model.layers.92.mlp.experts.105.gate_proj.weight": "mtp.safetensors",
44225
+ "model.layers.92.mlp.experts.105.up_proj.weight": "mtp.safetensors",
44226
+ "model.layers.92.mlp.experts.106.down_proj.weight": "mtp.safetensors",
44227
+ "model.layers.92.mlp.experts.106.gate_proj.weight": "mtp.safetensors",
44228
+ "model.layers.92.mlp.experts.106.up_proj.weight": "mtp.safetensors",
44229
+ "model.layers.92.mlp.experts.107.down_proj.weight": "mtp.safetensors",
44230
+ "model.layers.92.mlp.experts.107.gate_proj.weight": "mtp.safetensors",
44231
+ "model.layers.92.mlp.experts.107.up_proj.weight": "mtp.safetensors",
44232
+ "model.layers.92.mlp.experts.108.down_proj.weight": "mtp.safetensors",
44233
+ "model.layers.92.mlp.experts.108.gate_proj.weight": "mtp.safetensors",
44234
+ "model.layers.92.mlp.experts.108.up_proj.weight": "mtp.safetensors",
44235
+ "model.layers.92.mlp.experts.109.down_proj.weight": "mtp.safetensors",
44236
+ "model.layers.92.mlp.experts.109.gate_proj.weight": "mtp.safetensors",
44237
+ "model.layers.92.mlp.experts.109.up_proj.weight": "mtp.safetensors",
44238
+ "model.layers.92.mlp.experts.11.down_proj.weight": "mtp.safetensors",
44239
+ "model.layers.92.mlp.experts.11.gate_proj.weight": "mtp.safetensors",
44240
+ "model.layers.92.mlp.experts.11.up_proj.weight": "mtp.safetensors",
44241
+ "model.layers.92.mlp.experts.110.down_proj.weight": "mtp.safetensors",
44242
+ "model.layers.92.mlp.experts.110.gate_proj.weight": "mtp.safetensors",
44243
+ "model.layers.92.mlp.experts.110.up_proj.weight": "mtp.safetensors",
44244
+ "model.layers.92.mlp.experts.111.down_proj.weight": "mtp.safetensors",
44245
+ "model.layers.92.mlp.experts.111.gate_proj.weight": "mtp.safetensors",
44246
+ "model.layers.92.mlp.experts.111.up_proj.weight": "mtp.safetensors",
44247
+ "model.layers.92.mlp.experts.112.down_proj.weight": "mtp.safetensors",
44248
+ "model.layers.92.mlp.experts.112.gate_proj.weight": "mtp.safetensors",
44249
+ "model.layers.92.mlp.experts.112.up_proj.weight": "mtp.safetensors",
44250
+ "model.layers.92.mlp.experts.113.down_proj.weight": "mtp.safetensors",
44251
+ "model.layers.92.mlp.experts.113.gate_proj.weight": "mtp.safetensors",
44252
+ "model.layers.92.mlp.experts.113.up_proj.weight": "mtp.safetensors",
44253
+ "model.layers.92.mlp.experts.114.down_proj.weight": "mtp.safetensors",
44254
+ "model.layers.92.mlp.experts.114.gate_proj.weight": "mtp.safetensors",
44255
+ "model.layers.92.mlp.experts.114.up_proj.weight": "mtp.safetensors",
44256
+ "model.layers.92.mlp.experts.115.down_proj.weight": "mtp.safetensors",
44257
+ "model.layers.92.mlp.experts.115.gate_proj.weight": "mtp.safetensors",
44258
+ "model.layers.92.mlp.experts.115.up_proj.weight": "mtp.safetensors",
44259
+ "model.layers.92.mlp.experts.116.down_proj.weight": "mtp.safetensors",
44260
+ "model.layers.92.mlp.experts.116.gate_proj.weight": "mtp.safetensors",
44261
+ "model.layers.92.mlp.experts.116.up_proj.weight": "mtp.safetensors",
44262
+ "model.layers.92.mlp.experts.117.down_proj.weight": "mtp.safetensors",
44263
+ "model.layers.92.mlp.experts.117.gate_proj.weight": "mtp.safetensors",
44264
+ "model.layers.92.mlp.experts.117.up_proj.weight": "mtp.safetensors",
44265
+ "model.layers.92.mlp.experts.118.down_proj.weight": "mtp.safetensors",
44266
+ "model.layers.92.mlp.experts.118.gate_proj.weight": "mtp.safetensors",
44267
+ "model.layers.92.mlp.experts.118.up_proj.weight": "mtp.safetensors",
44268
+ "model.layers.92.mlp.experts.119.down_proj.weight": "mtp.safetensors",
44269
+ "model.layers.92.mlp.experts.119.gate_proj.weight": "mtp.safetensors",
44270
+ "model.layers.92.mlp.experts.119.up_proj.weight": "mtp.safetensors",
44271
+ "model.layers.92.mlp.experts.12.down_proj.weight": "mtp.safetensors",
44272
+ "model.layers.92.mlp.experts.12.gate_proj.weight": "mtp.safetensors",
44273
+ "model.layers.92.mlp.experts.12.up_proj.weight": "mtp.safetensors",
44274
+ "model.layers.92.mlp.experts.120.down_proj.weight": "mtp.safetensors",
44275
+ "model.layers.92.mlp.experts.120.gate_proj.weight": "mtp.safetensors",
44276
+ "model.layers.92.mlp.experts.120.up_proj.weight": "mtp.safetensors",
44277
+ "model.layers.92.mlp.experts.121.down_proj.weight": "mtp.safetensors",
44278
+ "model.layers.92.mlp.experts.121.gate_proj.weight": "mtp.safetensors",
44279
+ "model.layers.92.mlp.experts.121.up_proj.weight": "mtp.safetensors",
44280
+ "model.layers.92.mlp.experts.122.down_proj.weight": "mtp.safetensors",
44281
+ "model.layers.92.mlp.experts.122.gate_proj.weight": "mtp.safetensors",
44282
+ "model.layers.92.mlp.experts.122.up_proj.weight": "mtp.safetensors",
44283
+ "model.layers.92.mlp.experts.123.down_proj.weight": "mtp.safetensors",
44284
+ "model.layers.92.mlp.experts.123.gate_proj.weight": "mtp.safetensors",
44285
+ "model.layers.92.mlp.experts.123.up_proj.weight": "mtp.safetensors",
44286
+ "model.layers.92.mlp.experts.124.down_proj.weight": "mtp.safetensors",
44287
+ "model.layers.92.mlp.experts.124.gate_proj.weight": "mtp.safetensors",
44288
+ "model.layers.92.mlp.experts.124.up_proj.weight": "mtp.safetensors",
44289
+ "model.layers.92.mlp.experts.125.down_proj.weight": "mtp.safetensors",
44290
+ "model.layers.92.mlp.experts.125.gate_proj.weight": "mtp.safetensors",
44291
+ "model.layers.92.mlp.experts.125.up_proj.weight": "mtp.safetensors",
44292
+ "model.layers.92.mlp.experts.126.down_proj.weight": "mtp.safetensors",
44293
+ "model.layers.92.mlp.experts.126.gate_proj.weight": "mtp.safetensors",
44294
+ "model.layers.92.mlp.experts.126.up_proj.weight": "mtp.safetensors",
44295
+ "model.layers.92.mlp.experts.127.down_proj.weight": "mtp.safetensors",
44296
+ "model.layers.92.mlp.experts.127.gate_proj.weight": "mtp.safetensors",
44297
+ "model.layers.92.mlp.experts.127.up_proj.weight": "mtp.safetensors",
44298
+ "model.layers.92.mlp.experts.128.down_proj.weight": "mtp.safetensors",
44299
+ "model.layers.92.mlp.experts.128.gate_proj.weight": "mtp.safetensors",
44300
+ "model.layers.92.mlp.experts.128.up_proj.weight": "mtp.safetensors",
44301
+ "model.layers.92.mlp.experts.129.down_proj.weight": "mtp.safetensors",
44302
+ "model.layers.92.mlp.experts.129.gate_proj.weight": "mtp.safetensors",
44303
+ "model.layers.92.mlp.experts.129.up_proj.weight": "mtp.safetensors",
44304
+ "model.layers.92.mlp.experts.13.down_proj.weight": "mtp.safetensors",
44305
+ "model.layers.92.mlp.experts.13.gate_proj.weight": "mtp.safetensors",
44306
+ "model.layers.92.mlp.experts.13.up_proj.weight": "mtp.safetensors",
44307
+ "model.layers.92.mlp.experts.130.down_proj.weight": "mtp.safetensors",
44308
+ "model.layers.92.mlp.experts.130.gate_proj.weight": "mtp.safetensors",
44309
+ "model.layers.92.mlp.experts.130.up_proj.weight": "mtp.safetensors",
44310
+ "model.layers.92.mlp.experts.131.down_proj.weight": "mtp.safetensors",
44311
+ "model.layers.92.mlp.experts.131.gate_proj.weight": "mtp.safetensors",
44312
+ "model.layers.92.mlp.experts.131.up_proj.weight": "mtp.safetensors",
44313
+ "model.layers.92.mlp.experts.132.down_proj.weight": "mtp.safetensors",
44314
+ "model.layers.92.mlp.experts.132.gate_proj.weight": "mtp.safetensors",
44315
+ "model.layers.92.mlp.experts.132.up_proj.weight": "mtp.safetensors",
44316
+ "model.layers.92.mlp.experts.133.down_proj.weight": "mtp.safetensors",
44317
+ "model.layers.92.mlp.experts.133.gate_proj.weight": "mtp.safetensors",
44318
+ "model.layers.92.mlp.experts.133.up_proj.weight": "mtp.safetensors",
44319
+ "model.layers.92.mlp.experts.134.down_proj.weight": "mtp.safetensors",
44320
+ "model.layers.92.mlp.experts.134.gate_proj.weight": "mtp.safetensors",
44321
+ "model.layers.92.mlp.experts.134.up_proj.weight": "mtp.safetensors",
44322
+ "model.layers.92.mlp.experts.135.down_proj.weight": "mtp.safetensors",
44323
+ "model.layers.92.mlp.experts.135.gate_proj.weight": "mtp.safetensors",
44324
+ "model.layers.92.mlp.experts.135.up_proj.weight": "mtp.safetensors",
44325
+ "model.layers.92.mlp.experts.136.down_proj.weight": "mtp.safetensors",
44326
+ "model.layers.92.mlp.experts.136.gate_proj.weight": "mtp.safetensors",
44327
+ "model.layers.92.mlp.experts.136.up_proj.weight": "mtp.safetensors",
44328
+ "model.layers.92.mlp.experts.137.down_proj.weight": "mtp.safetensors",
44329
+ "model.layers.92.mlp.experts.137.gate_proj.weight": "mtp.safetensors",
44330
+ "model.layers.92.mlp.experts.137.up_proj.weight": "mtp.safetensors",
44331
+ "model.layers.92.mlp.experts.138.down_proj.weight": "mtp.safetensors",
44332
+ "model.layers.92.mlp.experts.138.gate_proj.weight": "mtp.safetensors",
44333
+ "model.layers.92.mlp.experts.138.up_proj.weight": "mtp.safetensors",
44334
+ "model.layers.92.mlp.experts.139.down_proj.weight": "mtp.safetensors",
44335
+ "model.layers.92.mlp.experts.139.gate_proj.weight": "mtp.safetensors",
44336
+ "model.layers.92.mlp.experts.139.up_proj.weight": "mtp.safetensors",
44337
+ "model.layers.92.mlp.experts.14.down_proj.weight": "mtp.safetensors",
44338
+ "model.layers.92.mlp.experts.14.gate_proj.weight": "mtp.safetensors",
44339
+ "model.layers.92.mlp.experts.14.up_proj.weight": "mtp.safetensors",
44340
+ "model.layers.92.mlp.experts.140.down_proj.weight": "mtp.safetensors",
44341
+ "model.layers.92.mlp.experts.140.gate_proj.weight": "mtp.safetensors",
44342
+ "model.layers.92.mlp.experts.140.up_proj.weight": "mtp.safetensors",
44343
+ "model.layers.92.mlp.experts.141.down_proj.weight": "mtp.safetensors",
44344
+ "model.layers.92.mlp.experts.141.gate_proj.weight": "mtp.safetensors",
44345
+ "model.layers.92.mlp.experts.141.up_proj.weight": "mtp.safetensors",
44346
+ "model.layers.92.mlp.experts.142.down_proj.weight": "mtp.safetensors",
44347
+ "model.layers.92.mlp.experts.142.gate_proj.weight": "mtp.safetensors",
44348
+ "model.layers.92.mlp.experts.142.up_proj.weight": "mtp.safetensors",
44349
+ "model.layers.92.mlp.experts.143.down_proj.weight": "mtp.safetensors",
44350
+ "model.layers.92.mlp.experts.143.gate_proj.weight": "mtp.safetensors",
44351
+ "model.layers.92.mlp.experts.143.up_proj.weight": "mtp.safetensors",
44352
+ "model.layers.92.mlp.experts.144.down_proj.weight": "mtp.safetensors",
44353
+ "model.layers.92.mlp.experts.144.gate_proj.weight": "mtp.safetensors",
44354
+ "model.layers.92.mlp.experts.144.up_proj.weight": "mtp.safetensors",
44355
+ "model.layers.92.mlp.experts.145.down_proj.weight": "mtp.safetensors",
44356
+ "model.layers.92.mlp.experts.145.gate_proj.weight": "mtp.safetensors",
44357
+ "model.layers.92.mlp.experts.145.up_proj.weight": "mtp.safetensors",
44358
+ "model.layers.92.mlp.experts.146.down_proj.weight": "mtp.safetensors",
44359
+ "model.layers.92.mlp.experts.146.gate_proj.weight": "mtp.safetensors",
44360
+ "model.layers.92.mlp.experts.146.up_proj.weight": "mtp.safetensors",
44361
+ "model.layers.92.mlp.experts.147.down_proj.weight": "mtp.safetensors",
44362
+ "model.layers.92.mlp.experts.147.gate_proj.weight": "mtp.safetensors",
44363
+ "model.layers.92.mlp.experts.147.up_proj.weight": "mtp.safetensors",
44364
+ "model.layers.92.mlp.experts.148.down_proj.weight": "mtp.safetensors",
44365
+ "model.layers.92.mlp.experts.148.gate_proj.weight": "mtp.safetensors",
44366
+ "model.layers.92.mlp.experts.148.up_proj.weight": "mtp.safetensors",
44367
+ "model.layers.92.mlp.experts.149.down_proj.weight": "mtp.safetensors",
44368
+ "model.layers.92.mlp.experts.149.gate_proj.weight": "mtp.safetensors",
44369
+ "model.layers.92.mlp.experts.149.up_proj.weight": "mtp.safetensors",
44370
+ "model.layers.92.mlp.experts.15.down_proj.weight": "mtp.safetensors",
44371
+ "model.layers.92.mlp.experts.15.gate_proj.weight": "mtp.safetensors",
44372
+ "model.layers.92.mlp.experts.15.up_proj.weight": "mtp.safetensors",
44373
+ "model.layers.92.mlp.experts.150.down_proj.weight": "mtp.safetensors",
44374
+ "model.layers.92.mlp.experts.150.gate_proj.weight": "mtp.safetensors",
44375
+ "model.layers.92.mlp.experts.150.up_proj.weight": "mtp.safetensors",
44376
+ "model.layers.92.mlp.experts.151.down_proj.weight": "mtp.safetensors",
44377
+ "model.layers.92.mlp.experts.151.gate_proj.weight": "mtp.safetensors",
44378
+ "model.layers.92.mlp.experts.151.up_proj.weight": "mtp.safetensors",
44379
+ "model.layers.92.mlp.experts.152.down_proj.weight": "mtp.safetensors",
44380
+ "model.layers.92.mlp.experts.152.gate_proj.weight": "mtp.safetensors",
44381
+ "model.layers.92.mlp.experts.152.up_proj.weight": "mtp.safetensors",
44382
+ "model.layers.92.mlp.experts.153.down_proj.weight": "mtp.safetensors",
44383
+ "model.layers.92.mlp.experts.153.gate_proj.weight": "mtp.safetensors",
44384
+ "model.layers.92.mlp.experts.153.up_proj.weight": "mtp.safetensors",
44385
+ "model.layers.92.mlp.experts.154.down_proj.weight": "mtp.safetensors",
44386
+ "model.layers.92.mlp.experts.154.gate_proj.weight": "mtp.safetensors",
44387
+ "model.layers.92.mlp.experts.154.up_proj.weight": "mtp.safetensors",
44388
+ "model.layers.92.mlp.experts.155.down_proj.weight": "mtp.safetensors",
44389
+ "model.layers.92.mlp.experts.155.gate_proj.weight": "mtp.safetensors",
44390
+ "model.layers.92.mlp.experts.155.up_proj.weight": "mtp.safetensors",
44391
+ "model.layers.92.mlp.experts.156.down_proj.weight": "mtp.safetensors",
44392
+ "model.layers.92.mlp.experts.156.gate_proj.weight": "mtp.safetensors",
44393
+ "model.layers.92.mlp.experts.156.up_proj.weight": "mtp.safetensors",
44394
+ "model.layers.92.mlp.experts.157.down_proj.weight": "mtp.safetensors",
44395
+ "model.layers.92.mlp.experts.157.gate_proj.weight": "mtp.safetensors",
44396
+ "model.layers.92.mlp.experts.157.up_proj.weight": "mtp.safetensors",
44397
+ "model.layers.92.mlp.experts.158.down_proj.weight": "mtp.safetensors",
44398
+ "model.layers.92.mlp.experts.158.gate_proj.weight": "mtp.safetensors",
44399
+ "model.layers.92.mlp.experts.158.up_proj.weight": "mtp.safetensors",
44400
+ "model.layers.92.mlp.experts.159.down_proj.weight": "mtp.safetensors",
44401
+ "model.layers.92.mlp.experts.159.gate_proj.weight": "mtp.safetensors",
44402
+ "model.layers.92.mlp.experts.159.up_proj.weight": "mtp.safetensors",
44403
+ "model.layers.92.mlp.experts.16.down_proj.weight": "mtp.safetensors",
44404
+ "model.layers.92.mlp.experts.16.gate_proj.weight": "mtp.safetensors",
44405
+ "model.layers.92.mlp.experts.16.up_proj.weight": "mtp.safetensors",
44406
+ "model.layers.92.mlp.experts.17.down_proj.weight": "mtp.safetensors",
44407
+ "model.layers.92.mlp.experts.17.gate_proj.weight": "mtp.safetensors",
44408
+ "model.layers.92.mlp.experts.17.up_proj.weight": "mtp.safetensors",
44409
+ "model.layers.92.mlp.experts.18.down_proj.weight": "mtp.safetensors",
44410
+ "model.layers.92.mlp.experts.18.gate_proj.weight": "mtp.safetensors",
44411
+ "model.layers.92.mlp.experts.18.up_proj.weight": "mtp.safetensors",
44412
+ "model.layers.92.mlp.experts.19.down_proj.weight": "mtp.safetensors",
44413
+ "model.layers.92.mlp.experts.19.gate_proj.weight": "mtp.safetensors",
44414
+ "model.layers.92.mlp.experts.19.up_proj.weight": "mtp.safetensors",
44415
+ "model.layers.92.mlp.experts.2.down_proj.weight": "mtp.safetensors",
44416
+ "model.layers.92.mlp.experts.2.gate_proj.weight": "mtp.safetensors",
44417
+ "model.layers.92.mlp.experts.2.up_proj.weight": "mtp.safetensors",
44418
+ "model.layers.92.mlp.experts.20.down_proj.weight": "mtp.safetensors",
44419
+ "model.layers.92.mlp.experts.20.gate_proj.weight": "mtp.safetensors",
44420
+ "model.layers.92.mlp.experts.20.up_proj.weight": "mtp.safetensors",
44421
+ "model.layers.92.mlp.experts.21.down_proj.weight": "mtp.safetensors",
44422
+ "model.layers.92.mlp.experts.21.gate_proj.weight": "mtp.safetensors",
44423
+ "model.layers.92.mlp.experts.21.up_proj.weight": "mtp.safetensors",
44424
+ "model.layers.92.mlp.experts.22.down_proj.weight": "mtp.safetensors",
44425
+ "model.layers.92.mlp.experts.22.gate_proj.weight": "mtp.safetensors",
44426
+ "model.layers.92.mlp.experts.22.up_proj.weight": "mtp.safetensors",
44427
+ "model.layers.92.mlp.experts.23.down_proj.weight": "mtp.safetensors",
44428
+ "model.layers.92.mlp.experts.23.gate_proj.weight": "mtp.safetensors",
44429
+ "model.layers.92.mlp.experts.23.up_proj.weight": "mtp.safetensors",
44430
+ "model.layers.92.mlp.experts.24.down_proj.weight": "mtp.safetensors",
44431
+ "model.layers.92.mlp.experts.24.gate_proj.weight": "mtp.safetensors",
44432
+ "model.layers.92.mlp.experts.24.up_proj.weight": "mtp.safetensors",
44433
+ "model.layers.92.mlp.experts.25.down_proj.weight": "mtp.safetensors",
44434
+ "model.layers.92.mlp.experts.25.gate_proj.weight": "mtp.safetensors",
44435
+ "model.layers.92.mlp.experts.25.up_proj.weight": "mtp.safetensors",
44436
+ "model.layers.92.mlp.experts.26.down_proj.weight": "mtp.safetensors",
44437
+ "model.layers.92.mlp.experts.26.gate_proj.weight": "mtp.safetensors",
44438
+ "model.layers.92.mlp.experts.26.up_proj.weight": "mtp.safetensors",
44439
+ "model.layers.92.mlp.experts.27.down_proj.weight": "mtp.safetensors",
44440
+ "model.layers.92.mlp.experts.27.gate_proj.weight": "mtp.safetensors",
44441
+ "model.layers.92.mlp.experts.27.up_proj.weight": "mtp.safetensors",
44442
+ "model.layers.92.mlp.experts.28.down_proj.weight": "mtp.safetensors",
44443
+ "model.layers.92.mlp.experts.28.gate_proj.weight": "mtp.safetensors",
44444
+ "model.layers.92.mlp.experts.28.up_proj.weight": "mtp.safetensors",
44445
+ "model.layers.92.mlp.experts.29.down_proj.weight": "mtp.safetensors",
44446
+ "model.layers.92.mlp.experts.29.gate_proj.weight": "mtp.safetensors",
44447
+ "model.layers.92.mlp.experts.29.up_proj.weight": "mtp.safetensors",
44448
+ "model.layers.92.mlp.experts.3.down_proj.weight": "mtp.safetensors",
44449
+ "model.layers.92.mlp.experts.3.gate_proj.weight": "mtp.safetensors",
44450
+ "model.layers.92.mlp.experts.3.up_proj.weight": "mtp.safetensors",
44451
+ "model.layers.92.mlp.experts.30.down_proj.weight": "mtp.safetensors",
44452
+ "model.layers.92.mlp.experts.30.gate_proj.weight": "mtp.safetensors",
44453
+ "model.layers.92.mlp.experts.30.up_proj.weight": "mtp.safetensors",
44454
+ "model.layers.92.mlp.experts.31.down_proj.weight": "mtp.safetensors",
44455
+ "model.layers.92.mlp.experts.31.gate_proj.weight": "mtp.safetensors",
44456
+ "model.layers.92.mlp.experts.31.up_proj.weight": "mtp.safetensors",
44457
+ "model.layers.92.mlp.experts.32.down_proj.weight": "mtp.safetensors",
44458
+ "model.layers.92.mlp.experts.32.gate_proj.weight": "mtp.safetensors",
44459
+ "model.layers.92.mlp.experts.32.up_proj.weight": "mtp.safetensors",
44460
+ "model.layers.92.mlp.experts.33.down_proj.weight": "mtp.safetensors",
44461
+ "model.layers.92.mlp.experts.33.gate_proj.weight": "mtp.safetensors",
44462
+ "model.layers.92.mlp.experts.33.up_proj.weight": "mtp.safetensors",
44463
+ "model.layers.92.mlp.experts.34.down_proj.weight": "mtp.safetensors",
44464
+ "model.layers.92.mlp.experts.34.gate_proj.weight": "mtp.safetensors",
44465
+ "model.layers.92.mlp.experts.34.up_proj.weight": "mtp.safetensors",
44466
+ "model.layers.92.mlp.experts.35.down_proj.weight": "mtp.safetensors",
44467
+ "model.layers.92.mlp.experts.35.gate_proj.weight": "mtp.safetensors",
44468
+ "model.layers.92.mlp.experts.35.up_proj.weight": "mtp.safetensors",
44469
+ "model.layers.92.mlp.experts.36.down_proj.weight": "mtp.safetensors",
44470
+ "model.layers.92.mlp.experts.36.gate_proj.weight": "mtp.safetensors",
44471
+ "model.layers.92.mlp.experts.36.up_proj.weight": "mtp.safetensors",
44472
+ "model.layers.92.mlp.experts.37.down_proj.weight": "mtp.safetensors",
44473
+ "model.layers.92.mlp.experts.37.gate_proj.weight": "mtp.safetensors",
44474
+ "model.layers.92.mlp.experts.37.up_proj.weight": "mtp.safetensors",
44475
+ "model.layers.92.mlp.experts.38.down_proj.weight": "mtp.safetensors",
44476
+ "model.layers.92.mlp.experts.38.gate_proj.weight": "mtp.safetensors",
44477
+ "model.layers.92.mlp.experts.38.up_proj.weight": "mtp.safetensors",
44478
+ "model.layers.92.mlp.experts.39.down_proj.weight": "mtp.safetensors",
44479
+ "model.layers.92.mlp.experts.39.gate_proj.weight": "mtp.safetensors",
44480
+ "model.layers.92.mlp.experts.39.up_proj.weight": "mtp.safetensors",
44481
+ "model.layers.92.mlp.experts.4.down_proj.weight": "mtp.safetensors",
44482
+ "model.layers.92.mlp.experts.4.gate_proj.weight": "mtp.safetensors",
44483
+ "model.layers.92.mlp.experts.4.up_proj.weight": "mtp.safetensors",
44484
+ "model.layers.92.mlp.experts.40.down_proj.weight": "mtp.safetensors",
44485
+ "model.layers.92.mlp.experts.40.gate_proj.weight": "mtp.safetensors",
44486
+ "model.layers.92.mlp.experts.40.up_proj.weight": "mtp.safetensors",
44487
+ "model.layers.92.mlp.experts.41.down_proj.weight": "mtp.safetensors",
44488
+ "model.layers.92.mlp.experts.41.gate_proj.weight": "mtp.safetensors",
44489
+ "model.layers.92.mlp.experts.41.up_proj.weight": "mtp.safetensors",
44490
+ "model.layers.92.mlp.experts.42.down_proj.weight": "mtp.safetensors",
44491
+ "model.layers.92.mlp.experts.42.gate_proj.weight": "mtp.safetensors",
44492
+ "model.layers.92.mlp.experts.42.up_proj.weight": "mtp.safetensors",
44493
+ "model.layers.92.mlp.experts.43.down_proj.weight": "mtp.safetensors",
44494
+ "model.layers.92.mlp.experts.43.gate_proj.weight": "mtp.safetensors",
44495
+ "model.layers.92.mlp.experts.43.up_proj.weight": "mtp.safetensors",
44496
+ "model.layers.92.mlp.experts.44.down_proj.weight": "mtp.safetensors",
44497
+ "model.layers.92.mlp.experts.44.gate_proj.weight": "mtp.safetensors",
44498
+ "model.layers.92.mlp.experts.44.up_proj.weight": "mtp.safetensors",
44499
+ "model.layers.92.mlp.experts.45.down_proj.weight": "mtp.safetensors",
44500
+ "model.layers.92.mlp.experts.45.gate_proj.weight": "mtp.safetensors",
44501
+ "model.layers.92.mlp.experts.45.up_proj.weight": "mtp.safetensors",
44502
+ "model.layers.92.mlp.experts.46.down_proj.weight": "mtp.safetensors",
44503
+ "model.layers.92.mlp.experts.46.gate_proj.weight": "mtp.safetensors",
44504
+ "model.layers.92.mlp.experts.46.up_proj.weight": "mtp.safetensors",
44505
+ "model.layers.92.mlp.experts.47.down_proj.weight": "mtp.safetensors",
44506
+ "model.layers.92.mlp.experts.47.gate_proj.weight": "mtp.safetensors",
44507
+ "model.layers.92.mlp.experts.47.up_proj.weight": "mtp.safetensors",
44508
+ "model.layers.92.mlp.experts.48.down_proj.weight": "mtp.safetensors",
44509
+ "model.layers.92.mlp.experts.48.gate_proj.weight": "mtp.safetensors",
44510
+ "model.layers.92.mlp.experts.48.up_proj.weight": "mtp.safetensors",
44511
+ "model.layers.92.mlp.experts.49.down_proj.weight": "mtp.safetensors",
44512
+ "model.layers.92.mlp.experts.49.gate_proj.weight": "mtp.safetensors",
44513
+ "model.layers.92.mlp.experts.49.up_proj.weight": "mtp.safetensors",
44514
+ "model.layers.92.mlp.experts.5.down_proj.weight": "mtp.safetensors",
44515
+ "model.layers.92.mlp.experts.5.gate_proj.weight": "mtp.safetensors",
44516
+ "model.layers.92.mlp.experts.5.up_proj.weight": "mtp.safetensors",
44517
+ "model.layers.92.mlp.experts.50.down_proj.weight": "mtp.safetensors",
44518
+ "model.layers.92.mlp.experts.50.gate_proj.weight": "mtp.safetensors",
44519
+ "model.layers.92.mlp.experts.50.up_proj.weight": "mtp.safetensors",
44520
+ "model.layers.92.mlp.experts.51.down_proj.weight": "mtp.safetensors",
44521
+ "model.layers.92.mlp.experts.51.gate_proj.weight": "mtp.safetensors",
44522
+ "model.layers.92.mlp.experts.51.up_proj.weight": "mtp.safetensors",
44523
+ "model.layers.92.mlp.experts.52.down_proj.weight": "mtp.safetensors",
44524
+ "model.layers.92.mlp.experts.52.gate_proj.weight": "mtp.safetensors",
44525
+ "model.layers.92.mlp.experts.52.up_proj.weight": "mtp.safetensors",
44526
+ "model.layers.92.mlp.experts.53.down_proj.weight": "mtp.safetensors",
44527
+ "model.layers.92.mlp.experts.53.gate_proj.weight": "mtp.safetensors",
44528
+ "model.layers.92.mlp.experts.53.up_proj.weight": "mtp.safetensors",
44529
+ "model.layers.92.mlp.experts.54.down_proj.weight": "mtp.safetensors",
44530
+ "model.layers.92.mlp.experts.54.gate_proj.weight": "mtp.safetensors",
44531
+ "model.layers.92.mlp.experts.54.up_proj.weight": "mtp.safetensors",
44532
+ "model.layers.92.mlp.experts.55.down_proj.weight": "mtp.safetensors",
44533
+ "model.layers.92.mlp.experts.55.gate_proj.weight": "mtp.safetensors",
44534
+ "model.layers.92.mlp.experts.55.up_proj.weight": "mtp.safetensors",
44535
+ "model.layers.92.mlp.experts.56.down_proj.weight": "mtp.safetensors",
44536
+ "model.layers.92.mlp.experts.56.gate_proj.weight": "mtp.safetensors",
44537
+ "model.layers.92.mlp.experts.56.up_proj.weight": "mtp.safetensors",
44538
+ "model.layers.92.mlp.experts.57.down_proj.weight": "mtp.safetensors",
44539
+ "model.layers.92.mlp.experts.57.gate_proj.weight": "mtp.safetensors",
44540
+ "model.layers.92.mlp.experts.57.up_proj.weight": "mtp.safetensors",
44541
+ "model.layers.92.mlp.experts.58.down_proj.weight": "mtp.safetensors",
44542
+ "model.layers.92.mlp.experts.58.gate_proj.weight": "mtp.safetensors",
44543
+ "model.layers.92.mlp.experts.58.up_proj.weight": "mtp.safetensors",
44544
+ "model.layers.92.mlp.experts.59.down_proj.weight": "mtp.safetensors",
44545
+ "model.layers.92.mlp.experts.59.gate_proj.weight": "mtp.safetensors",
44546
+ "model.layers.92.mlp.experts.59.up_proj.weight": "mtp.safetensors",
44547
+ "model.layers.92.mlp.experts.6.down_proj.weight": "mtp.safetensors",
44548
+ "model.layers.92.mlp.experts.6.gate_proj.weight": "mtp.safetensors",
44549
+ "model.layers.92.mlp.experts.6.up_proj.weight": "mtp.safetensors",
44550
+ "model.layers.92.mlp.experts.60.down_proj.weight": "mtp.safetensors",
44551
+ "model.layers.92.mlp.experts.60.gate_proj.weight": "mtp.safetensors",
44552
+ "model.layers.92.mlp.experts.60.up_proj.weight": "mtp.safetensors",
44553
+ "model.layers.92.mlp.experts.61.down_proj.weight": "mtp.safetensors",
44554
+ "model.layers.92.mlp.experts.61.gate_proj.weight": "mtp.safetensors",
44555
+ "model.layers.92.mlp.experts.61.up_proj.weight": "mtp.safetensors",
44556
+ "model.layers.92.mlp.experts.62.down_proj.weight": "mtp.safetensors",
44557
+ "model.layers.92.mlp.experts.62.gate_proj.weight": "mtp.safetensors",
44558
+ "model.layers.92.mlp.experts.62.up_proj.weight": "mtp.safetensors",
44559
+ "model.layers.92.mlp.experts.63.down_proj.weight": "mtp.safetensors",
44560
+ "model.layers.92.mlp.experts.63.gate_proj.weight": "mtp.safetensors",
44561
+ "model.layers.92.mlp.experts.63.up_proj.weight": "mtp.safetensors",
44562
+ "model.layers.92.mlp.experts.64.down_proj.weight": "mtp.safetensors",
44563
+ "model.layers.92.mlp.experts.64.gate_proj.weight": "mtp.safetensors",
44564
+ "model.layers.92.mlp.experts.64.up_proj.weight": "mtp.safetensors",
44565
+ "model.layers.92.mlp.experts.65.down_proj.weight": "mtp.safetensors",
44566
+ "model.layers.92.mlp.experts.65.gate_proj.weight": "mtp.safetensors",
44567
+ "model.layers.92.mlp.experts.65.up_proj.weight": "mtp.safetensors",
44568
+ "model.layers.92.mlp.experts.66.down_proj.weight": "mtp.safetensors",
44569
+ "model.layers.92.mlp.experts.66.gate_proj.weight": "mtp.safetensors",
44570
+ "model.layers.92.mlp.experts.66.up_proj.weight": "mtp.safetensors",
44571
+ "model.layers.92.mlp.experts.67.down_proj.weight": "mtp.safetensors",
44572
+ "model.layers.92.mlp.experts.67.gate_proj.weight": "mtp.safetensors",
44573
+ "model.layers.92.mlp.experts.67.up_proj.weight": "mtp.safetensors",
44574
+ "model.layers.92.mlp.experts.68.down_proj.weight": "mtp.safetensors",
44575
+ "model.layers.92.mlp.experts.68.gate_proj.weight": "mtp.safetensors",
44576
+ "model.layers.92.mlp.experts.68.up_proj.weight": "mtp.safetensors",
44577
+ "model.layers.92.mlp.experts.69.down_proj.weight": "mtp.safetensors",
44578
+ "model.layers.92.mlp.experts.69.gate_proj.weight": "mtp.safetensors",
44579
+ "model.layers.92.mlp.experts.69.up_proj.weight": "mtp.safetensors",
44580
+ "model.layers.92.mlp.experts.7.down_proj.weight": "mtp.safetensors",
44581
+ "model.layers.92.mlp.experts.7.gate_proj.weight": "mtp.safetensors",
44582
+ "model.layers.92.mlp.experts.7.up_proj.weight": "mtp.safetensors",
44583
+ "model.layers.92.mlp.experts.70.down_proj.weight": "mtp.safetensors",
44584
+ "model.layers.92.mlp.experts.70.gate_proj.weight": "mtp.safetensors",
44585
+ "model.layers.92.mlp.experts.70.up_proj.weight": "mtp.safetensors",
44586
+ "model.layers.92.mlp.experts.71.down_proj.weight": "mtp.safetensors",
44587
+ "model.layers.92.mlp.experts.71.gate_proj.weight": "mtp.safetensors",
44588
+ "model.layers.92.mlp.experts.71.up_proj.weight": "mtp.safetensors",
44589
+ "model.layers.92.mlp.experts.72.down_proj.weight": "mtp.safetensors",
44590
+ "model.layers.92.mlp.experts.72.gate_proj.weight": "mtp.safetensors",
44591
+ "model.layers.92.mlp.experts.72.up_proj.weight": "mtp.safetensors",
44592
+ "model.layers.92.mlp.experts.73.down_proj.weight": "mtp.safetensors",
44593
+ "model.layers.92.mlp.experts.73.gate_proj.weight": "mtp.safetensors",
44594
+ "model.layers.92.mlp.experts.73.up_proj.weight": "mtp.safetensors",
44595
+ "model.layers.92.mlp.experts.74.down_proj.weight": "mtp.safetensors",
44596
+ "model.layers.92.mlp.experts.74.gate_proj.weight": "mtp.safetensors",
44597
+ "model.layers.92.mlp.experts.74.up_proj.weight": "mtp.safetensors",
44598
+ "model.layers.92.mlp.experts.75.down_proj.weight": "mtp.safetensors",
44599
+ "model.layers.92.mlp.experts.75.gate_proj.weight": "mtp.safetensors",
44600
+ "model.layers.92.mlp.experts.75.up_proj.weight": "mtp.safetensors",
44601
+ "model.layers.92.mlp.experts.76.down_proj.weight": "mtp.safetensors",
44602
+ "model.layers.92.mlp.experts.76.gate_proj.weight": "mtp.safetensors",
44603
+ "model.layers.92.mlp.experts.76.up_proj.weight": "mtp.safetensors",
44604
+ "model.layers.92.mlp.experts.77.down_proj.weight": "mtp.safetensors",
44605
+ "model.layers.92.mlp.experts.77.gate_proj.weight": "mtp.safetensors",
44606
+ "model.layers.92.mlp.experts.77.up_proj.weight": "mtp.safetensors",
44607
+ "model.layers.92.mlp.experts.78.down_proj.weight": "mtp.safetensors",
44608
+ "model.layers.92.mlp.experts.78.gate_proj.weight": "mtp.safetensors",
44609
+ "model.layers.92.mlp.experts.78.up_proj.weight": "mtp.safetensors",
44610
+ "model.layers.92.mlp.experts.79.down_proj.weight": "mtp.safetensors",
44611
+ "model.layers.92.mlp.experts.79.gate_proj.weight": "mtp.safetensors",
44612
+ "model.layers.92.mlp.experts.79.up_proj.weight": "mtp.safetensors",
44613
+ "model.layers.92.mlp.experts.8.down_proj.weight": "mtp.safetensors",
44614
+ "model.layers.92.mlp.experts.8.gate_proj.weight": "mtp.safetensors",
44615
+ "model.layers.92.mlp.experts.8.up_proj.weight": "mtp.safetensors",
44616
+ "model.layers.92.mlp.experts.80.down_proj.weight": "mtp.safetensors",
44617
+ "model.layers.92.mlp.experts.80.gate_proj.weight": "mtp.safetensors",
44618
+ "model.layers.92.mlp.experts.80.up_proj.weight": "mtp.safetensors",
44619
+ "model.layers.92.mlp.experts.81.down_proj.weight": "mtp.safetensors",
44620
+ "model.layers.92.mlp.experts.81.gate_proj.weight": "mtp.safetensors",
44621
+ "model.layers.92.mlp.experts.81.up_proj.weight": "mtp.safetensors",
44622
+ "model.layers.92.mlp.experts.82.down_proj.weight": "mtp.safetensors",
44623
+ "model.layers.92.mlp.experts.82.gate_proj.weight": "mtp.safetensors",
44624
+ "model.layers.92.mlp.experts.82.up_proj.weight": "mtp.safetensors",
44625
+ "model.layers.92.mlp.experts.83.down_proj.weight": "mtp.safetensors",
44626
+ "model.layers.92.mlp.experts.83.gate_proj.weight": "mtp.safetensors",
44627
+ "model.layers.92.mlp.experts.83.up_proj.weight": "mtp.safetensors",
44628
+ "model.layers.92.mlp.experts.84.down_proj.weight": "mtp.safetensors",
44629
+ "model.layers.92.mlp.experts.84.gate_proj.weight": "mtp.safetensors",
44630
+ "model.layers.92.mlp.experts.84.up_proj.weight": "mtp.safetensors",
44631
+ "model.layers.92.mlp.experts.85.down_proj.weight": "mtp.safetensors",
44632
+ "model.layers.92.mlp.experts.85.gate_proj.weight": "mtp.safetensors",
44633
+ "model.layers.92.mlp.experts.85.up_proj.weight": "mtp.safetensors",
44634
+ "model.layers.92.mlp.experts.86.down_proj.weight": "mtp.safetensors",
44635
+ "model.layers.92.mlp.experts.86.gate_proj.weight": "mtp.safetensors",
44636
+ "model.layers.92.mlp.experts.86.up_proj.weight": "mtp.safetensors",
44637
+ "model.layers.92.mlp.experts.87.down_proj.weight": "mtp.safetensors",
44638
+ "model.layers.92.mlp.experts.87.gate_proj.weight": "mtp.safetensors",
44639
+ "model.layers.92.mlp.experts.87.up_proj.weight": "mtp.safetensors",
44640
+ "model.layers.92.mlp.experts.88.down_proj.weight": "mtp.safetensors",
44641
+ "model.layers.92.mlp.experts.88.gate_proj.weight": "mtp.safetensors",
44642
+ "model.layers.92.mlp.experts.88.up_proj.weight": "mtp.safetensors",
44643
+ "model.layers.92.mlp.experts.89.down_proj.weight": "mtp.safetensors",
44644
+ "model.layers.92.mlp.experts.89.gate_proj.weight": "mtp.safetensors",
44645
+ "model.layers.92.mlp.experts.89.up_proj.weight": "mtp.safetensors",
44646
+ "model.layers.92.mlp.experts.9.down_proj.weight": "mtp.safetensors",
44647
+ "model.layers.92.mlp.experts.9.gate_proj.weight": "mtp.safetensors",
44648
+ "model.layers.92.mlp.experts.9.up_proj.weight": "mtp.safetensors",
44649
+ "model.layers.92.mlp.experts.90.down_proj.weight": "mtp.safetensors",
44650
+ "model.layers.92.mlp.experts.90.gate_proj.weight": "mtp.safetensors",
44651
+ "model.layers.92.mlp.experts.90.up_proj.weight": "mtp.safetensors",
44652
+ "model.layers.92.mlp.experts.91.down_proj.weight": "mtp.safetensors",
44653
+ "model.layers.92.mlp.experts.91.gate_proj.weight": "mtp.safetensors",
44654
+ "model.layers.92.mlp.experts.91.up_proj.weight": "mtp.safetensors",
44655
+ "model.layers.92.mlp.experts.92.down_proj.weight": "mtp.safetensors",
44656
+ "model.layers.92.mlp.experts.92.gate_proj.weight": "mtp.safetensors",
44657
+ "model.layers.92.mlp.experts.92.up_proj.weight": "mtp.safetensors",
44658
+ "model.layers.92.mlp.experts.93.down_proj.weight": "mtp.safetensors",
44659
+ "model.layers.92.mlp.experts.93.gate_proj.weight": "mtp.safetensors",
44660
+ "model.layers.92.mlp.experts.93.up_proj.weight": "mtp.safetensors",
44661
+ "model.layers.92.mlp.experts.94.down_proj.weight": "mtp.safetensors",
44662
+ "model.layers.92.mlp.experts.94.gate_proj.weight": "mtp.safetensors",
44663
+ "model.layers.92.mlp.experts.94.up_proj.weight": "mtp.safetensors",
44664
+ "model.layers.92.mlp.experts.95.down_proj.weight": "mtp.safetensors",
44665
+ "model.layers.92.mlp.experts.95.gate_proj.weight": "mtp.safetensors",
44666
+ "model.layers.92.mlp.experts.95.up_proj.weight": "mtp.safetensors",
44667
+ "model.layers.92.mlp.experts.96.down_proj.weight": "mtp.safetensors",
44668
+ "model.layers.92.mlp.experts.96.gate_proj.weight": "mtp.safetensors",
44669
+ "model.layers.92.mlp.experts.96.up_proj.weight": "mtp.safetensors",
44670
+ "model.layers.92.mlp.experts.97.down_proj.weight": "mtp.safetensors",
44671
+ "model.layers.92.mlp.experts.97.gate_proj.weight": "mtp.safetensors",
44672
+ "model.layers.92.mlp.experts.97.up_proj.weight": "mtp.safetensors",
44673
+ "model.layers.92.mlp.experts.98.down_proj.weight": "mtp.safetensors",
44674
+ "model.layers.92.mlp.experts.98.gate_proj.weight": "mtp.safetensors",
44675
+ "model.layers.92.mlp.experts.98.up_proj.weight": "mtp.safetensors",
44676
+ "model.layers.92.mlp.experts.99.down_proj.weight": "mtp.safetensors",
44677
+ "model.layers.92.mlp.experts.99.gate_proj.weight": "mtp.safetensors",
44678
+ "model.layers.92.mlp.experts.99.up_proj.weight": "mtp.safetensors",
44679
+ "model.layers.92.mlp.gate.e_score_correction_bias": "mtp.safetensors",
44680
+ "model.layers.92.mlp.gate.weight": "mtp.safetensors",
44681
+ "model.layers.92.mlp.shared_experts.down_proj.weight": "mtp.safetensors",
44682
+ "model.layers.92.mlp.shared_experts.gate_proj.weight": "mtp.safetensors",
44683
+ "model.layers.92.mlp.shared_experts.up_proj.weight": "mtp.safetensors",
44684
+ "model.layers.92.post_attention_layernorm.weight": "mtp.safetensors",
44685
+ "model.layers.92.self_attn.k_norm.weight": "mtp.safetensors",
44686
+ "model.layers.92.self_attn.k_proj.bias": "mtp.safetensors",
44687
+ "model.layers.92.self_attn.k_proj.weight": "mtp.safetensors",
44688
+ "model.layers.92.self_attn.o_proj.weight": "mtp.safetensors",
44689
+ "model.layers.92.self_attn.q_norm.weight": "mtp.safetensors",
44690
+ "model.layers.92.self_attn.q_proj.bias": "mtp.safetensors",
44691
+ "model.layers.92.self_attn.q_proj.weight": "mtp.safetensors",
44692
+ "model.layers.92.self_attn.v_proj.bias": "mtp.safetensors",
44693
+ "model.layers.92.self_attn.v_proj.weight": "mtp.safetensors",
44694
+ "model.layers.92.shared_head.norm.weight": "mtp.safetensors"
44695
  }
44696
  }
model-mtp.safetensors → mtp.safetensors RENAMED
File without changes
tokenizer_config.json CHANGED
@@ -324,5 +324,5 @@
324
  "remove_space": false,
325
  "tokenizer_class": "PreTrainedTokenizerFast",
326
  "unk_token": null,
327
- "chat_template": "{# Unsloth template fixes #}[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{% for tool in tools %}\n{{ tool | tojson|string }}\n{% endfor %}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call>{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{% for m in messages %}\n{%- if m.role == 'user' -%}<|user|>\n{%- set content = visible_text(m.content)|string %}{{ content }}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not content.endswith(\"/nothink\")) else '' -}}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set reasoning_content = '' %}\n{%- set content = visible_text(m.content)|string %}\n{%- if m.reasoning_content is defined and m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- else %}\n {# Unsloth template fixes - must change to for loop since llama.cpp will error out if not #}\n {%- set parts = content.split('</think>') %}\n {% for part in parts %}\n {%- if loop.index0 == 0 -%}\n {%- set reasoning_content = (part.split(\"<think>\")|last) %}\n {%- set reasoning_content = reasoning_content.lstrip('\\n').rstrip('\\n') -%}\n {%- else -%}\n {%- set content = part.lstrip('\\n') %}\n {%- endif %}\n {%- endfor %}\n{%- endif %}\n{%- if loop.index0 > ns.last_user_index and reasoning_content -%}\n{{ '\\n<think>' + reasoning_content.strip() + '</think>'}}\n{%- else -%}\n{{ '\\n<think></think>' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ '\\n' + content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{ '\\n<tool_call>' + tc.name }}\n{% set _args = tc.arguments %}\n{%- if _args is not mapping -%}\n {%- set _args = {} %}\n{%- endif -%}\n{% for k, v in _args|items %}\n<arg_key>{{ k }}</arg_key>\n<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>\n{% endfor %}\n</tool_call>{% endfor %}\n{% endif %}\n{%- elif m.role == 'tool' -%}\n{%- if m.content is string -%}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|observation|>' }}\n{%- endif %}\n{{- '\\n<tool_response>\\n' }}\n{{- m.content }}\n{{- '\\n</tool_response>' }}\n{%- else -%}\n<|observation|>{% for tr in m.content %}\n\n<tool_response>\n{{ tr.output if tr.output is defined else tr }}\n</tool_response>{% endfor -%}\n{% endif -%}\n{%- elif m.role == 'system' -%}\n<|system|>\n{{ visible_text(m.content)|string }}\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n <|assistant|>{{- '\\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}\n{%- endif -%}{# Copyright 2025-present Unsloth. Apache 2.0 License. #}"
328
  }
 
324
  "remove_space": false,
325
  "tokenizer_class": "PreTrainedTokenizerFast",
326
  "unk_token": null,
327
+ "chat_template": "{# Unsloth template fixes #}\n[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{% for tool in tools %}\n{{ tool | tojson|string }}\n{% endfor %}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call>{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{% for m in messages %}\n{%- if m.role == 'user' -%}<|user|>\n{{ visible_text(m.content) }}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith(\"/nothink\")) else '' -}}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set reasoning_content = '' %}\n{%- set content = visible_text(m.content) %}\n{%- if m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n{%- endif %}\n{%- if loop.index0 > ns.last_user_index and reasoning_content -%}\n{{ '\\n<think>' + reasoning_content.strip() + '</think>'}}\n{%- else -%}\n{{ '\\n<think></think>' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ '\\n' + content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{ '\\n<tool_call>' + tc.name }}\n{% set _args = tc.arguments %}{%- if _args is mapping %}\n{% for k, v in _args|items %}\n<arg_key>{{ k }}</arg_key>\n<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>\n{% endfor %}{%- endif %}\n</tool_call>{% endfor %}\n{% endif %}\n{%- elif m.role == 'tool' -%}\n{%- if m.content is string -%}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|observation|>' }}\n{%- endif %}\n{{- '\\n<tool_response>\\n' }}\n{{- m.content }}\n{{- '\\n</tool_response>' }}\n{%- else -%}\n<|observation|>{% for tr in m.content %}\n\n<tool_response>\n{{ tr.output if tr.output is defined else tr }}\n</tool_response>{% endfor -%}\n{% endif -%}\n{%- elif m.role == 'system' -%}\n<|system|>\n{{ visible_text(m.content) }}\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n <|assistant|>{{- '\\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}\n{%- endif -%}\n{# Copyright 2025-present Unsloth. Apache 2.0 License. #}"
328
  }