Geodd committed on
Commit
1721dc7
·
verified ·
1 Parent(s): 3e6eace

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -115,54 +115,7 @@
115
  }
116
  },
117
  "ignore": [
118
- "lm_head",
119
- "model.layers.0.mlp.gate_proj",
120
- "model.layers.1.mlp.shared_experts.gate_proj",
121
- "model.layers.10.mlp.shared_experts.gate_proj",
122
- "model.layers.11.mlp.shared_experts.gate_proj",
123
- "model.layers.12.mlp.shared_experts.gate_proj",
124
- "model.layers.13.mlp.shared_experts.gate_proj",
125
- "model.layers.14.mlp.shared_experts.gate_proj",
126
- "model.layers.15.mlp.shared_experts.gate_proj",
127
- "model.layers.16.mlp.shared_experts.gate_proj",
128
- "model.layers.17.mlp.shared_experts.gate_proj",
129
- "model.layers.18.mlp.shared_experts.gate_proj",
130
- "model.layers.19.mlp.shared_experts.gate_proj",
131
- "model.layers.2.mlp.shared_experts.gate_proj",
132
- "model.layers.20.mlp.shared_experts.gate_proj",
133
- "model.layers.21.mlp.shared_experts.gate_proj",
134
- "model.layers.22.mlp.shared_experts.gate_proj",
135
- "model.layers.23.mlp.shared_experts.gate_proj",
136
- "model.layers.24.mlp.shared_experts.gate_proj",
137
- "model.layers.25.mlp.shared_experts.gate_proj",
138
- "model.layers.26.mlp.shared_experts.gate_proj",
139
- "model.layers.27.mlp.shared_experts.gate_proj",
140
- "model.layers.28.mlp.shared_experts.gate_proj",
141
- "model.layers.29.mlp.shared_experts.gate_proj",
142
- "model.layers.3.mlp.shared_experts.gate_proj",
143
- "model.layers.30.mlp.shared_experts.gate_proj",
144
- "model.layers.31.mlp.shared_experts.gate_proj",
145
- "model.layers.32.mlp.shared_experts.gate_proj",
146
- "model.layers.33.mlp.shared_experts.gate_proj",
147
- "model.layers.34.mlp.shared_experts.gate_proj",
148
- "model.layers.35.mlp.shared_experts.gate_proj",
149
- "model.layers.36.mlp.shared_experts.gate_proj",
150
- "model.layers.37.mlp.shared_experts.gate_proj",
151
- "model.layers.38.mlp.shared_experts.gate_proj",
152
- "model.layers.39.mlp.shared_experts.gate_proj",
153
- "model.layers.4.mlp.shared_experts.gate_proj",
154
- "model.layers.40.mlp.shared_experts.gate_proj",
155
- "model.layers.41.mlp.shared_experts.gate_proj",
156
- "model.layers.42.mlp.shared_experts.gate_proj",
157
- "model.layers.43.mlp.shared_experts.gate_proj",
158
- "model.layers.44.mlp.shared_experts.gate_proj",
159
- "model.layers.45.mlp.shared_experts.gate_proj",
160
- "model.layers.46.mlp.shared_experts.gate_proj",
161
- "model.layers.5.mlp.shared_experts.gate_proj",
162
- "model.layers.6.mlp.shared_experts.gate_proj",
163
- "model.layers.7.mlp.shared_experts.gate_proj",
164
- "model.layers.8.mlp.shared_experts.gate_proj",
165
- "model.layers.9.mlp.shared_experts.gate_proj"
166
  ],
167
  "quant_algo": "W8A16",
168
  "producer": {
 
115
  }
116
  },
117
  "ignore": [
118
+ "lm_head"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ],
120
  "quant_algo": "W8A16",
121
  "producer": {
hf_quant_config.json CHANGED
@@ -7,54 +7,7 @@
7
  "quant_algo": "W8A16",
8
  "kv_cache_quant_algo": null,
9
  "exclude_modules": [
10
- "lm_head",
11
- "model.layers.0.mlp.gate_proj",
12
- "model.layers.1.mlp.shared_experts.gate_proj",
13
- "model.layers.10.mlp.shared_experts.gate_proj",
14
- "model.layers.11.mlp.shared_experts.gate_proj",
15
- "model.layers.12.mlp.shared_experts.gate_proj",
16
- "model.layers.13.mlp.shared_experts.gate_proj",
17
- "model.layers.14.mlp.shared_experts.gate_proj",
18
- "model.layers.15.mlp.shared_experts.gate_proj",
19
- "model.layers.16.mlp.shared_experts.gate_proj",
20
- "model.layers.17.mlp.shared_experts.gate_proj",
21
- "model.layers.18.mlp.shared_experts.gate_proj",
22
- "model.layers.19.mlp.shared_experts.gate_proj",
23
- "model.layers.2.mlp.shared_experts.gate_proj",
24
- "model.layers.20.mlp.shared_experts.gate_proj",
25
- "model.layers.21.mlp.shared_experts.gate_proj",
26
- "model.layers.22.mlp.shared_experts.gate_proj",
27
- "model.layers.23.mlp.shared_experts.gate_proj",
28
- "model.layers.24.mlp.shared_experts.gate_proj",
29
- "model.layers.25.mlp.shared_experts.gate_proj",
30
- "model.layers.26.mlp.shared_experts.gate_proj",
31
- "model.layers.27.mlp.shared_experts.gate_proj",
32
- "model.layers.28.mlp.shared_experts.gate_proj",
33
- "model.layers.29.mlp.shared_experts.gate_proj",
34
- "model.layers.3.mlp.shared_experts.gate_proj",
35
- "model.layers.30.mlp.shared_experts.gate_proj",
36
- "model.layers.31.mlp.shared_experts.gate_proj",
37
- "model.layers.32.mlp.shared_experts.gate_proj",
38
- "model.layers.33.mlp.shared_experts.gate_proj",
39
- "model.layers.34.mlp.shared_experts.gate_proj",
40
- "model.layers.35.mlp.shared_experts.gate_proj",
41
- "model.layers.36.mlp.shared_experts.gate_proj",
42
- "model.layers.37.mlp.shared_experts.gate_proj",
43
- "model.layers.38.mlp.shared_experts.gate_proj",
44
- "model.layers.39.mlp.shared_experts.gate_proj",
45
- "model.layers.4.mlp.shared_experts.gate_proj",
46
- "model.layers.40.mlp.shared_experts.gate_proj",
47
- "model.layers.41.mlp.shared_experts.gate_proj",
48
- "model.layers.42.mlp.shared_experts.gate_proj",
49
- "model.layers.43.mlp.shared_experts.gate_proj",
50
- "model.layers.44.mlp.shared_experts.gate_proj",
51
- "model.layers.45.mlp.shared_experts.gate_proj",
52
- "model.layers.46.mlp.shared_experts.gate_proj",
53
- "model.layers.5.mlp.shared_experts.gate_proj",
54
- "model.layers.6.mlp.shared_experts.gate_proj",
55
- "model.layers.7.mlp.shared_experts.gate_proj",
56
- "model.layers.8.mlp.shared_experts.gate_proj",
57
- "model.layers.9.mlp.shared_experts.gate_proj"
58
  ]
59
  }
60
  }
 
7
  "quant_algo": "W8A16",
8
  "kv_cache_quant_algo": null,
9
  "exclude_modules": [
10
+ "lm_head"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ]
12
  }
13
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b89f4089f79e8ed75ffab5502f411e0f163ec1d4dc31ab173b8490bd9473ca31
3
- size 49839252352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d16fe9402a1d9d91bd042840e31cfa7dd9f7d13e685824387c7bf1d5a2d3001b
3
+ size 49695882864
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84dad1ccfe088bca7359a69f5d4833b30b3633b9db383da11cfeb074ae011f63
3
- size 8698788944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385786ac66776950162d4e7b1da147f06ec68dfd297c1511a65b17d5a5812d3e
3
+ size 8676812696
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 29943390976,
4
- "total_size": 58536807424
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00001-of-00002.safetensors",
@@ -10,6 +10,7 @@
10
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
11
  "model.layers.0.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
12
  "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
 
13
  "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
14
  "model.layers.0.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
15
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -223,6 +224,7 @@
223
  "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
224
  "model.layers.1.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
225
  "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
226
  "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
227
  "model.layers.1.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
228
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -436,6 +438,7 @@
436
  "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
437
  "model.layers.10.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
438
  "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
439
  "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
440
  "model.layers.10.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
441
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -649,6 +652,7 @@
649
  "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
650
  "model.layers.11.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
651
  "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
652
  "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
653
  "model.layers.11.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
654
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -862,6 +866,7 @@
862
  "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
863
  "model.layers.12.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
864
  "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
865
  "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
866
  "model.layers.12.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
867
  "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -1075,6 +1080,7 @@
1075
  "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1076
  "model.layers.13.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1077
  "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
1078
  "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1079
  "model.layers.13.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1080
  "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -1288,6 +1294,7 @@
1288
  "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1289
  "model.layers.14.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1290
  "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
1291
  "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1292
  "model.layers.14.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1293
  "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -1501,6 +1508,7 @@
1501
  "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1502
  "model.layers.15.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1503
  "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
1504
  "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1505
  "model.layers.15.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1506
  "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -1714,6 +1722,7 @@
1714
  "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1715
  "model.layers.16.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1716
  "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
1717
  "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1718
  "model.layers.16.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1719
  "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -1927,6 +1936,7 @@
1927
  "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1928
  "model.layers.17.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1929
  "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
1930
  "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1931
  "model.layers.17.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1932
  "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -2140,6 +2150,7 @@
2140
  "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2141
  "model.layers.18.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2142
  "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
2143
  "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2144
  "model.layers.18.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2145
  "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -2353,6 +2364,7 @@
2353
  "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2354
  "model.layers.19.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2355
  "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
2356
  "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2357
  "model.layers.19.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2358
  "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -2566,6 +2578,7 @@
2566
  "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2567
  "model.layers.2.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2568
  "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
2569
  "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2570
  "model.layers.2.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2571
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -2779,6 +2792,7 @@
2779
  "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2780
  "model.layers.20.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2781
  "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
2782
  "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2783
  "model.layers.20.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2784
  "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -2992,6 +3006,7 @@
2992
  "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2993
  "model.layers.21.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2994
  "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
2995
  "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2996
  "model.layers.21.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2997
  "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -3205,6 +3220,7 @@
3205
  "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3206
  "model.layers.22.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3207
  "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
3208
  "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3209
  "model.layers.22.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3210
  "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -3418,6 +3434,7 @@
3418
  "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3419
  "model.layers.23.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3420
  "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
3421
  "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3422
  "model.layers.23.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3423
  "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -3631,6 +3648,7 @@
3631
  "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3632
  "model.layers.24.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3633
  "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
3634
  "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3635
  "model.layers.24.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3636
  "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -3844,6 +3862,7 @@
3844
  "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3845
  "model.layers.25.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3846
  "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
3847
  "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3848
  "model.layers.25.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3849
  "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -4057,6 +4076,7 @@
4057
  "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4058
  "model.layers.26.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4059
  "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
4060
  "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4061
  "model.layers.26.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4062
  "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -4270,6 +4290,7 @@
4270
  "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4271
  "model.layers.27.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4272
  "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
4273
  "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4274
  "model.layers.27.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4275
  "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -4483,6 +4504,7 @@
4483
  "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4484
  "model.layers.28.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4485
  "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
4486
  "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4487
  "model.layers.28.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4488
  "model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -4696,6 +4718,7 @@
4696
  "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4697
  "model.layers.29.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4698
  "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
4699
  "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4700
  "model.layers.29.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4701
  "model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -4909,6 +4932,7 @@
4909
  "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4910
  "model.layers.3.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4911
  "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
4912
  "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4913
  "model.layers.3.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4914
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -5122,6 +5146,7 @@
5122
  "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5123
  "model.layers.30.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5124
  "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
5125
  "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5126
  "model.layers.30.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5127
  "model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -5335,6 +5360,7 @@
5335
  "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5336
  "model.layers.31.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5337
  "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
5338
  "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5339
  "model.layers.31.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5340
  "model.layers.31.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -5548,6 +5574,7 @@
5548
  "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5549
  "model.layers.32.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5550
  "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
5551
  "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5552
  "model.layers.32.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5553
  "model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -5761,6 +5788,7 @@
5761
  "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5762
  "model.layers.33.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5763
  "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
5764
  "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5765
  "model.layers.33.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5766
  "model.layers.33.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -5974,6 +6002,7 @@
5974
  "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5975
  "model.layers.34.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5976
  "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
5977
  "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5978
  "model.layers.34.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5979
  "model.layers.34.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -6187,6 +6216,7 @@
6187
  "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6188
  "model.layers.35.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6189
  "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
6190
  "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6191
  "model.layers.35.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6192
  "model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -6400,6 +6430,7 @@
6400
  "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6401
  "model.layers.36.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6402
  "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
6403
  "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6404
  "model.layers.36.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6405
  "model.layers.36.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -6613,6 +6644,7 @@
6613
  "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6614
  "model.layers.37.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6615
  "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
6616
  "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6617
  "model.layers.37.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6618
  "model.layers.37.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -6826,6 +6858,7 @@
6826
  "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6827
  "model.layers.38.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6828
  "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
6829
  "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6830
  "model.layers.38.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6831
  "model.layers.38.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -7039,6 +7072,7 @@
7039
  "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
7040
  "model.layers.39.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
7041
  "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
7042
  "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
7043
  "model.layers.39.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
7044
  "model.layers.39.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -7252,6 +7286,7 @@
7252
  "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
7253
  "model.layers.4.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
7254
  "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
7255
  "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
7256
  "model.layers.4.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
7257
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -7465,6 +7500,7 @@
7465
  "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7466
  "model.layers.40.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7467
  "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
7468
  "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7469
  "model.layers.40.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7470
  "model.layers.40.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -7678,6 +7714,7 @@
7678
  "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7679
  "model.layers.41.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7680
  "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
7681
  "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7682
  "model.layers.41.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7683
  "model.layers.41.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -7891,6 +7928,7 @@
7891
  "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7892
  "model.layers.42.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7893
  "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
7894
  "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7895
  "model.layers.42.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7896
  "model.layers.42.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -8104,6 +8142,7 @@
8104
  "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8105
  "model.layers.43.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8106
  "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
8107
  "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8108
  "model.layers.43.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8109
  "model.layers.43.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -8317,6 +8356,7 @@
8317
  "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8318
  "model.layers.44.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8319
  "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
8320
  "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8321
  "model.layers.44.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8322
  "model.layers.44.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -8530,6 +8570,7 @@
8530
  "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8531
  "model.layers.45.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8532
  "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
8533
  "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8534
  "model.layers.45.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8535
  "model.layers.45.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -8743,6 +8784,7 @@
8743
  "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8744
  "model.layers.46.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8745
  "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
 
8746
  "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8747
  "model.layers.46.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8748
  "model.layers.46.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -8956,6 +8998,7 @@
8956
  "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
8957
  "model.layers.5.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
8958
  "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
8959
  "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
8960
  "model.layers.5.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
8961
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -9169,6 +9212,7 @@
9169
  "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9170
  "model.layers.6.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9171
  "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
9172
  "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9173
  "model.layers.6.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9174
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -9382,6 +9426,7 @@
9382
  "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9383
  "model.layers.7.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9384
  "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
9385
  "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9386
  "model.layers.7.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9387
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -9595,6 +9640,7 @@
9595
  "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9596
  "model.layers.8.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9597
  "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
9598
  "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9599
  "model.layers.8.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9600
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
@@ -9808,6 +9854,7 @@
9808
  "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9809
  "model.layers.9.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9810
  "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
 
9811
  "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9812
  "model.layers.9.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9813
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
  "total_parameters": 29943390976,
4
+ "total_size": 58371456000
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00001-of-00002.safetensors",
 
10
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
11
  "model.layers.0.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
12
  "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
14
  "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
15
  "model.layers.0.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
16
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
224
  "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
225
  "model.layers.1.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
226
  "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
227
+ "model.layers.1.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
228
  "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
229
  "model.layers.1.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
230
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
438
  "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
439
  "model.layers.10.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
440
  "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
441
+ "model.layers.10.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
442
  "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
443
  "model.layers.10.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
444
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
652
  "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
653
  "model.layers.11.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
654
  "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
655
+ "model.layers.11.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
656
  "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
657
  "model.layers.11.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
658
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
866
  "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
867
  "model.layers.12.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
868
  "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
869
+ "model.layers.12.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
870
  "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
871
  "model.layers.12.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
872
  "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1080
  "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1081
  "model.layers.13.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1082
  "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
1083
+ "model.layers.13.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
1084
  "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1085
  "model.layers.13.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1086
  "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1294
  "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1295
  "model.layers.14.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1296
  "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
1297
+ "model.layers.14.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
1298
  "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1299
  "model.layers.14.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1300
  "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1508
  "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1509
  "model.layers.15.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1510
  "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
1511
+ "model.layers.15.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
1512
  "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1513
  "model.layers.15.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1514
  "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1722
  "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1723
  "model.layers.16.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1724
  "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
1725
+ "model.layers.16.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
1726
  "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1727
  "model.layers.16.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1728
  "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
1936
  "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
1937
  "model.layers.17.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
1938
  "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
1939
+ "model.layers.17.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
1940
  "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
1941
  "model.layers.17.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
1942
  "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
2150
  "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2151
  "model.layers.18.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2152
  "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
2153
+ "model.layers.18.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
2154
  "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2155
  "model.layers.18.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2156
  "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
2364
  "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2365
  "model.layers.19.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2366
  "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
2367
+ "model.layers.19.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
2368
  "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2369
  "model.layers.19.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2370
  "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
2578
  "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2579
  "model.layers.2.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2580
  "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
2581
+ "model.layers.2.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
2582
  "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2583
  "model.layers.2.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2584
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
2792
  "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
2793
  "model.layers.20.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
2794
  "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
2795
+ "model.layers.20.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
2796
  "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
2797
  "model.layers.20.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
2798
  "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
3006
  "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3007
  "model.layers.21.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3008
  "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
3009
+ "model.layers.21.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
3010
  "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3011
  "model.layers.21.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3012
  "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
3220
  "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3221
  "model.layers.22.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3222
  "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
3223
+ "model.layers.22.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
3224
  "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3225
  "model.layers.22.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3226
  "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
3434
  "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3435
  "model.layers.23.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3436
  "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
3437
+ "model.layers.23.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
3438
  "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3439
  "model.layers.23.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3440
  "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
3648
  "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3649
  "model.layers.24.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3650
  "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
3651
+ "model.layers.24.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
3652
  "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3653
  "model.layers.24.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3654
  "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
3862
  "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
3863
  "model.layers.25.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
3864
  "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
3865
+ "model.layers.25.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
3866
  "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
3867
  "model.layers.25.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
3868
  "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
4076
  "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4077
  "model.layers.26.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4078
  "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
4079
+ "model.layers.26.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
4080
  "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4081
  "model.layers.26.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4082
  "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
4290
  "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4291
  "model.layers.27.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4292
  "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
4293
+ "model.layers.27.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
4294
  "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4295
  "model.layers.27.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4296
  "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
4504
  "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4505
  "model.layers.28.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4506
  "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
4507
+ "model.layers.28.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
4508
  "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4509
  "model.layers.28.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4510
  "model.layers.28.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
4718
  "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4719
  "model.layers.29.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4720
  "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
4721
+ "model.layers.29.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
4722
  "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4723
  "model.layers.29.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4724
  "model.layers.29.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
4932
  "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
4933
  "model.layers.3.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
4934
  "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
4935
+ "model.layers.3.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
4936
  "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
4937
  "model.layers.3.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
4938
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
5146
  "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5147
  "model.layers.30.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5148
  "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
5149
+ "model.layers.30.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
5150
  "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5151
  "model.layers.30.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5152
  "model.layers.30.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
5360
  "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5361
  "model.layers.31.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5362
  "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
5363
+ "model.layers.31.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
5364
  "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5365
  "model.layers.31.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5366
  "model.layers.31.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
5574
  "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5575
  "model.layers.32.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5576
  "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
5577
+ "model.layers.32.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
5578
  "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5579
  "model.layers.32.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5580
  "model.layers.32.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
5788
  "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
5789
  "model.layers.33.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
5790
  "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
5791
+ "model.layers.33.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
5792
  "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
5793
  "model.layers.33.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
5794
  "model.layers.33.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
6002
  "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6003
  "model.layers.34.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6004
  "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
6005
+ "model.layers.34.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
6006
  "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6007
  "model.layers.34.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6008
  "model.layers.34.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
6216
  "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6217
  "model.layers.35.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6218
  "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
6219
+ "model.layers.35.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
6220
  "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6221
  "model.layers.35.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6222
  "model.layers.35.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
6430
  "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6431
  "model.layers.36.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6432
  "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
6433
+ "model.layers.36.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
6434
  "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6435
  "model.layers.36.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6436
  "model.layers.36.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
6644
  "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6645
  "model.layers.37.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6646
  "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
6647
+ "model.layers.37.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
6648
  "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6649
  "model.layers.37.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6650
  "model.layers.37.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
6858
  "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
6859
  "model.layers.38.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
6860
  "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
6861
+ "model.layers.38.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
6862
  "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
6863
  "model.layers.38.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
6864
  "model.layers.38.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
7072
  "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
7073
  "model.layers.39.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
7074
  "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
7075
+ "model.layers.39.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
7076
  "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
7077
  "model.layers.39.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
7078
  "model.layers.39.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
7286
  "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
7287
  "model.layers.4.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
7288
  "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
7289
+ "model.layers.4.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
7290
  "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
7291
  "model.layers.4.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
7292
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
7500
  "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7501
  "model.layers.40.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7502
  "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
7503
+ "model.layers.40.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
7504
  "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7505
  "model.layers.40.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7506
  "model.layers.40.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
7714
  "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7715
  "model.layers.41.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7716
  "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
7717
+ "model.layers.41.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
7718
  "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7719
  "model.layers.41.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7720
  "model.layers.41.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
7928
  "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
7929
  "model.layers.42.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
7930
  "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
7931
+ "model.layers.42.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
7932
  "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
7933
  "model.layers.42.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
7934
  "model.layers.42.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
8142
  "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8143
  "model.layers.43.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8144
  "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
8145
+ "model.layers.43.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
8146
  "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8147
  "model.layers.43.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8148
  "model.layers.43.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
8356
  "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8357
  "model.layers.44.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8358
  "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
8359
+ "model.layers.44.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
8360
  "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8361
  "model.layers.44.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8362
  "model.layers.44.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
8570
  "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8571
  "model.layers.45.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8572
  "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
8573
+ "model.layers.45.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
8574
  "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8575
  "model.layers.45.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8576
  "model.layers.45.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
8784
  "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00002-of-00002.safetensors",
8785
  "model.layers.46.mlp.shared_experts.down_proj.weight_scale": "model-00002-of-00002.safetensors",
8786
  "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00002-of-00002.safetensors",
8787
+ "model.layers.46.mlp.shared_experts.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
8788
  "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00002-of-00002.safetensors",
8789
  "model.layers.46.mlp.shared_experts.up_proj.weight_scale": "model-00002-of-00002.safetensors",
8790
  "model.layers.46.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
8998
  "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
8999
  "model.layers.5.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9000
  "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
9001
+ "model.layers.5.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
9002
  "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9003
  "model.layers.5.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9004
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
9212
  "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9213
  "model.layers.6.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9214
  "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
9215
+ "model.layers.6.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
9216
  "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9217
  "model.layers.6.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9218
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
9426
  "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9427
  "model.layers.7.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9428
  "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
9429
+ "model.layers.7.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
9430
  "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9431
  "model.layers.7.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9432
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
9640
  "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9641
  "model.layers.8.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9642
  "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
9643
+ "model.layers.8.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
9644
  "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9645
  "model.layers.8.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9646
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
 
9854
  "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00001-of-00002.safetensors",
9855
  "model.layers.9.mlp.shared_experts.down_proj.weight_scale": "model-00001-of-00002.safetensors",
9856
  "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00001-of-00002.safetensors",
9857
+ "model.layers.9.mlp.shared_experts.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
9858
  "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00001-of-00002.safetensors",
9859
  "model.layers.9.mlp.shared_experts.up_proj.weight_scale": "model-00001-of-00002.safetensors",
9860
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",