ToastyPigeon committed on
Commit
94a5ea0
·
verified ·
1 Parent(s): 54b6ca9

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -9,7 +9,7 @@
9
  "hidden_act": "silu",
10
  "hidden_size": 5120,
11
  "initializer_range": 0.02,
12
- "intermediate_size": 7168,
13
  "max_position_embeddings": 131072,
14
  "model_type": "mistral",
15
  "num_attention_heads": 32,
 
9
  "hidden_act": "silu",
10
  "hidden_size": 5120,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 8192,
13
  "max_position_embeddings": 131072,
14
  "model_type": "mistral",
15
  "num_attention_heads": 32,
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:181eb4cc2477fdae2c65f338a8ca8834ba518efb927cbd1af788493f5975955a
3
- size 4980973216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92977a63d0de7d2e968abf3c6ed919f64328cb17a701d341e1d58100b60c1348
3
+ size 4970465928
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0d497b709194f5c0180f3e90faa988d1b130b1c5d02e77190d03cff6d0dd331
3
- size 4991544888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58eff1a8d0c8ece6db305126f4814358ce48a46d79152afe99af362a69c56c9
3
+ size 4991523128
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47fb6771de986047fb56f3e222415667b186e5f3170984f9beb93e187befccde
3
- size 4372873032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a066e36b27eef690dcd7ba1d7218966f292081f3fa848287f22fed4a04e5813
3
+ size 4991523128
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c51fcd3f6b660c26921cbcfef56951387774cd2c3313e51545df25cb09f9263a
3
- size 1342177408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96de783d8222d1e08e1c77f627afd970f9079caa3b2fa5456c4e11ac65093eb7
3
+ size 1992347560
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 7843763200,
4
- "total_size": 15687526400
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -24,13 +24,13 @@
24
  "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
  "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
  "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
- "model.layers.10.input_layernorm.weight": "model-00001-of-00004.safetensors",
28
- "model.layers.10.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
29
- "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
30
- "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
31
- "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
32
  "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
33
- "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
34
  "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
35
  "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
36
  "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
@@ -38,10 +38,10 @@
38
  "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
  "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
  "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
- "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
42
  "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
- "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
44
- "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
45
  "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
  "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
  "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
@@ -159,33 +159,33 @@
159
  "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
160
  "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
161
  "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
162
- "model.layers.24.input_layernorm.weight": "model-00002-of-00004.safetensors",
163
- "model.layers.24.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
164
- "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
165
- "model.layers.24.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
166
- "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
167
  "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
168
- "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
169
  "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
170
  "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
171
- "model.layers.25.input_layernorm.weight": "model-00002-of-00004.safetensors",
172
- "model.layers.25.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
173
- "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
174
- "model.layers.25.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
175
- "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
176
- "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
177
- "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
178
- "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
179
- "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
180
  "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
  "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
- "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
183
  "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
  "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
- "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
186
- "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
187
- "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
188
- "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
189
  "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
  "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
  "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -294,24 +294,24 @@
294
  "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
295
  "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
296
  "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
297
- "model.layers.38.input_layernorm.weight": "model-00003-of-00004.safetensors",
298
- "model.layers.38.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
299
- "model.layers.38.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
300
- "model.layers.38.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
301
- "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
302
  "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
303
- "model.layers.38.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
304
  "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
305
  "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
306
- "model.layers.39.input_layernorm.weight": "model-00003-of-00004.safetensors",
307
- "model.layers.39.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
308
- "model.layers.39.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
309
- "model.layers.39.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
310
- "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
311
- "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
312
- "model.layers.39.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
313
- "model.layers.39.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
314
- "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
315
  "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
316
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
317
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -366,6 +366,6 @@
366
  "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
367
  "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
368
  "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
369
- "model.norm.weight": "model-00003-of-00004.safetensors"
370
  }
371
  }
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 8472908800,
4
+ "total_size": 16945817600
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
24
  "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
  "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
  "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
28
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
29
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
30
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
31
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
32
  "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
33
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
34
  "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
35
  "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
36
  "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
38
  "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
  "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
  "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
42
  "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
45
  "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
  "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
  "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
 
159
  "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
160
  "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
161
  "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
162
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
167
  "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
168
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
169
  "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
170
  "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
171
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
173
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
175
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
180
  "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
  "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
183
  "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
  "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
187
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
189
  "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
  "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
  "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
 
294
  "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
295
  "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
296
  "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
297
+ "model.layers.38.input_layernorm.weight": "model-00004-of-00004.safetensors",
298
+ "model.layers.38.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
299
+ "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
300
+ "model.layers.38.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
301
+ "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
302
  "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
304
  "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
305
  "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
306
+ "model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors",
307
+ "model.layers.39.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
308
+ "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
309
+ "model.layers.39.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
310
+ "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
311
+ "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
312
+ "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
313
+ "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
314
+ "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
315
  "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
316
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
317
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
 
366
  "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
367
  "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
368
  "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
369
+ "model.norm.weight": "model-00004-of-00004.safetensors"
370
  }
371
  }