Upload folder using huggingface_hub
- config.json +1 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +47 -47

config.json CHANGED
@@ -9,7 +9,7 @@
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
-  "intermediate_size":
+  "intermediate_size": 8192,
   "max_position_embeddings": 131072,
   "model_type": "mistral",
   "num_attention_heads": 32,
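
The new value fills in the width of each decoder layer's gated MLP. As a quick sanity check of what that implies for parameter counts, here is a minimal sketch (assuming the standard Mistral-style gate_proj/up_proj/down_proj MLP layout; the local file path is illustrative):

```python
import json

# Load the updated config (path is illustrative; adjust to the local checkout).
with open("config.json") as f:
    cfg = json.load(f)

hidden, inter = cfg["hidden_size"], cfg["intermediate_size"]

# A gated MLP holds three weight matrices: gate_proj and up_proj
# (hidden -> intermediate) plus down_proj (intermediate -> hidden).
mlp_params_per_layer = 3 * hidden * inter
print(f"MLP params per layer: {mlp_params_per_layer:,}")  # 125,829,120 with 5120 x 8192
```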

model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:92977a63d0de7d2e968abf3c6ed919f64328cb17a701d341e1d58100b60c1348
+size 4970465928

model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f58eff1a8d0c8ece6db305126f4814358ce48a46d79152afe99af362a69c56c9
+size 4991523128

model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9a066e36b27eef690dcd7ba1d7218966f292081f3fa848287f22fed4a04e5813
+size 4991523128

model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:96de783d8222d1e08e1c77f627afd970f9079caa3b2fa5456c4e11ac65093eb7
+size 1992347560
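
The four shard entries above are Git LFS pointer files: the repository itself stores only a version line, the `oid sha256:` digest, and the byte `size` of each blob, while LFS fetches the actual shard. A minimal sketch for checking a downloaded shard against its pointer, using only the standard library:

```python
import hashlib
from pathlib import Path

def verify_shard(shard_path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded shard against its Git LFS pointer fields."""
    path = Path(shard_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash 1 MiB at a time
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the pointer for shard 1 of 4 above.
print(verify_shard(
    "model-00001-of-00004.safetensors",
    "92977a63d0de7d2e968abf3c6ed919f64328cb17a701d341e1d58100b60c1348",
    4970465928,
))
```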

model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
 {
   "metadata": {
-    "total_parameters":
-    "total_size":
+    "total_parameters": 8472908800,
+    "total_size": 16945817600
   },
   "weight_map": {
     "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -24,13 +24,13 @@
     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.10.input_layernorm.weight": "model-
-    "model.layers.10.mlp.down_proj.weight": "model-
-    "model.layers.10.mlp.gate_proj.weight": "model-
-    "model.layers.10.mlp.up_proj.weight": "model-
-    "model.layers.10.post_attention_layernorm.weight": "model-
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.10.self_attn.o_proj.weight": "model-
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
@@ -38,10 +38,10 @@
     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.11.self_attn.k_proj.weight": "model-
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.11.self_attn.q_proj.weight": "model-
-    "model.layers.11.self_attn.v_proj.weight": "model-
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
@@ -159,33 +159,33 @@
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.24.input_layernorm.weight": "model-
-    "model.layers.24.mlp.down_proj.weight": "model-
-    "model.layers.24.mlp.gate_proj.weight": "model-
-    "model.layers.24.mlp.up_proj.weight": "model-
-    "model.layers.24.post_attention_layernorm.weight": "model-
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.24.self_attn.o_proj.weight": "model-
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.input_layernorm.weight": "model-
-    "model.layers.25.mlp.down_proj.weight": "model-
-    "model.layers.25.mlp.gate_proj.weight": "model-
-    "model.layers.25.mlp.up_proj.weight": "model-
-    "model.layers.25.post_attention_layernorm.weight": "model-
-    "model.layers.25.self_attn.k_proj.weight": "model-
-    "model.layers.25.self_attn.o_proj.weight": "model-
-    "model.layers.25.self_attn.q_proj.weight": "model-
-    "model.layers.25.self_attn.v_proj.weight": "model-
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.26.mlp.gate_proj.weight": "model-
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.26.self_attn.k_proj.weight": "model-
-    "model.layers.26.self_attn.o_proj.weight": "model-
-    "model.layers.26.self_attn.q_proj.weight": "model-
-    "model.layers.26.self_attn.v_proj.weight": "model-
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -294,24 +294,24 @@
     "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.38.input_layernorm.weight": "model-
-    "model.layers.38.mlp.down_proj.weight": "model-
-    "model.layers.38.mlp.gate_proj.weight": "model-
-    "model.layers.38.mlp.up_proj.weight": "model-
-    "model.layers.38.post_attention_layernorm.weight": "model-
+    "model.layers.38.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
     "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.38.self_attn.o_proj.weight": "model-
+    "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.input_layernorm.weight": "model-
-    "model.layers.39.mlp.down_proj.weight": "model-
-    "model.layers.39.mlp.gate_proj.weight": "model-
-    "model.layers.39.mlp.up_proj.weight": "model-
-    "model.layers.39.post_attention_layernorm.weight": "model-
-    "model.layers.39.self_attn.k_proj.weight": "model-
-    "model.layers.39.self_attn.o_proj.weight": "model-
-    "model.layers.39.self_attn.q_proj.weight": "model-
-    "model.layers.39.self_attn.v_proj.weight": "model-
+    "model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -366,6 +366,6 @@
     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "model.norm.weight": "model-
+    "model.norm.weight": "model-00004-of-00004.safetensors"
   }
 }
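
The index is what lets loaders resolve a sharded checkpoint: `weight_map` names the shard file holding each tensor, and the newly filled metadata is self-consistent, since `total_size` (16,945,817,600 bytes) is exactly twice `total_parameters` (8,472,908,800), i.e. two bytes per parameter, as expected for a 16-bit (bf16/fp16) export. (The shard file sizes sum slightly above `total_size` because each safetensors file also carries its own JSON header.) A small consistency check over the index, again a sketch using only the standard library:

```python
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

meta, weight_map = index["metadata"], index["weight_map"]

# Two bytes per parameter implies a 16-bit (bf16/fp16) export.
assert meta["total_size"] == 2 * meta["total_parameters"]

# Every tensor must point at one of the four shards from this commit.
shards = {f"model-{i:05d}-of-00004.safetensors" for i in range(1, 5)}
assert set(weight_map.values()) <= shards

print(f"{len(weight_map)} tensors across {len(set(weight_map.values()))} shards")
```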