Training completed: lora mode
Files changed:
- config.json (+1 -1)
- model-00001-of-00004.safetensors (+2 -2)
- model-00002-of-00004.safetensors (+2 -2)
- model-00003-of-00004.safetensors (+2 -2)
- model-00004-of-00004.safetensors (+2 -2)
- model.safetensors.index.json (+27 -27)
config.json

@@ -64,5 +64,5 @@
   "transformers_version": "4.57.3",
   "use_cache": true,
   "use_sliding_window": false,
-  "vocab_size":
+  "vocab_size": 172149
 }
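As a quick sanity check, the updated vocab_size can be read back from the committed config.json with the transformers AutoConfig helper. This is a minimal sketch, not part of the commit; "user/repo" is a placeholder repository id.

# Minimal sketch: confirm the vocab_size recorded in the committed config.json.
# "user/repo" is a placeholder repository id, not taken from this commit.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("user/repo")
print(config.vocab_size)  # expected: 172149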
model-00001-of-00004.safetensors

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ac441a8c341009c2c2e7303b60809e021b4a4c85883d2883ec2cbc93bfba9c10
+size 4992602008
model-00002-of-00004.safetensors

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:95df3569d118243abac6a310ecac2a331f50831cb90bd0dd32c2842ca4078058
+size 4949553568
model-00003-of-00004.safetensors

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9e4e1a18759efa122b503668d40c6506dd7edd37f92122f5023e26ee074fbc71
+size 4944309120
model-00004-of-00004.safetensors

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9df5b8499787780210223204855f70581c850bc707daf6a9b0c679a14f434f08
+size 1410434256
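Each of the four shard entries above is a Git LFS pointer: a three-line stub recording the spec version, the sha256 of the actual blob, and its size in bytes. A downloaded shard can be checked against its pointer; the sketch below is illustrative only (not part of the commit) and uses the hash and size of model-00001-of-00004.safetensors from the diff above.

# Illustrative only: verify a downloaded shard against its Git LFS pointer.
import hashlib
import os

EXPECTED_SHA256 = "ac441a8c341009c2c2e7303b60809e021b4a4c85883d2883ec2cbc93bfba9c10"
EXPECTED_SIZE = 4992602008  # bytes, from the model-00001 pointer above

def verify_shard(path: str) -> bool:
    # Cheap check first: the byte size recorded in the pointer.
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    # Then the content hash, streamed in 1 MiB chunks.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_SHA256

# verify_shard("model-00001-of-00004.safetensors")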
model.safetensors.index.json

@@ -1,7 +1,7 @@
 {
   "metadata": {
-    "total_parameters":
-    "total_size":
+    "total_parameters": 4074213376,
+    "total_size": 16296853504
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",

@@ -126,11 +126,11 @@
     "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.19.input_layernorm.weight": "model-
-    "model.layers.19.mlp.down_proj.weight": "model-
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.19.mlp.up_proj.weight": "model-
-    "model.layers.19.post_attention_layernorm.weight": "model-
+    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
     "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",

@@ -153,12 +153,12 @@
     "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.20.self_attn.k_norm.weight": "model-
-    "model.layers.20.self_attn.k_proj.weight": "model-
-    "model.layers.20.self_attn.o_proj.weight": "model-
-    "model.layers.20.self_attn.q_norm.weight": "model-
-    "model.layers.20.self_attn.q_proj.weight": "model-
-    "model.layers.20.self_attn.v_proj.weight": "model-
+    "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",

@@ -280,11 +280,11 @@
     "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.31.input_layernorm.weight": "model-
-    "model.layers.31.mlp.down_proj.weight": "model-
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.31.post_attention_layernorm.weight": "model-
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",

@@ -293,15 +293,15 @@
     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
     "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
-    "model.layers.32.mlp.gate_proj.weight": "model-
+    "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.32.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
-    "model.layers.32.self_attn.k_norm.weight": "model-
-    "model.layers.32.self_attn.k_proj.weight": "model-
-    "model.layers.32.self_attn.o_proj.weight": "model-
-    "model.layers.32.self_attn.q_norm.weight": "model-
-    "model.layers.32.self_attn.q_proj.weight": "model-
-    "model.layers.32.self_attn.v_proj.weight": "model-
+    "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
     "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",

@@ -368,11 +368,11 @@
     "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.7.input_layernorm.weight": "model-
-    "model.layers.7.mlp.down_proj.weight": "model-
-    "model.layers.7.mlp.gate_proj.weight": "model-
-    "model.layers.7.mlp.up_proj.weight": "model-
-    "model.layers.7.post_attention_layernorm.weight": "model-
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
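The weight_map in the index ties each tensor name to the shard file that stores it. Below is a minimal sketch of how a single tensor can be resolved through the index with the safetensors library, assuming the shards are already downloaded locally; this is not code from this repository.

# Minimal sketch: resolve one tensor through model.safetensors.index.json
# and load it from the shard that holds it. Requires safetensors and torch.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.19.input_layernorm.weight"  # mapped to model-00002-of-00004.safetensors above
shard = index["weight_map"][name]

with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
print(name, tuple(tensor.shape))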