voidful committed on
Commit
6776800
·
verified ·
1 Parent(s): 4a72c13

Training completed: lora mode

Browse files
config.json CHANGED
@@ -64,5 +64,5 @@
64
  "transformers_version": "4.57.3",
65
  "use_cache": true,
66
  "use_sliding_window": false,
67
- "vocab_size": 192629
68
  }
 
64
  "transformers_version": "4.57.3",
65
  "use_cache": true,
66
  "use_sliding_window": false,
67
+ "vocab_size": 172149
68
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e25a7f3c76b73098e4da6c762c26503c9b529aa986bc8537042151e29440f18f
3
- size 4903452008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac441a8c341009c2c2e7303b60809e021b4a4c85883d2883ec2cbc93bfba9c10
3
+ size 4992602008
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad8cacd63923186bf64ba25c12a70e42d78633857f0d1090d8299b4a3d0b6303
3
- size 4944309080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95df3569d118243abac6a310ecac2a331f50831cb90bd0dd32c2842ca4078058
3
+ size 4949553568
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea94940cc0f1d460ae8c6c5adf26cbea07225d8ecb1d52364319b361be59802
3
- size 4944309104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4e1a18759efa122b503668d40c6506dd7edd37f92122f5023e26ee074fbc71
3
+ size 4944309120
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53f2092845a9dfbdca8ecf3ceefb424e0ed9e5ca0cfe71b35bc6997e02e94aca
3
- size 1714543944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df5b8499787780210223204855f70581c850bc707daf6a9b0c679a14f434f08
3
+ size 1410434256
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 4126642176,
4
- "total_size": 16506568704
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
@@ -126,11 +126,11 @@
126
  "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
127
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
128
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
129
- "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
130
- "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
131
  "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
132
- "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
133
- "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
134
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
135
  "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
136
  "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
@@ -153,12 +153,12 @@
153
  "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
- "model.layers.20.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
157
- "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
158
- "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
159
- "model.layers.20.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
160
- "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
- "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -280,11 +280,11 @@
280
  "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
281
  "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
282
  "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
283
- "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
284
- "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
285
  "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
286
  "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
287
- "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
288
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
289
  "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
290
  "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
@@ -293,15 +293,15 @@
293
  "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
294
  "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
295
  "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
296
- "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
297
  "model.layers.32.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
298
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
299
- "model.layers.32.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
300
- "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
301
- "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
302
- "model.layers.32.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
303
- "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
304
- "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
305
  "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
306
  "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
307
  "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
@@ -368,11 +368,11 @@
368
  "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
369
  "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
370
  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
371
- "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
372
- "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
373
- "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
374
- "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
375
- "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
376
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
377
  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
378
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 4074213376,
4
+ "total_size": 16296853504
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
 
126
  "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
127
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
128
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
130
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
131
  "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
133
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
134
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
135
  "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
136
  "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
 
153
  "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
157
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
158
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
159
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
160
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
161
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
162
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
 
280
  "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
281
  "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
282
  "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
283
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
284
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
285
  "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
286
  "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
287
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
288
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
289
  "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
290
  "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
 
293
  "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
294
  "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
295
  "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
296
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
297
  "model.layers.32.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
298
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
299
+ "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
300
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
301
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
302
+ "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
304
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
305
  "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
306
  "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
307
  "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
 
368
  "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
369
  "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
370
  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
371
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
372
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
373
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
374
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
375
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
376
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
377
  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
378
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",