voidful committed on
Commit
ce21e33
·
verified ·
1 Parent(s): 31c7bb9

Training completed: lora mode

Browse files
config.json CHANGED
@@ -64,5 +64,5 @@
64
  "transformers_version": "4.57.3",
65
  "use_cache": true,
66
  "use_sliding_window": false,
67
- "vocab_size": 192629
68
  }
 
64
  "transformers_version": "4.57.3",
65
  "use_cache": true,
66
  "use_sliding_window": false,
67
+ "vocab_size": 172149
68
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca3c45ba437b6059fe3b3ae0881fa318194fc31d1c5f8e271416609efbe67dc4
3
- size 4903452008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceccc0f70ecdf4be84652f8bdf024f49f1f1e9c1332d0b0cddcd424d377cdf22
3
+ size 4992602008
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c6bd63fc9fe553a8392ad937054d5e85bc2d35c6420e7bac4f952393f2e65e5
3
- size 4944309080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a786e48e6e4d5709b557e09a2f8acec5a5ab8af0c5a136de14e3cb7947788ef
3
+ size 4949553568
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:300fe8091ba3b8c7b3e165ccc304fe82878c58690c104d088dfe537178f420e1
3
- size 4944309104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0f2a39ba7bead2a97a64a8738f31e3bf3db155e7a65667a3b21fa7b1634853
3
+ size 4944309120
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be393e0bc602cf6cfa7ab22c76247e4c57085484f43a894b33b6d967e6a74875
3
- size 1714543944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c22e721da22b0b2fc0c3f1dc47e44fd4523738a999b7802259caf46f28ee21d7
3
+ size 1410434256
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 4126642176,
4
- "total_size": 16506568704
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
@@ -126,11 +126,11 @@
126
  "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
127
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
128
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
129
- "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
130
- "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
131
  "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
132
- "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
133
- "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
134
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
135
  "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
136
  "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
@@ -153,12 +153,12 @@
153
  "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
- "model.layers.20.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
157
- "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
158
- "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
159
- "model.layers.20.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
160
- "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
- "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -280,11 +280,11 @@
280
  "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
281
  "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
282
  "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
283
- "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
284
- "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
285
  "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
286
  "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
287
- "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
288
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
289
  "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
290
  "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
@@ -293,15 +293,15 @@
293
  "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
294
  "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
295
  "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
296
- "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
297
  "model.layers.32.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
298
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
299
- "model.layers.32.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
300
- "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
301
- "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
302
- "model.layers.32.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
303
- "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
304
- "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
305
  "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
306
  "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
307
  "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
@@ -368,11 +368,11 @@
368
  "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
369
  "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
370
  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
371
- "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
372
- "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
373
- "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
374
- "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
375
- "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
376
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
377
  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
378
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 4074213376,
4
+ "total_size": 16296853504
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
 
126
  "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
127
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
128
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
130
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
131
  "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
133
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
134
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
135
  "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
136
  "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
 
153
  "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
157
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
158
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
159
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
160
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
161
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
162
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
 
280
  "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
281
  "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
282
  "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
283
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
284
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
285
  "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
286
  "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
287
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
288
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
289
  "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
290
  "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
 
293
  "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
294
  "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors",
295
  "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
296
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
297
  "model.layers.32.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
298
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
299
+ "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
300
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
301
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
302
+ "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
304
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
305
  "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors",
306
  "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
307
  "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
 
368
  "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
369
  "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
370
  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
371
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
372
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
373
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
374
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
375
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
376
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
377
  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
378
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",