stk5 committed (verified)
Commit f341f43 · Parent: 918a84c

Add files using upload-large-folder tool

config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "model_cache/deval",
+ "_name_or_path": "/workspace/sn15-pod/fine_tuning/pre_trained",
  "architectures": [
  "LlamaForCausalLM"
  ],
@@ -22,6 +22,7 @@
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
+ "pad_token_id": 128004,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
@@ -33,8 +34,9 @@
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
- "torch_dtype": "float16",
- "transformers_version": "4.46.2",
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.46.3",
+ "unsloth_version": "2024.11.9",
  "use_cache": true,
  "vocab_size": 128256
  }
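The functional changes above are the new pad_token_id and the float16 → bfloat16 switch. A minimal sketch of checking them after download, assuming a hypothetical repo id "user/model" (this commit does not name the repository):

import torch
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("user/model")  # hypothetical repo id
print(config.pad_token_id)  # 128004, added in this commit
print(config.torch_dtype)   # bfloat16, was float16 before this commit

# Load the weights in the dtype they were saved in.
model = AutoModelForCausalLM.from_pretrained("user/model", torch_dtype=torch.bfloat16)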
generation_config.json CHANGED
@@ -6,7 +6,9 @@
  128008,
  128009
  ],
+ "max_length": 131072,
+ "pad_token_id": 128004,
  "temperature": 0.6,
  "top_p": 0.9,
- "transformers_version": "4.46.2"
+ "transformers_version": "4.46.3"
  }
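A quick sketch of reading the updated generation defaults (same hypothetical repo id as above):

from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("user/model")  # hypothetical repo id
print(gen.max_length)              # 131072, added in this commit
print(gen.pad_token_id)            # 128004, added in this commit
print(gen.temperature, gen.top_p)  # 0.6 0.9, unchanged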
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc2c0d16457aa578a458988fef7a5c944dc18b7803c1c3527ddb7a7e0e464d39
- size 4423016320
+ oid sha256:101cdbb4077db22723914c5c74bc94e1cabf23e80e37607c95e0c713264482a2
+ size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c25bcf8689fa06044cccc511196a79aae92812e49ebb68f3c38285ec989a7c1
- size 4479707624
+ oid sha256:965369d6c0e4da9c55b87ed2667be536bb4aad5f7cb629e47b617e9f88cbde09
+ size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:21c89392bdfd51862426ce484dbb8b4993bdd62b7792c19aa86f74af736b63aa
- size 4446136952
+ oid sha256:49b004851aeec693c177902fc5dc0e591d85aa22f891c14995ffb8d1d2847f4c
+ size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc52b35ca253418ef79b3b9e20d0801f344c936fce1749edcf6b091ebc163ae4
- size 2711695184
+ oid sha256:b72dcd7e08e7f403350a02974db7f881ac69d48db7e6f34fc6170ed267979bb2
+ size 1168138808
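The four pointer files above only record the SHA-256 and byte size of each re-uploaded shard; Git LFS serves the actual blobs. A sketch of verifying a downloaded shard against its pointer, using the oid and size for shard 4 above:

import hashlib
import os

path = "model-00004-of-00004.safetensors"
digest = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks to avoid loading the whole shard into memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print(os.path.getsize(path))  # expected 1168138808 after this commit
print(digest.hexdigest())     # expected b72dcd7e08e7f403350a02974db7f881ac69d48db7e6f34fc6170ed267979bb2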
model.safetensors.index.json CHANGED
@@ -95,24 +95,24 @@
  "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -124,13 +124,13 @@
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
- "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -194,24 +194,24 @@
  "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.28.input_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.28.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.28.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.28.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.28.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
- "model.layers.29.input_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.29.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -221,24 +221,24 @@
  "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
- "model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
- "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
  "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -266,24 +266,24 @@
  "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
- "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
- "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
- "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
- "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
- "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
  "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
special_tokens_map.json CHANGED
@@ -12,5 +12,12 @@
  "normalized": false,
  "rstrip": false,
  "single_word": false
+ },
+ "pad_token": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
  }
  }
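A sketch of what the newly registered pad token means in practice (hypothetical repo id again): batches are now right-padded with <|finetune_right_pad_id|> (id 128004) instead of the tokenizer having no pad token at all.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/model")  # hypothetical repo id
print(tok.pad_token, tok.pad_token_id)  # <|finetune_right_pad_id|> 128004

batch = tok(["hi", "a longer example input"], padding=True)
# The shorter sequence is padded on the right with id 128004.
print(batch["input_ids"][0])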
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
- size 17209920
+ oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988
+ size 17210019
tokenizer_config.json CHANGED
@@ -2050,13 +2050,19 @@
  }
  },
  "bos_token": "<|begin_of_text|>",
- "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
+ "chat_template": "{{- bos_token }}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{{messages[0]['content']|trim}}{{- \"<|start_header_id|>user<|end_header_id|>\\n\\n\" -}}\n{{messages[1]['content']|trim}}<|eot_id|>{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- if messages[2] is defined %}\n{{messages[2]['content']|trim}}<|eot_id|>{%- endif %}{%- endif %}\n",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|eot_id|>",
+ "max_length": 2048,
  "model_input_names": [
  "input_ids",
  "attention_mask"
  ],
  "model_max_length": 131072,
- "tokenizer_class": "PreTrainedTokenizerFast"
+ "pad_token": "<|finetune_right_pad_id|>",
+ "padding_side": "right",
+ "stride": 0,
+ "tokenizer_class": "PreTrainedTokenizerFast",
+ "truncation_side": "right",
+ "truncation_strategy": "longest_first"
  }
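The replaced chat_template drops the stock Llama 3.1 tool-calling logic in favor of a fixed system + user (+ optional assistant) layout. A sketch of rendering it, with the same hypothetical repo id as above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/model")  # hypothetical repo id
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)
# Renders the system and user turns with Llama 3 header tokens, then opens
# an assistant header for generation; messages[2], if present, is emitted
# as a pre-filled assistant turn.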