stk5 committed on
Commit
56100c9
·
verified ·
1 Parent(s): 87fa9e9

Add files using upload-large-folder tool

Browse files
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "LlamaForCausalLM"
4
  ],
@@ -34,7 +35,7 @@
34
  "rope_theta": 500000.0,
35
  "tie_word_embeddings": false,
36
  "torch_dtype": "bfloat16",
37
- "transformers_version": "4.46.3",
38
  "unsloth_version": "2024.11.9",
39
  "use_cache": true,
40
  "vocab_size": 128256
 
1
  {
2
+ "_name_or_path": "/data/nodes/tao/training/local_models/m1h1k1_father_of_forest",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
35
  "rope_theta": 500000.0,
36
  "tie_word_embeddings": false,
37
  "torch_dtype": "bfloat16",
38
+ "transformers_version": "4.47.0",
39
  "unsloth_version": "2024.11.9",
40
  "use_cache": true,
41
  "vocab_size": 128256
generation_config.json CHANGED
@@ -10,5 +10,5 @@
10
  "pad_token_id": 128004,
11
  "temperature": 0.6,
12
  "top_p": 0.9,
13
- "transformers_version": "4.46.3"
14
  }
 
10
  "pad_token_id": 128004,
11
  "temperature": 0.6,
12
  "top_p": 0.9,
13
+ "transformers_version": "4.47.0"
14
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6566e6c6d08f726cd3e4cec641b53da0acd35a6b977666443b294841399a68bc
3
- size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab97aa2718a30d58af3d2b2f5970a3b5167dcda2ef5866912d27d30421769d6
3
+ size 4624360176
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897c65aa4b32fbfedd83e39df58f2f3b66c04ed759665e6f3ebe27b9a70027c7
3
- size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17aa2553875ebb1e8e8c10c8244982515ce85f0447c49e28d0f02378dbd1669c
3
+ size 4714588976
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f110c3555c5df53d4738fdc167f08cc56a75762f3b36c576b05ac7f404fea00
3
- size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a15347bc69b58ce8e00764a944991e859badb58d946be60119d295c7b54d6e0
3
+ size 4681018304
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45896a4165dddfcc42135f86e667ee349ac56f06fde2a4129feb0495d9166e97
3
- size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:422650a464496f6aa1fbe6e3bb69a10e6d948764eb3d60fafbb4cc7383d02719
3
+ size 2040588912
model.safetensors.index.json CHANGED
@@ -104,15 +104,15 @@
104
  "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
105
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
106
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
107
- "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
108
- "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
109
- "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
110
- "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
111
- "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
112
- "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
113
- "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
114
- "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
115
- "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
  "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
117
  "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
118
  "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -124,13 +124,13 @@
124
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
125
  "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
126
  "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
127
- "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
128
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
129
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
130
- "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
131
- "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
132
- "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
133
- "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
134
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
135
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
136
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -203,11 +203,11 @@
203
  "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
204
  "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
205
  "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
206
- "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
207
- "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
208
  "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
209
  "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
210
- "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
211
  "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
212
  "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
213
  "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
@@ -221,24 +221,24 @@
221
  "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
222
  "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
223
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
224
- "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
- "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
- "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
- "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
- "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
- "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
230
- "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
231
- "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
232
- "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
233
  "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
234
  "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
235
- "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
236
- "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
237
  "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
238
- "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
239
- "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
240
- "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
241
- "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
242
  "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
243
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
244
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -275,11 +275,11 @@
275
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
276
  "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
277
  "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
278
- "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
279
- "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
280
- "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
281
- "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
282
- "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
283
  "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
284
  "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
285
  "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
 
104
  "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
105
  "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
106
  "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
116
  "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
117
  "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
118
  "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
 
124
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
125
  "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
126
  "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
128
  "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
129
  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
134
  "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
135
  "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
136
  "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
 
203
  "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
204
  "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
205
  "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00004-of-00004.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
208
  "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
209
  "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
211
  "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
212
  "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
213
  "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
 
221
  "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
222
  "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
223
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
233
  "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
234
  "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
237
  "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
242
  "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
243
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
244
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
 
275
  "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
276
  "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
277
  "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
283
  "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
284
  "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
285
  "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
tokenizer_config.json CHANGED
@@ -2053,6 +2053,7 @@
2053
  "chat_template": "{{- bos_token }}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{{messages[0]['content']|trim}}{{- \"<|start_header_id|>user<|end_header_id|>\\n\\n\" -}}\n{{messages[1]['content']|trim}}<|eot_id|>{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- if messages[2] is defined %}\n{{messages[2]['content']|trim}}<|eot_id|>{%- endif %}{%- endif %}\n",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
 
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
 
2053
  "chat_template": "{{- bos_token }}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{{messages[0]['content']|trim}}{{- \"<|start_header_id|>user<|end_header_id|>\\n\\n\" -}}\n{{messages[1]['content']|trim}}<|eot_id|>{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- if messages[2] is defined %}\n{{messages[2]['content']|trim}}<|eot_id|>{%- endif %}{%- endif %}\n",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
+ "extra_special_tokens": {},
2057
  "model_input_names": [
2058
  "input_ids",
2059
  "attention_mask"