Upload model
Browse files
- config.json +5 -4
- generation_config.json +1 -1
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +7 -7
- tokenizer_config.json +1 -1
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/home/
|
| 3 |
"architectures": [
|
| 4 |
"LlamaForCausalLM"
|
| 5 |
],
|
|
@@ -18,6 +18,7 @@
|
|
| 18 |
"num_attention_heads": 16,
|
| 19 |
"num_hidden_layers": 24,
|
| 20 |
"num_key_value_heads": 16,
|
|
|
|
| 21 |
"rms_norm_eps": 1e-06,
|
| 22 |
"rope_scaling": {
|
| 23 |
"factor": 4.0,
|
|
@@ -26,8 +27,8 @@
|
|
| 26 |
},
|
| 27 |
"rope_theta": 100000,
|
| 28 |
"tie_word_embeddings": false,
|
| 29 |
-
"torch_dtype": "
|
| 30 |
-
"transformers_version": "4.
|
| 31 |
"use_cache": true,
|
| 32 |
"vocab_size": 32256
|
| 33 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/home/dm/prj/py/nn-gpt/out/upload/Models/ABrain/NNGPT-DeepSeek-Coder-1.3B-Instruct",
|
| 3 |
"architectures": [
|
| 4 |
"LlamaForCausalLM"
|
| 5 |
],
|
|
|
|
| 18 |
"num_attention_heads": 16,
|
| 19 |
"num_hidden_layers": 24,
|
| 20 |
"num_key_value_heads": 16,
|
| 21 |
+
"pretraining_tp": 1,
|
| 22 |
"rms_norm_eps": 1e-06,
|
| 23 |
"rope_scaling": {
|
| 24 |
"factor": 4.0,
|
|
|
|
| 27 |
},
|
| 28 |
"rope_theta": 100000,
|
| 29 |
"tie_word_embeddings": false,
|
| 30 |
+
"torch_dtype": "float16",
|
| 31 |
+
"transformers_version": "4.48.3",
|
| 32 |
"use_cache": true,
|
| 33 |
"vocab_size": 32256
|
| 34 |
+
}
|
generation_config.json
CHANGED
|
@@ -2,5 +2,5 @@
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 32013,
|
| 4 |
"eos_token_id": 32021,
|
| 5 |
-
"transformers_version": "4.
|
| 6 |
}
|
|
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 32013,
|
| 4 |
"eos_token_id": 32021,
|
| 5 |
+
"transformers_version": "4.48.3"
|
| 6 |
}
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6fa8663157f2b1ee79d943f48e39ae1cc00f3b36af4914fa5c067a57333778c
|
| 3 |
+
size 4989350312
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc6b0314faf3b20778b12edad6acc738d5ef8ab351204c60cd061ee807110009
|
| 3 |
+
size 132120704
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
|
@@ -149,11 +149,11 @@
|
|
| 149 |
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 150 |
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 151 |
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 152 |
-
"model.layers.23.input_layernorm.weight": "model-
|
| 153 |
-
"model.layers.23.mlp.down_proj.weight": "model-
|
| 154 |
-
"model.layers.23.mlp.gate_proj.weight": "model-
|
| 155 |
-
"model.layers.23.mlp.up_proj.weight": "model-
|
| 156 |
-
"model.layers.23.post_attention_layernorm.weight": "model-
|
| 157 |
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 158 |
"model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 159 |
"model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
|
@@ -221,6 +221,6 @@
|
|
| 221 |
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 222 |
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 223 |
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 224 |
-
"model.norm.weight": "model-
|
| 225 |
}
|
| 226 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 5121445888
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
|
|
|
| 149 |
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 150 |
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 151 |
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 152 |
+
"model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 157 |
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 158 |
"model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 159 |
"model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
|
|
|
| 221 |
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 222 |
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 223 |
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 224 |
+
"model.norm.weight": "model-00001-of-00002.safetensors"
|
| 225 |
}
|
| 226 |
}
|
tokenizer_config.json
CHANGED
|
@@ -189,7 +189,7 @@
|
|
| 189 |
"model_max_length": 16384,
|
| 190 |
"pad_token": "<|EOT|>",
|
| 191 |
"sp_model_kwargs": {},
|
| 192 |
-
"tokenizer_class": "
|
| 193 |
"unk_token": null,
|
| 194 |
"use_default_system_prompt": false
|
| 195 |
}
|
|
|
|
| 189 |
"model_max_length": 16384,
|
| 190 |
"pad_token": "<|EOT|>",
|
| 191 |
"sp_model_kwargs": {},
|
| 192 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 193 |
"unk_token": null,
|
| 194 |
"use_default_system_prompt": false
|
| 195 |
}
|