Add files using upload-large-folder tool
Browse files- 0000001000/config.json +26 -0
- 0000001000/pytorch_model.bin +3 -0
- 0000002000/config.json +26 -0
- 0000002000/pytorch_model.bin +3 -0
- 0000003000/config.json +26 -0
- 0000003000/pytorch_model.bin +3 -0
- 0000004000/config.json +26 -0
- 0000004000/pytorch_model.bin +3 -0
- 0000005000/config.json +26 -0
- 0000005000/pytorch_model.bin +3 -0
- 0000006000/config.json +26 -0
- 0000006000/pytorch_model.bin +3 -0
- 0000007000/config.json +26 -0
- 0000007000/pytorch_model.bin +3 -0
- 0000008000/config.json +26 -0
- 0000008000/pytorch_model.bin +3 -0
- 0000009000/config.json +26 -0
- 0000009000/pytorch_model.bin +3 -0
- 0000010000/config.json +26 -0
- 0000010000/pytorch_model.bin +3 -0
- 0000011000/config.json +26 -0
- 0000011000/pytorch_model.bin +3 -0
- 0000012000/config.json +26 -0
- 0000012000/pytorch_model.bin +3 -0
- 0000013000/config.json +26 -0
- 0000013000/pytorch_model.bin +3 -0
- 0000014000/config.json +26 -0
- 0000014000/pytorch_model.bin +3 -0
- 0000015000/config.json +26 -0
- 0000015000/pytorch_model.bin +3 -0
- 0000016000/config.json +26 -0
- 0000016000/pytorch_model.bin +3 -0
- 0000017000/config.json +26 -0
- 0000017000/pytorch_model.bin +3 -0
- 0000018000/config.json +26 -0
- 0000018000/pytorch_model.bin +3 -0
- 0000019000/config.json +26 -0
- 0000019000/pytorch_model.bin +3 -0
- 0000020000/config.json +26 -0
- 0000020000/pytorch_model.bin +3 -0
0000001000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000001000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:562162e335add833b1f213040f95ab7fe97ab6bf7831da8b154c300b0520fd57
|
| 3 |
+
size 709111698
|
0000002000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000002000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff2e7413280770f533e7e863b9b22c6cc10f43f900d44c394a81fc7706041ecf
|
| 3 |
+
size 709111698
|
0000003000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000003000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2be48dcbee4ba08f9057c51b5fb1e57938ab3afa35436f9c22901b4d529a707
|
| 3 |
+
size 709111698
|
0000004000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000004000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f50413c1d2ceceba92d31c5f3d45c74102ab701ab30cec92ab2b29b139f6db95
|
| 3 |
+
size 709111698
|
0000005000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000005000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85e026ed04e46571e158f07a61cef80d5044957b4d7fd5ef58492acb8be7c952
|
| 3 |
+
size 709111698
|
0000006000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000006000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e6c3679da9f7c4208aa969a7b6fa30f6458123942d437b6d692d3f020109adb
|
| 3 |
+
size 709111698
|
0000007000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000007000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f7671a84ddd81a3d0b7662366062a348c48ff834e99a213c30e3e8f6ca649ee
|
| 3 |
+
size 709111698
|
0000008000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000008000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c4821e269dd8869d60bd4bc0c23100ff514aadd153b19ade39beba317ad657d
|
| 3 |
+
size 709111698
|
0000009000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000009000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdf8425347c73270d2c0a33c186b4380f87b218433ffd532b5f670fccb3a3fc3
|
| 3 |
+
size 709111698
|
0000010000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000010000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6db6f20aee284badabcf509c3ad3dd12c2513441fec9951c45eb32093adac273
|
| 3 |
+
size 709111698
|
0000011000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000011000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b12a82c8f43bd8ffb7fd6544bed3958d3b1be93469d1e8ad9c6c06f1f8f33452
|
| 3 |
+
size 709111698
|
0000012000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000012000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b024dbcc3db216ac95f5835d8a0d208ac08fb419bd1ed99aee7b0660dd888f7
|
| 3 |
+
size 709111698
|
0000013000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000013000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1414f1084dcf7eef245321daa5582f98c6ed60f59818c6c957cc9af0048ce917
|
| 3 |
+
size 709111698
|
0000014000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000014000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e8b13a6c5a4d53f3d55ba8bd5752f4803a9a08493aeef18cf065f3834161953
|
| 3 |
+
size 709111698
|
0000015000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000015000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0086fd21ed757dac97ef72d522eb51b4f83cd28a4f044917fc0ac385cf680b3e
|
| 3 |
+
size 709111698
|
0000016000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000016000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fea0d263680555f64d92578e5bc49a472aaf255a82aa97f6a9f6f5b46e7a758
|
| 3 |
+
size 709111698
|
0000017000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000017000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf7144795511fa5d99a2357f76f551c43122f93dd84af368a416e40a8a408c61
|
| 3 |
+
size 709111698
|
0000018000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000018000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5cf5e556e041da32cadb7a080d7c59acf753967261408546230b8a0258dda5f
|
| 3 |
+
size 709111698
|
0000019000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 1,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000019000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1baf6886a6d5299bdc5b13e9b89d97b190b4477429f86ff0ff957ab9c69ad55
|
| 3 |
+
size 709111698
|
0000020000/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"attention_bias": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"bos_token_id": 128000,
|
| 5 |
+
"eos_token_id": 128001,
|
| 6 |
+
"head_dim": 64,
|
| 7 |
+
"hidden_act": "silu",
|
| 8 |
+
"hidden_size": 1280,
|
| 9 |
+
"initializer_range": 0.02,
|
| 10 |
+
"intermediate_size": 5120,
|
| 11 |
+
"max_position_embeddings": 2048,
|
| 12 |
+
"mlp_bias": false,
|
| 13 |
+
"model_type": "llama",
|
| 14 |
+
"num_attention_heads": 20,
|
| 15 |
+
"num_hidden_layers": 1,
|
| 16 |
+
"num_key_value_heads": 20,
|
| 17 |
+
"pretraining_tp": 1,
|
| 18 |
+
"rms_norm_eps": 1e-05,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 10000.0,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.50.3",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 128256
|
| 26 |
+
}
|
0000020000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f093584bc6d6b20fa567d67021f4a0e6e1ccaed744270f909c4c9edf84f07e8
|
| 3 |
+
size 709111698
|