Upload 6 files
Model Attn/config_Attn.json
ADDED
@@ -0,0 +1,21 @@
+{
+    "context_len": 1536,
+    "ffn_dim_multiplier": null,
+    "ffn_hidden_dims": 556,
+    "moe_active_experts": 2,
+    "moe_aux_loss_coef": 0.01,
+    "moe_eps": 1e-06,
+    "moe_num_experts": 2,
+    "moe_shared_experts": 1,
+    "num_dims": 832,
+    "num_heads": 16,
+    "num_kv_heads": 4,
+    "num_layers": 34,
+    "rmsnorm_eps": 1e-06,
+    "rope_theta": 100000.0,
+    "use_cache": false,
+    "use_flash": true,
+    "use_lossfreebalance": false,
+    "use_moe": false,
+    "vocab_size": 49152
+}
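For reference, a minimal sketch of how this config might be consumed. The field names come straight from config_Attn.json above, but the loading code and the derived quantities (per-head dimension, grouped-query group size) are illustrative assumptions, not part of this upload:

import json

# Parse the attention-variant config added above (path relative to the repo root).
with open("Model Attn/config_Attn.json") as f:
    cfg = json.load(f)

# Derived shapes, assuming standard multi-head attention with grouped-query KV heads.
head_dim = cfg["num_dims"] // cfg["num_heads"]        # 832 // 16 = 52
gqa_groups = cfg["num_heads"] // cfg["num_kv_heads"]  # 16 // 4 = 4 query heads per KV head

print(f"layers={cfg['num_layers']}, head_dim={head_dim}, gqa_groups={gqa_groups}")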
Model Attn/model_Attn.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce97dae14abfa2d03873209ec9b3b64bd4e8268fb8459e8cc6ea12ac7f9c3caa
+size 587931736
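The .safetensors entry is a Git LFS pointer, so the diff records only the hash and byte count; the roughly 588 MB weight file itself sits in LFS storage. A hedged sketch of fetching and inspecting the real file follows; the repo id "user/repo" is a placeholder, and the huggingface_hub/safetensors calls assume the weights were saved as a flat tensor dict:

from huggingface_hub import hf_hub_download
from safetensors import safe_open

# "user/repo" is a placeholder; substitute the actual model repository id.
path = hf_hub_download("user/repo", "Model Attn/model_Attn.safetensors")

# Inspect tensor names and shapes without loading everything into memory.
with safe_open(path, framework="pt") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())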
Model FFN/config_FFN.json
ADDED
@@ -0,0 +1,21 @@
+{
+    "context_len": 1536,
+    "ffn_dim_multiplier": null,
+    "ffn_hidden_dims": 2048,
+    "moe_active_experts": 2,
+    "moe_aux_loss_coef": 0.01,
+    "moe_eps": 1e-06,
+    "moe_num_experts": 2,
+    "moe_shared_experts": 1,
+    "num_dims": 512,
+    "num_heads": 16,
+    "num_kv_heads": 4,
+    "num_layers": 32,
+    "rmsnorm_eps": 1e-06,
+    "rope_theta": 100000.0,
+    "use_cache": false,
+    "use_flash": true,
+    "use_lossfreebalance": false,
+    "use_moe": false,
+    "vocab_size": 49152
+}
Model FFN/model_FFN.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40d14a8c629b30840dce2f4c19da7e21f120c368647b7bb874f6070bcf31f87e
+size 587364560
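Since each pointer records the expected oid sha256 and size, a downloaded copy of model_FFN.safetensors can be checked against the values above. A minimal sketch; the local file path is an assumption:

import hashlib

expected = "40d14a8c629b30840dce2f4c19da7e21f120c368647b7bb874f6070bcf31f87e"

# Hash the locally downloaded weight file in chunks and compare to the LFS pointer.
h = hashlib.sha256()
with open("Model FFN/model_FFN.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "checksum mismatch with the LFS pointer"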
Model MoE 2+1/config_moe_2p1_78e3.safetensors.json
ADDED
@@ -0,0 +1,21 @@
+{
+    "context_len": 1024,
+    "ffn_dim_multiplier": null,
+    "ffn_hidden_dims": 1024,
+    "moe_active_experts": 2,
+    "moe_aux_loss_coef": 0.01,
+    "moe_eps": 1e-06,
+    "moe_num_experts": 2,
+    "moe_shared_experts": 1,
+    "num_dims": 384,
+    "num_heads": 16,
+    "num_kv_heads": 4,
+    "num_layers": 32,
+    "rmsnorm_eps": 1e-06,
+    "rope_theta": 100000.0,
+    "use_cache": false,
+    "use_flash": true,
+    "use_lossfreebalance": true,
+    "use_moe": true,
+    "vocab_size": 49152
+}
Model MoE 2+1/model_moe_2p1_78e3.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4adbb1d15f2f4dd3f88f73a7abcfe32f47a9a74840d985a6536320cc70b8f78e
+size 575923536
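Taken together, the three uploaded configs differ only in a handful of fields (width, depth, FFN size, context length, and the MoE switches). A small sketch that loads all three and prints the fields that vary; the paths simply mirror the file names in this commit:

import json

paths = {
    "Attn": "Model Attn/config_Attn.json",
    "FFN": "Model FFN/config_FFN.json",
    "MoE 2+1": "Model MoE 2+1/config_moe_2p1_78e3.safetensors.json",
}

configs = {name: json.load(open(p)) for name, p in paths.items()}

# Print only the hyperparameters that differ between the three variants.
for key in sorted(configs["Attn"]):
    values = {name: cfg[key] for name, cfg in configs.items()}
    if len(set(map(str, values.values()))) > 1:
        print(key, values)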