File size: 691 Bytes
f35adfe | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | {
"architectures": [
"HybridMoRMoEForCausalLM"
],
"complexity_hidden_dim": 64,
"d_ff": 1536,
"d_model": 576,
"dropout": 0.05,
"dtype": "float32",
"eos_token_id": 151645,
"max_recursions": 3,
"max_seq_len": 4096,
"model_size": "small",
"model_type": "hybrid_mor_moe",
"moe_aux_loss_coef": 0.0001,
"n_heads": 8,
"num_base_layers": 6,
"num_experts": 4,
"num_experts_per_tok": 1,
"num_hidden_layers": 30,
"num_recursions": 3,
"num_shared_blocks": 6,
"num_unique_last_layers": 2,
"pad_token_id": 151643,
"router_aux_loss_coef": 0.0001,
"router_percentile": 0.7,
"transformers_version": "5.4.0",
"use_cache": false,
"vocab_size": 151665
}
|