litelm_moe / config.json
{
"architectures": [
"LLM"
],
"attention_bias": false,
"aux_loss_coef": 0.01,
"dropout": 0.0,
"expert_num": 4,
"flash_attn": true,
"hidden_size": 512,
"intermediate_size": 2048,
"max_seq_len": 512,
"mlp_bias": false,
"model_type": "moe_model",
"n_layers": 8,
"num_attention_heads": 16,
"num_key_value_heads": 8,
"output_router_logits": true,
"topk": 2,
"torch_dtype": "float32",
"transformers_version": "4.54.1",
"vocab_size": 6400
}
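
For reference, a minimal sketch of reading this file and deriving a few sizes it implies (per-head dimension, grouped-query ratio, expert routing). It assumes only that the JSON above is saved locally as `config.json`; nothing beyond the fields shown is used.

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

# 512-dim hidden states split across 16 attention heads -> 32-dim heads.
head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]

# 16 query heads over 8 key/value heads -> each KV head serves 2 query heads (GQA).
queries_per_kv = cfg["num_attention_heads"] // cfg["num_key_value_heads"]

# The MoE router selects topk=2 of expert_num=4 experts per token,
# with an auxiliary load-balancing loss weighted by aux_loss_coef.
print(f"head_dim={head_dim}, queries_per_kv={queries_per_kv}, "
      f"experts={cfg['expert_num']}, topk={cfg['topk']}, "
      f"aux_loss_coef={cfg['aux_loss_coef']}")
```

Note that `model_type` is `"moe_model"`, a custom architecture rather than one built into transformers 4.54.1, so loading the model itself would presumably require the repo's own modeling code (e.g. via `trust_remote_code=True`), not just this config.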