albert / config.json
rfi-irfos's picture
update: num_hidden_layers 21->22 (S10 complete)
5e7de7f verified
raw
history blame contribute delete
569 Bytes
{
"architectures": ["AlbertMoE"],
"model_type": "albert-moe",
"hidden_size": 256,
"num_hidden_layers": 22,
"num_attention_heads": 4,
"num_experts": 12,
"num_experts_per_tok": 3,
"max_position_embeddings": 256,
"vocab_size": 32000,
"weight_quantization": "ternary",
"weight_bits": 1.58,
"num_parameters": 134000000,
"active_parameters_per_token": 33000000,
"expert_skip_rate": 0.75,
"positional_encoding": "rope",
"training_framework": "candle",
"training_language": "rust",
"max_seq_len": 256,
"num_layers": 22,
"num_heads": 4
}