fastplus-50m / config.json
aixk's picture
FastPlus Relativistic SVO Dynamic-Soft-Pooling Model Train Completed Safely
f775178 verified
Raw
History Blame Contribute Delete
430 Bytes
{
"architectures": [
"FastPlusForCausalLM"
],
"dtype": "float32",
"hidden_size": 544,
"initializer_range": 0.02,
"intermediate_size": 1632,
"kd_alpha": 0.4,
"kd_temperature": 2.5,
"max_position_embeddings": 512,
"model_type": "fastplus",
"num_attention_heads": 8,
"num_hidden_layers": 12,
"tie_word_embeddings": true,
"transformers_version": "5.12.0",
"use_cache": false,
"vocab_size": 15968
}