TinyLM-5M / config.json
User01110's picture
Add Transformers AutoModel wrapper
f8c7082 verified
Raw
History Blame Contribute Delete
711 Bytes
{
"model_type": "hyper_gpt_depth_communicator",
"architectures": [
"HyperGPTForCausalLM"
],
"hyper_config": {
"vocab_size": 4096,
"block_size": 512,
"n_layer": 6,
"n_embd": 256,
"n_head": 4,
"n_kv_head": 2,
"head_dim": 64,
"intermediate_size": 640,
"conn_dim": 64,
"conn_head": 4,
"dropout": 0.0,
"rope_theta": 100000.0
},
"tokenizer_name": "AxiomicLabs/GPT-S-5M",
"use_block_residuals": true,
"use_depth_gate": false,
"depth_gate_init": 0.0,
"conn_stat_batch": 8,
"conn_stat_tokens": 16,
"auto_map": {
"AutoConfig": "hyper.HyperGPTConfig",
"AutoModelForCausalLM": "hyper.HyperGPTForCausalLM"
}
}