{
  "architectures": [
    "OpenLMForCausalLM"
  ],
  "model_type": "openlm",
  "dim": 4096,
  "intermediate_dim_ffn": 14336,
  "n_layers": 32,
  "n_heads": 32,
  "n_heads_kv": 8,
  "vocab_size": 32000,
  "norm_eps": 1e-5,
  "seq_len": 2048,
  "weight_tying": false,
  "apply_qk_norm": false,
  "qk_head_dim": 128,
  "v_head_dim": 128,
  "norm_type": "rms_norm",
  "attn_name": "linear_attn",
  "positional_embedding_type": "rotary",
  "ffn_type": "swiglu",
  "use_decay": true,
  "use_retnet_slopes": false,
  "decay_start": null
}