{
  "architectures": [
    "CustomLlamaForCausalLM"
  ],
  "d_head": 64,
  "d_mlp_proj": 2560,
  "d_model": 960,
  "dtype": "float32",
  "initializer_range": 0.02,
  "model_type": "custom_llama",
  "n_attn_heads": 15,
  "n_kv_heads": 5,
  "n_layers": 16,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-05,
  "rope_theta": 100000.0,
  "tie_word_embeddings": false,
  "transformers_version": "4.56.1",
  "vocab_size": 49152
}