{ "architectures": [ "CustomLlamaForCausalLM" ], "d_head": 64, "d_mlp_proj": 2560, "d_model": 960, "dtype": "float32", "initializer_range": 0.02, "model_type": "custom_llama", "n_attn_heads": 15, "n_kv_heads": 5, "n_layers": 16, "pad_token_id": 0, "rms_norm_eps": 1e-05, "rope_theta": 100000.0, "tie_word_embeddings": false, "transformers_version": "4.56.1", "vocab_size": 49152 }