llama3_baseline_dev / config.json
smithblack-0's picture
Update architecture and tokenizer
98abb50 verified
Raw
History Blame Contribute Delete
572 Bytes
{
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration.Llama3Config",
"AutoModelForCausalLM": "huggingface.Llama3ForCausalLM"
},
"head_dim": 48,
"hidden_size": 768,
"intermediate_size": 1568,
"max_position_embeddings": 8192,
"model_type": "llama3_baseline",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"num_key_value_heads": 4,
"rms_norm_eps": 1e-05,
"rope_parameters": null,
"rope_theta": 500000.0,
"tie_word_embeddings": false,
"transformers_version": "5.3.0",
"use_cache": true,
"vocab_size": 50277
}