DSSD-Llama3-8B / config.json
valcore's picture
Upload config.json with huggingface_hub
960a41d verified
raw
history blame contribute delete
449 Bytes
{
"model_name": "meta-llama/Meta-Llama-3-8B",
"num_heads": 3,
"head_layer_indices": [
8,
16,
24
],
"quantization": "4bit",
"hidden_size": 4096,
"vocab_size": 128256,
"num_hidden_layers": 32,
"training_config": {
"dataset_name": "wikitext",
"dataset_config_name": "wikitext-2-raw-v1",
"batch_size": 2,
"gradient_accumulation_steps": 8,
"max_steps": 3000,
"lr": 0.0001,
"max_length": 512
}
}