{
  "model_name": "meta-llama/Meta-Llama-3-8B",
  "num_heads": 3,
  "head_layer_indices": [
    8,
    16,
    24
  ],
  "quantization": "4bit",
  "hidden_size": 4096,
  "vocab_size": 128256,
  "num_hidden_layers": 32,
  "training_config": {
    "dataset_name": "wikitext",
    "dataset_config_name": "wikitext-2-raw-v1",
    "batch_size": 2,
    "gradient_accumulation_steps": 8,
    "max_steps": 3000,
    "lr": 0.0001,
    "max_length": 512
  }
}