{
"model": {
"vocab_size": 50257,
"max_seq_len": 2048,
"dim": 1024,
"n_layers": 16,
"n_heads": 16,
"hidden_dim": 2736,
"dropout": 0.0
},
"training": {
"batch_size": 1,
"gradient_accumulation_steps": 32,
"max_steps": 50000,
"warmup_steps": 2000,
"learning_rate": 0.0003,
"weight_decay": 0.01,
"grad_clip": 1.0,
"mixed_precision": "bf16",
"gradient_checkpointing": true
},
"data": {
"seq_length": 1024,
"data_path": "data/tokens/packed_1024.txt"
},
"hardware": {
"device": "cuda",
"compile_model": false
},
"logging": {
"log_interval": 10,
"save_interval": 2000,
"output_dir": "checkpoints"
}
}