Map-NEO / configs /training_config.json
Austin207's picture
Upload folder using huggingface_hub
a683148 verified
raw
history blame contribute delete
738 Bytes
{
  "model": {
    "vocab_size": 50257,
    "max_seq_len": 2048,
    "dim": 1024,
    "n_layers": 16,
    "n_heads": 16,
    "hidden_dim": 2736,
    "dropout": 0.0
  },
  "training": {
    "batch_size": 1,
    "gradient_accumulation_steps": 32,
    "max_steps": 50000,
    "warmup_steps": 2000,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "mixed_precision": "bf16",
    "gradient_checkpointing": true
  },
  "data": {
    "seq_length": 1024,
    "data_path": "data/tokens/packed_1024.txt"
  },
  "hardware": {
    "device": "cuda",
    "compile_model": false
  },
  "logging": {
    "log_interval": 10,
    "save_interval": 2000,
    "output_dir": "checkpoints"
  }
}