diff-mlx / vanilla /config.json
guygrigsby's picture
Upload vanilla/config.json with huggingface_hub
e77ed6e verified
raw
history blame contribute delete
692 Bytes
{
"model": {
"dim": 768,
"n_layers": 12,
"n_heads_vanilla": 12,
"qk_head_dim": 64,
"vocab_size": 100277,
"mlp_intermediate": 2048,
"block_size": 2048,
"rope_base": 10000.0,
"rms_eps": 1e-05,
"tie_embeddings": true,
"amp_dtype": "bfloat16"
},
"train": {
"peak_lr": 0.0004,
"warmup_steps": 1000,
"total_tokens": 2000000000,
"micro_batch": 8,
"grad_accum": 4,
"weight_decay": 0.1,
"adam_beta1": 0.9,
"adam_beta2": 0.95,
"adam_eps": 1e-08,
"grad_clip": 1.0,
"eval_every": 1000,
"full_eval_every": 5000,
"monitoring_tokens": 2000000,
"full_eval_tokens": 75000000,
"save_every": 500
}
}