Transformers
maple-attn-test / config.json
alplusplus's picture
Upload config.json with huggingface_hub
98ed930 verified
raw
history blame contribute delete
383 Bytes
{
"vocab_size": 50257,
"d_model": 512,
"n_layers": 8,
"n_heads": 8,
"d_latent": 128,
"n_bits": 3,
"d_rope": 16,
"fff_depth": 2,
"max_seq_len": 2048,
"batch_size": 8,
"lr": 0.0003,
"total_steps": 7500,
"warmup_steps": 500,
"grad_clip": 1.0,
"tokens_target": 5000000000,
"log_every": 50,
"save_every": 1000,
"hf_repo": "alplusplus/maple-attn-test"
}