{
  "vocab_size": 50257,
  "d_model": 512,
  "n_layers": 8,
  "n_heads": 8,
  "d_latent": 128,
  "n_bits": 3,
  "d_rope": 16,
  "fff_depth": 2,
  "max_seq_len": 2048,
  "batch_size": 8,
  "lr": 0.0003,
  "total_steps": 7500,
  "warmup_steps": 500,
  "grad_clip": 1.0,
  "tokens_target": 5000000000,
  "log_every": 50,
  "save_every": 1000,
  "hf_repo": "alplusplus/maple-attn-test"
}