| { | |
| "d_k": 64, | |
| "d_model": 1024, | |
| "dropout_prob": 0.1, | |
| "gqa_div": 1, | |
| "max_len": 2048, | |
| "n_heads": 16, | |
| "n_layers": 24, | |
| "norm_eps": 1e-05, | |
| "share_embeds": true, | |
| "vocab_size": 32768 | |
| } |
| { | |
| "d_k": 64, | |
| "d_model": 1024, | |
| "dropout_prob": 0.1, | |
| "gqa_div": 1, | |
| "max_len": 2048, | |
| "n_heads": 16, | |
| "n_layers": 24, | |
| "norm_eps": 1e-05, | |
| "share_embeds": true, | |
| "vocab_size": 32768 | |
| } |