{
  "d_model": 768,
  "num_heads": 12,
  "per_head_dim": 64,
  "dropout": 0.1,
  "enable_rope": true,
  "ffn_dim": 3072,
  "vocab_size": 50257,
  "num_layers": 12,
  "max_seq_len": 1024
}
{
  "d_model": 768,
  "num_heads": 12,
  "per_head_dim": 64,
  "dropout": 0.1,
  "enable_rope": true,
  "ffn_dim": 3072,
  "vocab_size": 50257,
  "num_layers": 12,
  "max_seq_len": 1024
}