{
  "max_seq_len": 1024,
  "d_model": 768,
  "n_layers": 12,
  "n_heads": 12,
  "use_padded_vocab_size": true,
  "use_rope": true,
  "rope_theta": 10000.0,
  "is_rope_full_precision": true,
  "embd_dropout_p": 0.0,
  "attn_dropout_p": 0.0,
  "residual_dropout_p": 0.0,
  "init_std": 0.02,
  "init_residual_scaled_factor": 2.0
}