# Note that some of the fields in this template haven't been filled in yet.
# Please resolve any `null` fields before launching!
precision: amp_bf16
max_seq_len: 2048

# Tokenizer for dataset creation
tokenizer_name: bert-base-uncased

# Base model config
model:
  name: bert
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    max_position_embeddings: 2048
    # Monarch Mixer (M2) sequence-mixing settings
    monarch_mixer_sequence_mixing: true
    long_conv_l_max: 2048
    # Written with a decimal point (1.0e-3) so YAML 1.1 loaders such as
    # PyYAML resolve a float; a bare `1e-3` is read as a string there.
    long_conv_kernel_learning_rate: 1.0e-3
    hyena_lr_pos_emb: 1.0e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: false
    bidirectional: true
    residual_long_conv: true
    use_glu_mlp: true
    use_monarch_mlp: true
    monarch_mlp_nblocks: 4
    use_positional_encodings: true