- 12_head_baseline_lr_10e-4_head_dim_22_fixed
- 12_head_baseline_lr_11e-4
- 12_head_baseline_lr_12e-4_head_dim_22_fixed
- 12_head_baseline_lr_14e-4_head_dim_22_fixed
- 12_head_baseline_lr_16e-4
- 12_head_baseline_lr_16e-4_head_dim_22
- 12_head_baseline_lr_16e-4_head_dim_22_fixed
- 12_head_baseline_lr_20e-4_head_dim_22
- 12_head_baseline_lr_20e-4_head_dim_22_fixed
- 12_head_baseline_lr_25e-4_head_dim_22_fixed
- 12_head_baseline_lr_30e-4_head_dim_22
- 12_head_baseline_lr_30e-4_head_dim_22_fixed
- 12_head_baseline_lr_35e-4_head_dim_22
- 12_head_baseline_lr_40e-4_head_dim_22
- 12_head_baseline_lr_45e-4_head_dim_22
- 12_head_baseline_lr_50e-4_head_dim_22
- 12_head_baseline_lr_6e-4
- 12_head_baseline_lr_8e-4_head_dim_22_fixed
- 12_head_one_mask_per_head_4_latent_masks
- 12_mini_head_one_mask_per_head_1_latent_mask
- 12_mini_head_one_mask_per_head_1_latent_mask_halved_lr
- 12_mini_head_one_mask_per_head_2_latent_masks
- 12_mini_head_two_masks_4_heads
- 4x_smaller_bs_half_lr_half_seq_len
- allowing_more_selection_patterns
- att_conv_playground
- attention_kindselective_n_heads2_seed1338
- attention_kindselective_n_heads2_seed1339
- attention_kindselective_n_heads2_seed1340
- attention_kindselective_n_heads2_seed1341
- attention_kindselective_n_heads4_seed1338
- attention_kindselective_n_heads4_seed1339
- attention_kindselective_n_heads4_seed1340
- attention_kindselective_n_heads4_seed1341
- attention_kindselective_n_heads4_seed1342
- attention_kindselective_n_heads4_seed1343
- attention_kindselective_n_heads4_seed1344
- attention_kindselective_n_heads4_seed1345
- attention_kindselective_n_heads8_seed1338
- attention_kindselective_n_heads8_seed1339
- attention_kindselective_n_heads8_seed1340
- attention_kindselective_n_heads8_seed1341
- attention_kindself_n_heads2_seed1338
- attention_kindself_n_heads2_seed1339
- attention_kindself_n_heads2_seed1340
- attention_kindself_n_heads2_seed1341
- attention_kindself_n_heads4_seed1338
- attention_kindself_n_heads4_seed1339
- attention_kindself_n_heads4_seed1340
- attention_kindself_n_heads4_seed1341