# Tags: biology, genomics, DNA
# File: evo2_20b/config.yml
# Last commit: "Add model configuration" (205923d, verified) by gbrixi
# Model configuration for shc-evo2-20b.
# Booleans are written as canonical lowercase true/false so that every YAML
# parser (1.1 and 1.2) agrees on their type.

# --- Identity and core sizes ---
model_name: shc-evo2-20b
vocab_size: 512
hidden_size: 8192
# Number of independent filters in Hyena-LI
num_filters: 8192

# --- Layer layout ---
# The four index lists below are disjoint and together cover layers 0..23
# exactly once, matching num_layers: 24. The pattern repeats as
# hcs, hcm, hcl, attn, hcs, hcm, hcl, ... with attention at layers 3, 10, 17.
hcl_layer_idxs: [2, 6, 9, 13, 16, 20, 23]
hcm_layer_idxs: [1, 5, 8, 12, 15, 19, 22]
hcs_layer_idxs: [0, 4, 7, 11, 14, 18, 21]
attn_layer_idxs: [3, 10, 17]

# --- Per-variant filter settings (hcl / hcm / hcs) ---
hcm_filter_length: 128
hcl_filter_groups: 8192
hcm_filter_groups: 512
hcs_filter_groups: 512
hcs_filter_length: 7
num_layers: 24
# Length of the short, depthwise FIR applied to input projections
short_filter_length: 3
# hidden_size / num_attention_heads = 8192 / 64 = 128
# (presumably the per-head dimension; confirm against the model code)
num_attention_heads: 64
short_filter_bias: false  # add bias to FIR

# --- Initialization and numerics ---
mlp_init_method: torch.nn.init.zeros_
mlp_output_init_method: torch.nn.init.zeros_
# Keep decimal notation: YAML 1.1 loaders such as PyYAML read `1e-6`
# (no decimal point) as a string, not a float.
eps: 0.000001
state_size: 16

# --- Rotary position embeddings ---
rotary_emb_base: 1000000
rotary_emb_scaling_factor: 128
use_interpolated_rotary_pos_emb: true

# --- Size rounding and MLP width ---
make_vocab_size_divisible_by: 8
inner_size_multiple_of: 128  # force GLU inner_size to be a multiple of this value
# 22528 = 176 * 128, so it already satisfies inner_size_multiple_of
inner_mlp_size: 22528
log_intermediate_values: false

# --- Grouping and channel layout ---
# Number of groups in GQA
proj_groups: 1
# Number of groups in grouped Hyena filters
hyena_filter_groups: 1
# Split strategy for channels
column_split_hyena: false
column_split: true
interleave: true
# Original note: "Layer > 0 nn.identity activation"
# (presumably layers after the first use an identity activation; TODO confirm)
evo2_style_activations: true
use_fp8_input_projections: true

# --- Parallelism ---
# Legacy options for MP / PP inference
model_parallel_size: 1
pipe_parallel_size: 1

# --- Projections, embeddings and biases ---
tie_embeddings: true
mha_out_proj_bias: true
hyena_out_proj_bias: true
hyena_flip_x1x2: false
qkv_proj_bias: false

# --- Inference limits ---
# 1048576 = 2**20
max_seqlen: 1048576
max_batch_size: 1
final_norm: true

# --- Kernel / backend toggles ---
use_flash_attn: true
use_flash_rmsnorm: false
use_flash_depthwise: false
use_flashfft: false
use_laughing_hyena: false

# --- Runtime behaviour ---
inference_mode: true
tokenizer_type: CharLevelTokenizer
prefill_style: fft
mlp_activation: gelu
print_activations: false