biology
genomics
DNA
gbrixi committed on
Commit
205923d
·
verified ·
1 Parent(s): 46ea8db

Add model configuration

Browse files
Files changed (1) hide show
  1. config.yml +66 -0
config.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: shc-evo2-20b
2
+
3
+ vocab_size: 512
4
+ hidden_size: 8192
5
+ # Number of independent filters in Hyena-LI
6
+ num_filters: 8192
7
+ hcl_layer_idxs: [2,6,9,13,16,20,23]
8
+ hcm_layer_idxs: [1,5,8,12,15,19,22]
9
+ hcs_layer_idxs: [0,4,7,11,14,18,21]
10
+ attn_layer_idxs: [3,10,17]
11
+ hcm_filter_length: 128
12
+ hcl_filter_groups: 8192
13
+ hcm_filter_groups: 512
14
+ hcs_filter_groups: 512
15
+ hcs_filter_length: 7
16
+ num_layers: 24
17
+
18
+ # Length of the short, depthwise FIR applied to input projections
19
+ short_filter_length: 3
20
+ num_attention_heads: 64
21
+ short_filter_bias: false # add bias to FIR
22
+ mlp_init_method: torch.nn.init.zeros_
23
+ mlp_output_init_method: torch.nn.init.zeros_
24
+ eps: 0.000001
25
+ state_size: 16
26
+ rotary_emb_base: 1000000
27
+ rotary_emb_scaling_factor: 128
28
+ use_interpolated_rotary_pos_emb: True
29
+ make_vocab_size_divisible_by: 8
30
+ inner_size_multiple_of: 128 # force GLU inner_size to be a multiple of this value
31
+ inner_mlp_size: 22528
32
+ log_intermediate_values: False
33
+ # Number of groups in GQA
34
+ proj_groups: 1
35
+ # Number of groups in grouped Hyena filters
36
+ hyena_filter_groups: 1
37
+ # Split strategy for channels
38
+ column_split_hyena: False
39
+ column_split: True
40
+ interleave: True
41
+ # Use nn.Identity activation for layers > 0
42
+ evo2_style_activations: True
43
+
44
+ use_fp8_input_projections: True
45
+
46
+ # Legacy options for MP / PP inference
47
+ model_parallel_size: 1
48
+ pipe_parallel_size: 1
49
+ tie_embeddings: True
50
+ mha_out_proj_bias: True
51
+ hyena_out_proj_bias: True
52
+ hyena_flip_x1x2: False
53
+ qkv_proj_bias: False
54
+ max_seqlen: 1048576
55
+ max_batch_size: 1
56
+ final_norm: True
57
+ use_flash_attn: True
58
+ use_flash_rmsnorm: False
59
+ use_flash_depthwise: False
60
+ use_flashfft: False
61
+ use_laughing_hyena: False
62
+ inference_mode: True
63
+ tokenizer_type: CharLevelTokenizer
64
+ prefill_style: fft
65
+ mlp_activation: gelu
66
+ print_activations: False