Evo2_1b_base / config.json
{
  "architectures": [
    "Evo2ForCausalLM"
  ],
  "attn_dropout": 0.0,
  "dtype": "float32",
  "eos_token_id": 0,
  "hidden_dropout": 0.0,
  "hidden_size": 1920,
  "hyena_filter_configurations": [
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {},
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {},
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {},
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {
      "h_shape": [
        128,
        1,
        7
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "h_shape": [
        128,
        1,
        128
      ]
    },
    {
      "D_shape": [
        1920
      ],
      "log_poles_shape": [
        1920,
        16,
        1
      ],
      "residues_shape": [
        1920,
        16
      ]
    },
    {}
  ],
  "hyena_filters": 128,
  "hyena_flip_x1x2": false,
  "hyena_hidden_size": 1920,
  "hyena_kernel_size": 3,
  "hyena_order": 3,
  "initializer_range": 0.02,
  "intermediate_size": 5120,
  "layer_types": [
    "hyena",
    "hyena",
    "hyena",
    "attention",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "attention",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "attention",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "hyena",
    "attention"
  ],
  "max_position_embeddings": 2048,
  "mlp_dropout": 0.0,
  "model_type": "evo2",
  "num_attention_heads": 15,
  "num_hidden_layers": 25,
  "num_key_value_heads": 15,
  "pad_token_id": 1,
  "rms_norm_eps": 1e-06,
  "rope_parameters": {
    "rope_theta": 1000000.0,
    "rope_type": "default"
  },
  "rope_theta": 1000000.0,
  "transformers_version": "5.0.0.dev0",
  "use_cache": true,
  "vocab_size": 512
}
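
For quick sanity-checking, here is a minimal Python sketch that parses the file above and verifies its layer layout. It assumes the file has been saved locally as config.json (adjust the path as needed); everything it checks follows directly from the values shown.

import json

# Load the configuration shown above; the local path "config.json"
# is an assumption -- point it at wherever the file was downloaded.
with open("config.json") as f:
    cfg = json.load(f)

layer_types = cfg["layer_types"]
filter_configs = cfg["hyena_filter_configurations"]

# 25 layers in total, interleaving Hyena blocks with attention
# layers at indices 3, 10, 17, and 24.
assert len(layer_types) == cfg["num_hidden_layers"] == 25
print(layer_types.count("attention"))  # 4

# Attention layers carry an empty entry in
# hyena_filter_configurations, since the filter shapes apply only
# to hyena-type layers; check that the two lists line up.
for layer_type, filt in zip(layer_types, filter_configs):
    assert (filt == {}) == (layer_type == "attention")

# Per-head dimension of the attention layers:
# hidden_size / num_attention_heads = 1920 / 15 = 128.
print(cfg["hidden_size"] // cfg["num_attention_heads"])  # 128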