amoe / config.json
{
"architectures": [
"AMOEModel"
],
"auto_map": {
"AutoConfig": "configuration_amoe.AMOEConfig",
"AutoImageProcessor": "image_processing_amoe.AMOEImageProcessor",
"AutoModel": "modeling_amoe.AMOEModel"
},
"channel_size": 3,
"dim": 768,
"dtype": "float32",
"enable_3d_rope": true,
"head_dim": 128,
"max_seq_len": 8192,
"model_type": "amoe",
"moe_args": {
"activation": "silu",
"num_experts": 28,
"num_shared_experts": 0,
"route_norm": false,
"route_scale": 1.0,
"score_before_experts": false,
"score_func": "sigmoid",
"top_k": 6
},
"moe_dim": 384,
"n_heads": 8,
"n_kv_heads": 8,
"n_layers": 18,
"n_storage_tokens": 4,
"norm_eps": 1e-05,
"rope_max_freqs": 20.0,
"rope_min_freqs": 1.0,
"rope_theta": 100000.0,
"spatial_patch_size": 16,
"teachers": [
"siglip2",
"dinov3"
],
"teachers_dim": [
1152,
1024
],
"temporal_patch_size": 1,
"transformers_version": "4.57.1",
"use_flex_attn": true,
"use_qk_norm": true,
"use_tok_norm": true
}
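
Because this config registers custom classes through auto_map, loading it with transformers requires trust_remote_code=True so the referenced configuration_amoe, image_processing_amoe, and modeling_amoe modules can be executed. Below is a minimal loading sketch; the repo id "SofianChay/amoe" is an assumption inferred from the uploader name, not something this file confirms.

from transformers import AutoConfig, AutoImageProcessor, AutoModel

repo_id = "SofianChay/amoe"  # assumed repo id; substitute the actual path

# trust_remote_code=True allows transformers to run the custom code
# files named in this config's auto_map.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoImageProcessor.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

# A few values read back from this config:
print(config.model_type)          # "amoe"
print(config.moe_args["top_k"])   # 6 of 28 experts routed per token
print(config.spatial_patch_size)  # 16-pixel spatial patches, 3-channel input

Per moe_args, each MoE layer routes every token to the top 6 of 28 experts using sigmoid scoring, with no shared experts and a route scale of 1.0.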