File size: 982 Bytes
8a2f1d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60bd814
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{
  "model_type": "afmoe",
  "architectures": [
    "MOE"
  ],
  "profile_name": "1b_3e_8l_t4x2",
  "vocab_size": 200024,
  "text_embed_dim": 1024,
  "vision_embed_dim": 1024,
  "hidden_dim": 1024,
  "ffn_dim": 6144,
  "num_layers": 8,
  "num_heads": 16,
  "num_kv_heads": 4,
  "num_experts": 3,
  "top_k": 2,
  "max_position_embeddings": 16384,
  "router_aux_loss_coef": 0.01,
  "share_experts_across_layers": false,
  "gradient_checkpointing": true,
  "num_agents": 4,
  "moe_capacity_factor": 1.0,
  "moe_hierarchy_groups": 1,
  "moe_hierarchy_top_k": 1,
  "num_shared_experts": 0,
  "load_balancing_mode": "aux_free",
  "router_bias_update_rate": 0.01,
  "kv_latent_dim": 128,
  "kv_cache_dtype": "int4",
  "rope_training_context": 16384,
  "rope_ntk_alpha": 1.0,
  "rope_yarn_scale": 1.0,
  "ring_attention_chunk_size": 0,
  "prefill_chunk_size": 256,
  "use_q_former_projector": true,
  "q_former_queries": 8,
  "q_former_layers": 1,
  "tokenizer_name": "ai-tokenizer:GPT-5"
}