File size: 701 Bytes

cd16f07

{
  "vocab_size": 50257,
  "max_seq_len": 2048,
  "d_model": 1024,
  "n_layers": 10,
  "n_heads": 16,
  "ff_mult": 4,
  "dropout": 0.1,
  "recurse_steps": 6,
  "critique_threshold": 0.2,
  "tie_embeddings": true,
  "use_moe": true,
  "moe_num_experts": 32,
  "moe_top_k": 1,
  "moe_expert_hidden": 1280,
  "moe_router_jitter": 0.01,
  "moe_aux_loss_weight": 0.01,
  "use_layer_skip": true,
  "layer_skip_threshold": 0.8,
  "layer_skip_target": 0.03,
  "layer_skip_aux_weight": 0.01,
  "use_ternary_weights": true,
  "use_flash_attention": true,
  "use_fused_ops": true,
  "packed_execution": true,
  "use_torch_compile": false,
  "moe_backend": "auto",
  "moe_ep_size": 1
}