{
  "vocab_size": 50257,
  "max_seq_len": 2048,
  "d_model": 1024,
  "n_layers": 10,
  "n_heads": 16,
  "ff_mult": 4,
  "dropout": 0.1,
  "recurse_steps": 6,
  "critique_threshold": 0.2,
  "tie_embeddings": true,
  "use_moe": true,
  "moe_num_experts": 32,
  "moe_top_k": 1,
  "moe_expert_hidden": 1280,
  "moe_router_jitter": 0.01,
  "moe_aux_loss_weight": 0.01,
  "use_layer_skip": true,
  "layer_skip_threshold": 0.8,
  "layer_skip_target": 0.03,
  "layer_skip_aux_weight": 0.01,
  "use_ternary_weights": true,
  "use_flash_attention": true,
  "use_fused_ops": true,
  "packed_execution": true,
  "use_torch_compile": false,
  "moe_backend": "auto",
  "moe_ep_size": 1
}