{ "method": "drop_upcycling", "drop_ratio": 0.66, "num_experts_per_layer": 288, "base_experts_from_ffn": 12, "replication_factor": 24, "total_experts": 8064, "source_ffn_dim": 8960, "target_expert_dim": 768, "layers": 28, "total_params": 28938036224, "total_params_calculated": 28860965888, "active_params_per_forward": 917464064, "activation_ratio": 0.03170443415365876, "expert_params": 3538944, "total_expert_params": 28538044416 }