{
  "method": "drop_upcycling",
  "drop_ratio": 0.66,
  "num_experts_per_layer": 288,
  "base_experts_from_ffn": 12,
  "replication_factor": 24,
  "total_experts": 8064,
  "source_ffn_dim": 8960,
  "target_expert_dim": 768,
  "layers": 28,
  "total_params": 28938036224,
  "total_params_calculated": 28860965888,
  "active_params_per_forward": 917464064,
  "activation_ratio": 0.03170443415365876,
  "expert_params": 3538944,
  "total_expert_params": 28538044416
}