| from enum import Enum, auto | |
| from typing import Optional | |
| import torch | |
| from sglang.srt.elastic_ep.elastic_ep import ElasticEPStateManager | |
| from sglang.srt.eplb.eplb_algorithms import deepseek, deepseek_vec, elasticity_aware | |
| class EplbAlgorithm(Enum): | |
| deepseek = auto() | |
| deepseek_hierarchical = auto() | |
| deepseek_vec = auto() | |
| deepseek_vec_hierarchical = auto() | |
| elasticity_aware = auto() | |
| # TODO may have more algorithm later | |
| def rebalance_experts( | |
| tokens_per_expert: torch.Tensor, | |
| num_physical_experts: int, | |
| num_local_physical_experts: int, | |
| num_groups: Optional[int], | |
| num_nodes: int, | |
| algorithm: EplbAlgorithm, | |
| ): | |
| if algorithm in [EplbAlgorithm.deepseek, EplbAlgorithm.deepseek_hierarchical]: | |
| return deepseek.rebalance_experts( | |
| weight=tokens_per_expert.sum(dim=0), | |
| num_replicas=num_physical_experts, | |
| num_groups=num_groups, | |
| num_nodes=num_nodes, | |
| num_gpus=num_physical_experts // num_local_physical_experts, | |
| enable_hierarchical=algorithm == EplbAlgorithm.deepseek_hierarchical, | |
| ) | |
| if algorithm in [ | |
| EplbAlgorithm.deepseek_vec, | |
| EplbAlgorithm.deepseek_vec_hierarchical, | |
| ]: | |
| return deepseek_vec.rebalance_experts( | |
| tokens_per_expert=tokens_per_expert, | |
| num_physical_experts=num_physical_experts, | |
| num_local_physical_experts=num_local_physical_experts, | |
| num_groups=num_groups, | |
| num_nodes=num_nodes, | |
| enable_hierarchical=algorithm == EplbAlgorithm.deepseek_vec_hierarchical, | |
| ) | |
| if algorithm == EplbAlgorithm.elasticity_aware: | |
| return elasticity_aware.rebalance_experts( | |
| weight=tokens_per_expert.sum(dim=0), | |
| num_replicas=num_physical_experts, | |
| num_groups=num_groups, | |
| num_nodes=num_nodes, | |
| num_gpus=num_physical_experts // num_local_physical_experts, | |
| enable_hierarchical=True, | |
| active_ranks=( | |
| ElasticEPStateManager.instance().active_ranks | |
| if ElasticEPStateManager.instance() is not None | |
| else ElasticEPStateManager.healthy_rank_state() | |
| ), | |
| ) | |
| raise NotImplementedError | |
| def compute_algorithm( | |
| raw_algorithm: str, | |
| num_groups: Optional[int], | |
| num_nodes: int, | |
| ) -> EplbAlgorithm: | |
| if raw_algorithm != "auto": | |
| return EplbAlgorithm[raw_algorithm] | |
| # TODO test on real scenarios and know which ones perform better | |
| if (num_groups is not None) and (num_groups % num_nodes == 0): | |
| return EplbAlgorithm.deepseek_hierarchical | |
| else: | |
| return EplbAlgorithm.deepseek | |
Xet Storage Details
- Size:
- 2.7 kB
- Xet hash:
- aa249089fcaa387a344ede11f34173fd8527aa39b1701aa289c02ab277ec0b4d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.