Remove the per-layer `num_small_experts` overwrite
Browse files
- myolmoe/modeling_myolmoe.py +0 -11
myolmoe/modeling_myolmoe.py
CHANGED
|
@@ -556,17 +556,6 @@ class OlmoeSparseMoeBlock(nn.Module):
|
|
| 556 |
self.top_k = config.num_experts_per_tok
|
| 557 |
self.norm_topk_prob = config.norm_topk_prob
|
| 558 |
|
| 559 |
-
# Determine if this block is in the second half
|
| 560 |
-
in_second_half = layer_idx >= self.total_layers // 2
|
| 561 |
-
|
| 562 |
-
# Determine small expert count for this layer
|
| 563 |
-
if in_second_half:
|
| 564 |
-
second_half_idx = layer_idx - (self.total_layers // 2)
|
| 565 |
-
num_second_half_blocks = self.total_layers - (self.total_layers // 2)
|
| 566 |
-
self.num_small_experts = config.max_small_expert_count // num_second_half_blocks
|
| 567 |
-
else:
|
| 568 |
-
self.num_small_experts = 0
|
| 569 |
-
|
| 570 |
self.experts = nn.ModuleList([OlmoeMLP(config) for _ in range(self.num_experts)])
|
| 571 |
self.small_experts = nn.ModuleList([
|
| 572 |
OlmoeMLP(config, is_small=True) for _ in range(self.num_small_experts)
|
|
|
|
| 556 |
self.top_k = config.num_experts_per_tok
|
| 557 |
self.norm_topk_prob = config.norm_topk_prob
|
| 558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
self.experts = nn.ModuleList([OlmoeMLP(config) for _ in range(self.num_experts)])
|
| 560 |
self.small_experts = nn.ModuleList([
|
| 561 |
OlmoeMLP(config, is_small=True) for _ in range(self.num_small_experts)
|