Charlie81 committed on
Commit
d05c72b
·
1 Parent(s): ae4c1ca

remove strategies

Browse files
Files changed (1) hide show
  1. myolmoe/modeling_myolmoe.py +1 -12
myolmoe/modeling_myolmoe.py CHANGED
@@ -65,7 +65,6 @@ class OlmoeConfig(PretrainedConfig):
65
  small_expert_intermediate_ratio=64,
66
  small_expert_count=64,
67
  small_expert_sparsity_coef=0.1,
68
- small_expert_strategy="constant", # increment
69
  max_small_expert_count=64,
70
  **kwargs,
71
  ):
@@ -100,7 +99,6 @@ class OlmoeConfig(PretrainedConfig):
100
  self.small_expert_intermediate_ratio = small_expert_intermediate_ratio
101
  self.small_expert_count = small_expert_count
102
  self.small_expert_sparsity_coef = small_expert_sparsity_coef
103
- self.small_expert_strategy = small_expert_strategy
104
  self.max_small_expert_count = max_small_expert_count
105
 
106
  # Validate the correctness of rotary position embeddings parameters
@@ -565,16 +563,7 @@ class OlmoeSparseMoeBlock(nn.Module):
565
  if in_second_half:
566
  second_half_idx = layer_idx - (self.total_layers // 2)
567
  num_second_half_blocks = self.total_layers - (self.total_layers // 2)
568
-
569
- if config.small_expert_strategy == "constant":
570
- self.num_small_experts = config.max_small_expert_count // num_second_half_blocks
571
- elif config.small_expert_strategy == "increment":
572
- # Linearly scale small experts from 1 to max_small_expert_count
573
- self.num_small_experts = (
574
- (second_half_idx + 1) * config.max_small_expert_count // ((num_second_half_blocks * (num_second_half_blocks + 1)) // 2)
575
- )
576
- else:
577
- raise ValueError(f"Unknown strategy: {config.small_expert_strategy}")
578
  else:
579
  self.num_small_experts = 0
580
 
 
65
  small_expert_intermediate_ratio=64,
66
  small_expert_count=64,
67
  small_expert_sparsity_coef=0.1,
 
68
  max_small_expert_count=64,
69
  **kwargs,
70
  ):
 
99
  self.small_expert_intermediate_ratio = small_expert_intermediate_ratio
100
  self.small_expert_count = small_expert_count
101
  self.small_expert_sparsity_coef = small_expert_sparsity_coef
 
102
  self.max_small_expert_count = max_small_expert_count
103
 
104
  # Validate the correctness of rotary position embeddings parameters
 
563
  if in_second_half:
564
  second_half_idx = layer_idx - (self.total_layers // 2)
565
  num_second_half_blocks = self.total_layers - (self.total_layers // 2)
566
+ self.num_small_experts = config.max_small_expert_count // num_second_half_blocks
 
 
 
 
 
 
 
 
 
567
  else:
568
  self.num_small_experts = 0
569