Charlie81
/

LoRE

TensorBoard

Safetensors

Model card Files Files and versions

xet

Metrics Training metrics Community

Charlie81 committed on Aug 28, 2025

Commit

d05c72b

1 Parent(s): ae4c1ca

remove strategies

Browse files

Files changed (1) hide show

myolmoe/modeling_myolmoe.py +1 -12

myolmoe/modeling_myolmoe.py CHANGED Viewed

@@ -65,7 +65,6 @@ class OlmoeConfig(PretrainedConfig):
         small_expert_intermediate_ratio=64,
         small_expert_count=64,
         small_expert_sparsity_coef=0.1,
-        small_expert_strategy="constant",  # increment
         max_small_expert_count=64,
         **kwargs,
     ):
@@ -100,7 +99,6 @@ class OlmoeConfig(PretrainedConfig):
         self.small_expert_intermediate_ratio = small_expert_intermediate_ratio
         self.small_expert_count = small_expert_count
         self.small_expert_sparsity_coef = small_expert_sparsity_coef
-        self.small_expert_strategy = small_expert_strategy
         self.max_small_expert_count = max_small_expert_count
         # Validate the correctness of rotary position embeddings parameters
@@ -565,16 +563,7 @@ class OlmoeSparseMoeBlock(nn.Module):
         if in_second_half:
             second_half_idx = layer_idx - (self.total_layers // 2)
             num_second_half_blocks = self.total_layers - (self.total_layers // 2)
-            if config.small_expert_strategy == "constant":
-                self.num_small_experts = config.max_small_expert_count // num_second_half_blocks
-            elif config.small_expert_strategy == "increment":
-                # Linearly scale small experts from 1 to max_small_expert_count
-                self.num_small_experts = (
-                    (second_half_idx + 1) * config.max_small_expert_count // ((num_second_half_blocks * (num_second_half_blocks + 1)) // 2)
-                )
-            else:
-                raise ValueError(f"Unknown strategy: {config.small_expert_strategy}")
         else:
             self.num_small_experts = 0

         small_expert_intermediate_ratio=64,
         small_expert_count=64,
         small_expert_sparsity_coef=0.1,
         max_small_expert_count=64,
         **kwargs,
     ):
         self.small_expert_intermediate_ratio = small_expert_intermediate_ratio
         self.small_expert_count = small_expert_count
         self.small_expert_sparsity_coef = small_expert_sparsity_coef
         self.max_small_expert_count = max_small_expert_count
         # Validate the correctness of rotary position embeddings parameters
         if in_second_half:
             second_half_idx = layer_idx - (self.total_layers // 2)
             num_second_half_blocks = self.total_layers - (self.total_layers // 2)
+            self.num_small_experts = config.max_small_expert_count // num_second_half_blocks
         else:
             self.num_small_experts = 0