Change the default small_expert_strategy to "constant"
Browse files
myolmoe/modeling_myolmoe.py
CHANGED
|
@@ -65,8 +65,8 @@ class OlmoeConfig(PretrainedConfig):
|
|
| 65 |
small_expert_intermediate_ratio=64,
|
| 66 |
small_expert_count=64,
|
| 67 |
small_expert_sparsity_coef=0.1,
|
| 68 |
-
small_expert_strategy="
|
| 69 |
-
max_small_expert_count=64,
|
| 70 |
**kwargs,
|
| 71 |
):
|
| 72 |
self.vocab_size = vocab_size
|
|
|
|
| 65 |
small_expert_intermediate_ratio=64,
|
| 66 |
small_expert_count=64,
|
| 67 |
small_expert_sparsity_coef=0.1,
|
| 68 |
+
small_expert_strategy="constant", # increment
|
| 69 |
+
max_small_expert_count=64,
|
| 70 |
**kwargs,
|
| 71 |
):
|
| 72 |
self.vocab_size = vocab_size
|