Charlie81
/

ThinExperts

Model card Files Files and versions

Charlie81 committed on Jun 9, 2025

Commit

e868dbf

·

1 Parent(s): c085dea

2 new depth routing types

Files changed (1) hide show

myolmoe/modeling_myolmoe.py +8 -1

myolmoe/modeling_myolmoe.py CHANGED Viewed

@@ -447,8 +447,9 @@ OLMOE_ATTENTION_CLASSES = {
 class OlmoeSparseMoeBlock(nn.Module):
-    def __init__(self, config):
         super().__init__()
         self.num_experts = config.num_experts
         self.top_k = config.num_experts_per_tok
         self.norm_topk_prob = config.norm_topk_prob
@@ -481,6 +482,12 @@ class OlmoeSparseMoeBlock(nn.Module):
             sorted_weights, sorted_indices = torch.sort(routing_probs, dim=-1, descending=True)
             selected_experts = sorted_indices[:, ::self.n_step][:, :self.top_k]
             routing_weights = routing_probs.gather(1, selected_experts)
         else:
             raise ValueError(f"Unknown routing type: {self.routing_type}")

 class OlmoeSparseMoeBlock(nn.Module):
+    def __init__(self, config, layer_idx: int):
         super().__init__()
+        self.layer_idx = layer_idx
         self.num_experts = config.num_experts
         self.top_k = config.num_experts_per_tok
         self.norm_topk_prob = config.norm_topk_prob
             sorted_weights, sorted_indices = torch.sort(routing_probs, dim=-1, descending=True)
             selected_experts = sorted_indices[:, ::self.n_step][:, :self.top_k]
             routing_weights = routing_probs.gather(1, selected_experts)
+        elif self.routing_type == "depthconstant":
+            effective_top_k = max(1, self.top_k - (self.layer_idx // 2))
+            routing_weights, selected_experts = torch.topk(routing_probs, effective_top_k, dim=-1)
+        elif self.routing_type == "depthlatter":
+            effective_top_k = self.top_k if self.layer_idx < 8 else max(1, self.top_k + 8 - self.layer_idx)
+            routing_weights, selected_experts = torch.topk(routing_probs, effective_top_k, dim=-1)
         else:
             raise ValueError(f"Unknown routing type: {self.routing_type}")