SequentialLearning
/

SuperLinear

mixture-of-experts

Model card · Files and versions

razmars committed on Apr 28, 2025

Commit

a0f4ed0

·

verified ·

1 Parent(s): 4b63879

Update modeling_super_linear.py

Files changed (1) hide show

modeling_super_linear.py +2 -4

modeling_super_linear.py CHANGED Viewed

@@ -360,8 +360,8 @@ class SparseNoisyMoE(nn.Module):
         self.topk_gates = F.softmax(self.topk_values, dim=1)
         batch_size = x.size(0)
-        if x.shape[1] < 512:
-            x = self.fourier_interp_dim1(x)
         expert_outputs = torch.stack([self.experts[i](x) for i in range(self.num_experts)], dim=1)
         topk_indices_expanded = topk_indices.unsqueeze(-1).expand(-1, -1, expert_outputs.size(2))
@@ -639,7 +639,6 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
         # backbone expects (B, C, L)
         x_enc = inputs_embeds
         if x_enc.shape[1] < 512:
@@ -647,7 +646,6 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
             #x_enc = self.fourier_interp_dim1(x_enc)
             pass
-        #self.backbone.inf_pred_len = 336
         # backbone returns (B, pred_len, C)

         self.topk_gates = F.softmax(self.topk_values, dim=1)
         batch_size = x.size(0)
+        '''if x.shape[1] < 512:
+            x = self.fourier_interp_dim1(x)'''
         expert_outputs = torch.stack([self.experts[i](x) for i in range(self.num_experts)], dim=1)
         topk_indices_expanded = topk_indices.unsqueeze(-1).expand(-1, -1, expert_outputs.size(2))
         # backbone expects (B, C, L)
         x_enc = inputs_embeds
         if x_enc.shape[1] < 512:
             #x_enc = self.fourier_interp_dim1(x_enc)
             pass
         # backbone returns (B, pred_len, C)