SequentialLearning
/

SuperLinear

mixture-of-experts

Model card Files Files and versions

razmars committed on Apr 27, 2025

Commit

1c6415b

·

verified ·

1 Parent(s): 5e056b1

Update modeling_super_linear.py

Files changed (1) hide show

modeling_super_linear.py +13 -1

modeling_super_linear.py CHANGED Viewed

@@ -548,6 +548,18 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
         # ------------------ restore original dimension ordering -------------------
         return unstack(y)
     def forward(self,
                 inputs_embeds: torch.Tensor = None,
                 attention_mask: Optional[torch.Tensor] = None,
@@ -564,7 +576,7 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
         x_enc = inputs_embeds
         print(x_enc.shape)
         if x_enc.shape[1] < 512:
-            x_enc = self.upsample_dim1(x_enc)
         print(x_enc.shape)
         # backbone returns (B, pred_len, C)

         # ------------------ restore original dimension ordering -------------------
         return unstack(y)
+    def fourier_interp_dim1(self,x, target_len: int = 512):
+        L = x.size(1)
+        assert L == 48, "dim-1 length must be 48"
+        X      = torch.fft.rfft(x, dim=1)                   # (..., 25, ...)
+        pad    = target_len // 2 + 1 - X.size(1)
+        X_pad  = torch.cat([X, X.new_zeros(*X.shape[:-1], pad)], dim=1)
+        y      = torch.fft.irfft(X_pad, n=target_len, dim=1)
+        return y
     def forward(self,
                 inputs_embeds: torch.Tensor = None,
                 attention_mask: Optional[torch.Tensor] = None,
         x_enc = inputs_embeds
         print(x_enc.shape)
         if x_enc.shape[1] < 512:
+            x_enc = self.fourier_interp_dim1(x_enc)
         print(x_enc.shape)
         # backbone returns (B, pred_len, C)