Update modeling_super_linear.py
modeling_super_linear.py CHANGED (+33 -6)
@@ -518,6 +518,36 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
     # ------------------------------------------------------------------
     # Forward pass expected by AutoModelForCausalLM
     # ------------------------------------------------------------------
+
+
+    def upsample_dim1(self, x, target_len: int = 512, mode: str = "linear"):
+        # -------- bring the dim-1 axis to the PyTorch 1-D "length" position -------
+        orig_shape = x.shape
+        ndim = x.ndim
+
+        # Reshape to (N, C, L) where L is the axis we want to scale
+        if ndim == 1:                              # (L,)
+            x_ = x.unsqueeze(0).unsqueeze(0)       # (1, 1, L)
+            unstack = lambda t: t.squeeze(0).squeeze(0)
+        elif ndim == 2:                            # (L, C) or (C, L)
+            if orig_shape[0] == 48:                # assume (L, C)
+                x_ = x.permute(1, 0).unsqueeze(0)  # (1, C, L)
+                unstack = lambda t: t.squeeze(0).permute(1, 0)
+            else:                                  # assume (C, L)
+                x_ = x.unsqueeze(0)                # (1, C, L)
+                unstack = lambda t: t.squeeze(0)
+        else:                                      # ≥3 dims, assume (B, L, C, ...) with L at dim 1
+            x_ = x.transpose(1, 2)                 # (B, C, L, ...)
+            new_order = list(range(ndim))
+            new_order[1], new_order[2] = 2, 1      # swap back later
+            unstack = lambda t: t.permute(*new_order)
+
+        # ------------------ actual interpolation in length dimension --------------
+        y = F.interpolate(x_, size=target_len, mode=mode, align_corners=False)
+
+        # ------------------ restore original dimension ordering -------------------
+        return unstack(y)
+
     def forward(self,
                 inputs_embeds: torch.Tensor = None,
                 attention_mask: Optional[torch.Tensor] = None,
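For context: F.interpolate with mode="linear" only accepts 3-D (N, C, L) input and resizes the trailing L axis, which is why the helper above unsqueezes or permutes every rank into that layout and undoes the reshuffle afterwards. A quick standalone check of that constraint (the tensors here are just demonstration values):

import torch
import torch.nn.functional as F

# mode="linear" requires a 3-D (N, C, L) tensor; other ranks raise an error,
# hence the unsqueeze/permute bookkeeping in upsample_dim1 above.
x = torch.randn(7, 96)                    # (C, L): 7 channels, length 96
y = F.interpolate(x.unsqueeze(0),         # -> (1, 7, 96)
                  size=512, mode="linear", align_corners=False)
print(y.squeeze(0).shape)                 # torch.Size([7, 512])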
@@ -532,13 +562,10 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
 
         # backbone expects (B, C, L)
         x_enc = inputs_embeds
-
+        print(x_enc.shape)
         if x_enc.shape[1] < 512:
-
-
-        else:
-            x_enc = F.interpolate(x_enc,size = 512,mode="linear", align_corners=False)
-
+            x_enc = self.upsample_dim1(x_enc)
+            print(x_enc.shape)
 
         # backbone returns (B, pred_len, C)
         preds = self.backbone(x_enc)
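The forward() change replaces the old logic, where F.interpolate only ran in the else branch (i.e. when dim 1 was already at least 512), with an upsample of short inputs through the new helper. A minimal sketch of the resulting pre-processing, assuming inputs_embeds arrives as (B, L, C) so that dim 1 is the sequence axis; prepare_input and context_len are illustrative names, not part of the repo:

import torch
import torch.nn.functional as F

def prepare_input(x_enc: torch.Tensor, context_len: int = 512) -> torch.Tensor:
    # Mirrors the new forward() guard for the common (B, L, C) case:
    # sequences shorter than the context window are linearly upsampled,
    # longer ones pass through untouched.
    if x_enc.shape[1] < context_len:
        x_enc = F.interpolate(x_enc.transpose(1, 2), size=context_len,
                              mode="linear", align_corners=False).transpose(1, 2)
    return x_enc

print(prepare_input(torch.randn(1, 96, 7)).shape)   # torch.Size([1, 512, 7])
print(prepare_input(torch.randn(1, 720, 7)).shape)  # torch.Size([1, 720, 7])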
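Because this forward() is the entry point AutoModelForCausalLM dispatches to, the resizing is transparent to callers. A hypothetical end-to-end call; "org/SuperLinear" is a placeholder checkpoint id, not the actual repo path:

import torch
from transformers import AutoModelForCausalLM

# Placeholder checkpoint id; trust_remote_code=True loads the custom
# modeling_super_linear.py shipped with the repo.
model = AutoModelForCausalLM.from_pretrained("org/SuperLinear", trust_remote_code=True)

series = torch.randn(1, 96, 7)       # (B, L, C) history shorter than 512 steps
out = model(inputs_embeds=series)    # forward() now upsamples to length 512 internally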