Update modeling_super_linear.py

modeling_super_linear.py  (+9 -33)
@@ -4,6 +4,7 @@ import torch, torch.nn as nn, torch.nn.functional as F
 from transformers import (PreTrainedModel,GenerationMixin,AutoConfig,AutoModelForCausalLM,)
 from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 from .configuration_super_linear import SuperLinearConfig
+from torch.nn.functional import interpolate

 import datetime

@@ -562,39 +563,8 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
         self.revin_layer = RevIN(num_features = None, affine=False, norm_type = None, subtract_last = False)
         self.post_init()

-    # ------------------------------------------------------------------
-    # Forward pass expected by AutoModelForCausalLM
-    # ------------------------------------------------------------------
-
-    def upsample_dim1(self, x, target_len: int = 512, mode: str = "bicubic"):
-        # -------- bring the dim-1 axis to the PyTorch 1-D "length" position --------
-        orig_shape = x.shape
-        ndim = x.ndim
-
-        # Reshape to (N, C, L) where L is the axis we want to scale
-        if ndim == 1:                                   # (L,)
-            x_ = x.unsqueeze(0).unsqueeze(0)            # (1,1,L)
-            unstack = lambda t: t.squeeze(0).squeeze(0)
-        elif ndim == 2:                                 # (L,C) or (C,L)
-            if orig_shape[0] == 48:                     # assume (L,C)
-                x_ = x.permute(1, 0).unsqueeze(0)       # (1,C,L)
-                unstack = lambda t: t.squeeze(0).permute(1, 0)
-            else:                                       # assume (C,L)
-                x_ = x.unsqueeze(0)                     # (1,C,L)
-                unstack = lambda t: t.squeeze(0)
-        else:                                           # ≥3 dims, assume (B,L,C, …) with L at dim-1
-            x_ = x.transpose(1, 2)                      # (B,C,L,...)
-            new_order = list(range(ndim))
-            new_order[1], new_order[2] = 2, 1           # swap back later
-            unstack = lambda t: t.permute(*new_order)
-
-        # ------------------ actual interpolation in length dimension --------------
-        y = F.interpolate(x_, size=target_len, mode=mode, align_corners=False)
-
-        # ------------------ restore original dimension ordering -------------------
-        return unstack(y)
-
     def fourier_interp_dim1(self,x, target_len: int = 512):

         L = x.size(1)
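One plausible reason the old helper was dropped: `F.interpolate` only accepts `mode="bicubic"` for 4-D (N, C, H, W) inputs, while `upsample_dim1` funnels its inputs into a 3-D (N, C, L) layout, so its default mode would raise at call time. A minimal check (the shapes here are illustrative, not taken from the repo):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 7, 48)   # (N, C, L): the 3-D layout upsample_dim1 builds

    # 1-D modes work on 3-D tensors:
    y = F.interpolate(x, size=512, mode="linear", align_corners=False)
    print(y.shape)              # torch.Size([1, 7, 512])

    # mode="bicubic" requires 4-D (N, C, H, W) input, so this would raise:
    # F.interpolate(x, size=512, mode="bicubic", align_corners=False)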
@@ -621,7 +591,12 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):


         return y
-
+
+    def upsample_interpolate(self, x, target_len: int = 512):
+        scale_factor = 512/x.shape[1]
+        upsample = interpolate(x, scale_factor=scale_factor, mode='linear').permute(0,2,1)[:, -500:, :]
+        print(upsample.shape)
+


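As committed, `upsample_interpolate` prints the upsampled shape but never returns it, and `target_len` goes unused in favor of the hard-coded 512 and the trailing `[:, -500:, :]` slice. A minimal sketch of what the intended version might look like, assuming `x` arrives as (B, L, C) with the length axis at dim 1 (consistent with the `x_enc.shape[1] < 512` check in the next hunk):

    import torch
    from torch.nn.functional import interpolate

    def upsample_interpolate(x: torch.Tensor, target_len: int = 512) -> torch.Tensor:
        # (B, L, C) -> (B, C, L): interpolate scales the last axis of a 3-D tensor
        x_cl = x.permute(0, 2, 1)
        up = interpolate(x_cl, size=target_len, mode="linear", align_corners=False)
        # (B, C, target_len) -> (B, target_len, C), restoring the original layout
        return up.permute(0, 2, 1)

With a return value in place, the `x_enc = self.upsample_interpolate(x_enc)` assignment below yields a (B, 512, C) tensor rather than `None`; whether the committed `-500:` truncation is intentional is not clear from the diff, so this sketch omits it.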
@@ -643,7 +618,7 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):

         if x_enc.shape[1] < 512:
             #x_enc = self.revin_layer(x_enc, 'norm')
-
+            x_enc = self.upsample_interpolate(x_enc)
             pass

@@ -665,3 +640,4 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
     def _reorder_cache(self, past, beam_idx, **kwargs):
         return past  # backbone keeps no KV cache
+