Update modeling_super_linear.py
Browse files
- modeling_super_linear.py +2 -4
modeling_super_linear.py
CHANGED
|
@@ -360,8 +360,8 @@ class SparseNoisyMoE(nn.Module):
|
|
| 360 |
self.topk_gates = F.softmax(self.topk_values, dim=1)
|
| 361 |
|
| 362 |
batch_size = x.size(0)
|
| 363 |
-
if x.shape[1] < 512:
|
| 364 |
-
x = self.fourier_interp_dim1(x)
|
| 365 |
expert_outputs = torch.stack([self.experts[i](x) for i in range(self.num_experts)], dim=1)
|
| 366 |
|
| 367 |
topk_indices_expanded = topk_indices.unsqueeze(-1).expand(-1, -1, expert_outputs.size(2))
|
|
@@ -639,7 +639,6 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
|
|
| 639 |
|
| 640 |
# backbone expects (B, C, L)
|
| 641 |
x_enc = inputs_embeds
|
| 642 |
-
|
| 643 |
|
| 644 |
|
| 645 |
if x_enc.shape[1] < 512:
|
|
@@ -647,7 +646,6 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
|
|
| 647 |
#x_enc = self.fourier_interp_dim1(x_enc)
|
| 648 |
pass
|
| 649 |
|
| 650 |
-
#self.backbone.inf_pred_len = 336
|
| 651 |
|
| 652 |
# backbone returns (B, pred_len, C)
|
| 653 |
|
|
|
|
| 360 |
self.topk_gates = F.softmax(self.topk_values, dim=1)
|
| 361 |
|
| 362 |
batch_size = x.size(0)
|
| 363 |
+
'''if x.shape[1] < 512:
|
| 364 |
+
x = self.fourier_interp_dim1(x)'''
|
| 365 |
expert_outputs = torch.stack([self.experts[i](x) for i in range(self.num_experts)], dim=1)
|
| 366 |
|
| 367 |
topk_indices_expanded = topk_indices.unsqueeze(-1).expand(-1, -1, expert_outputs.size(2))
|
|
|
|
| 639 |
|
| 640 |
# backbone expects (B, C, L)
|
| 641 |
x_enc = inputs_embeds
|
|
|
|
| 642 |
|
| 643 |
|
| 644 |
if x_enc.shape[1] < 512:
|
|
|
|
| 646 |
#x_enc = self.fourier_interp_dim1(x_enc)
|
| 647 |
pass
|
| 648 |
|
|
|
|
| 649 |
|
| 650 |
# backbone returns (B, pred_len, C)
|
| 651 |
|