Update modeling_super_linear.py
Browse files
- modeling_super_linear.py +3 -2
modeling_super_linear.py
CHANGED
|
@@ -347,7 +347,7 @@ class SparseNoisyMoE(nn.Module):
|
|
| 347 |
x_0 = x
|
| 348 |
|
| 349 |
self.gate_outputs = self.gating_network(x_0)
|
| 350 |
-
print(self.gate_outputs.shape)
|
| 351 |
|
| 352 |
if not self.training:
|
| 353 |
self.gate_outputs = self.gate_outputs / self.moe_temp
|
|
@@ -519,7 +519,8 @@ class superLinear(nn.Module):
|
|
| 519 |
x = x_enc.permute(0, 2, 1)
|
| 520 |
B, V, L = x.shape
|
| 521 |
else:
|
| 522 |
-
x
|
|
|
|
| 523 |
B, L = x.shape
|
| 524 |
V = 1
|
| 525 |
|
|
|
|
| 347 |
x_0 = x
|
| 348 |
|
| 349 |
self.gate_outputs = self.gating_network(x_0)
|
| 350 |
+
#print(self.gate_outputs.shape)
|
| 351 |
|
| 352 |
if not self.training:
|
| 353 |
self.gate_outputs = self.gate_outputs / self.moe_temp
|
|
|
|
| 519 |
x = x_enc.permute(0, 2, 1)
|
| 520 |
B, V, L = x.shape
|
| 521 |
else:
|
| 522 |
+
x = x_enc
|
| 523 |
+
x_enc = x_enc.unsqueeze(-1)
|
| 524 |
B, L = x.shape
|
| 525 |
V = 1
|
| 526 |
|