OpenLab-NLP
/

model-prototype

Model card Files Files and versions

Yuchan committed on Nov 22, 2025

Commit

e5497f3

·

verified ·

1 Parent(s): 1f4c0fc

Update AlphaS2S.py

Files changed (1) hide show

AlphaS2S.py +1 -5

AlphaS2S.py CHANGED Viewed

@@ -222,7 +222,7 @@ class LoU(layers.Layer):
         self.alpha_linear = layers.Dense(1, activation='sigmoid', dtype='float32')
         self.cross = CrossBlock()
-        self.glu = SwiGLU(d_model, 320)
     def _ema_over_time(self, score, alpha_dynamic):
         seq = tf.transpose(score, perm=[1, 0, 2])
@@ -253,10 +253,6 @@ class LoU(layers.Layer):
         q = self.Q(x_f32)
         k = self.K(x_f32)
         V = self.V(x_f32)
-        # Unidirectional Masking: 미래 정보를 막는 Look-ahead Mask를 수동으로 적용해야 하지만,
-        # 기존 LoU 구현은 Self-Attention이 아니므로 Skip.
         g_q = (tf.nn.tanh(q) + 1.0) / 2.0
         g_k = (tf.nn.tanh(k) + 1.0) / 2.0
         score = g_q * g_k

         self.alpha_linear = layers.Dense(1, activation='sigmoid', dtype='float32')
         self.cross = CrossBlock()
+        self.glu = SwiGLU(d_model, d_model)
     def _ema_over_time(self, score, alpha_dynamic):
         seq = tf.transpose(score, perm=[1, 0, 2])
         q = self.Q(x_f32)
         k = self.K(x_f32)
         V = self.V(x_f32)
         g_q = (tf.nn.tanh(q) + 1.0) / 2.0
         g_k = (tf.nn.tanh(k) + 1.0) / 2.0
         score = g_q * g_k