OpenLab-NLP
/

model-prototype

Model card Files Files and versions

Yuchan committed on Nov 22, 2025

Commit

f566493

·

verified ·

1 Parent(s): ee7e7de

Update AlphaS2S.py

Files changed (1) hide show

AlphaS2S.py +8 -2

AlphaS2S.py CHANGED Viewed

@@ -243,7 +243,13 @@ class CrossBlock(layers.Layer):
         # a의 shape: (Batch, Seq_len, D_model)
         g_q = (tf.nn.tanh(x) + 1.0) / 2.0
         g_k = (tf.nn.tanh(z) + 1.0) / 2.0
-        y = (g_q * g_k) * z
         return y
 class LoU(layers.Layer):
@@ -258,7 +264,7 @@ class LoU(layers.Layer):
         self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
-        self.glu = SwiGLU(d_model, d_model)
         self.cross = CrossBlock()
     # LoU는 원래 Uni-directional Attention/Recurrent Block 역할

         # a의 shape: (Batch, Seq_len, D_model)
         g_q = (tf.nn.tanh(x) + 1.0) / 2.0
         g_k = (tf.nn.tanh(z) + 1.0) / 2.0
+        score = (g_q * g_k)
+        score = tf.cumsum(score, axis=1)
+        mean_last = tf.reduce_mean(score, axis=-1, keepdims=True)
+        denom = tf.maximum(mean_last, self.eps)
+        score_norm = score / denom
+        score_clipped = tf.clip_by_value(score_norm, -self.clip_value, self.clip_value)
+        y = score_clipped * z
         return y
 class LoU(layers.Layer):
         self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
+        self.glu = SwiGLU(d_model, 320)
         self.cross = CrossBlock()
     # LoU는 원래 Uni-directional Attention/Recurrent Block 역할