OpenLab-NLP
/

model-prototype

Model card Files Files and versions

Yuchan committed on Nov 22, 2025

Commit

ee7e7de

·

verified ·

1 Parent(s): 68d51a1

Update AlphaS2S.py

Files changed (1) hide show

AlphaS2S.py +3 -25

AlphaS2S.py CHANGED Viewed

@@ -258,30 +258,9 @@ class LoU(layers.Layer):
         self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
-        self.alpha_linear = layers.Dense(1, activation='sigmoid', dtype='float32')
         self.glu = SwiGLU(d_model, d_model)
         self.cross = CrossBlock()
-    def _ema_over_time(self, score, alpha_dynamic):
-        seq = tf.transpose(score, perm=[1, 0, 2])
-        alpha_seq = tf.transpose(alpha_dynamic, perm=[1, 0, 2])
-        def step(prev_ema, inputs):
-            x_t, alpha_t = inputs
-            new = alpha_t * x_t + (1.0 - alpha_t) * prev_ema
-            return new
-        init = seq[0]
-        first_alpha = alpha_seq[0]
-        remaining_seq = seq[1:]
-        remaining_alpha = alpha_seq[1:]
-        elems = (remaining_seq, remaining_alpha)
-        # tf.scan을 사용하여 시계열 EMA 계산
-        ema_seq = tf.scan(fn=step, elems=elems, initializer=init)
-        ema_seq = tf.concat([tf.expand_dims(init, 0), ema_seq], axis=0)
-        ema = tf.transpose(ema_seq, perm=[1, 0, 2])
-        return ema
     # LoU는 원래 Uni-directional Attention/Recurrent Block 역할
     def call(self, x, z):
         x_f32 = tf.cast(x, tf.float32)
@@ -295,11 +274,10 @@ class LoU(layers.Layer):
         g_k = (tf.nn.tanh(k) + 1.0) / 2.0
         score = g_q * g_k
-        alpha_dynamic = self.alpha_linear(x_f32)
-        score_ema = self._ema_over_time(score, alpha_dynamic)
-        mean_last = tf.reduce_mean(score_ema, axis=-1, keepdims=True)
         denom = tf.maximum(mean_last, self.eps)
-        score_norm = score_ema / denom
         score_clipped = tf.clip_by_value(score_norm, -self.clip_value, self.clip_value)
         x_comb = score_clipped * V

         self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
         self.glu = SwiGLU(d_model, d_model)
         self.cross = CrossBlock()
     # LoU는 원래 Uni-directional Attention/Recurrent Block 역할
     def call(self, x, z):
         x_f32 = tf.cast(x, tf.float32)
         g_k = (tf.nn.tanh(k) + 1.0) / 2.0
         score = g_q * g_k
+        score = tf.cumsum(score, axis=1)
+        mean_last = tf.reduce_mean(score, axis=-1, keepdims=True)
         denom = tf.maximum(mean_last, self.eps)
+        score_norm = score / denom
         score_clipped = tf.clip_by_value(score_norm, -self.clip_value, self.clip_value)
         x_comb = score_clipped * V