Yuchan
committed on
Update AlphaS2S.py
Browse files- AlphaS2S.py +40 -14
AlphaS2S.py
CHANGED
|
@@ -192,20 +192,46 @@ class CrossBlock(layers.Layer):
|
|
| 192 |
y = a * x + (1.0 - a) * z
|
| 193 |
return y
|
| 194 |
|
| 195 |
-
class
|
| 196 |
-
def __init__(self, d_model,
|
| 197 |
super().__init__()
|
| 198 |
-
self.
|
| 199 |
-
self.
|
| 200 |
-
self.
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
self.
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
class LoU(layers.Layer):
|
| 211 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
|
@@ -288,7 +314,7 @@ class AlphaS2S(tf.keras.Model):
|
|
| 288 |
self.dec_pos_embedding = layers.Embedding(max_len, d_model)
|
| 289 |
|
| 290 |
# EncoderBlock과 LoU는 기존 코드와 동일한 구조
|
| 291 |
-
self.enc_layers = [
|
| 292 |
self.dec_layers = [LoU(d_model) for _ in range(num_layers)]
|
| 293 |
|
| 294 |
self.final_layer = layers.Dense(target_vocab_size, use_bias=False)
|
|
|
|
| 192 |
y = a * x + (1.0 - a) * z
|
| 193 |
return y
|
| 194 |
|
| 195 |
+
class gMLPBlock(layers.Layer):
    """gMLP encoder block: channel expansion, a Spatial Gating Unit (SGU)
    that mixes information along the sequence axis, and a channel
    contraction, wrapped in a pre-norm residual connection.

    Args:
        d_model: channel dimension of the block's input and output.
        seq_len: fixed sequence length the SGU's spatial projection acts on.
        dropout: dropout rate applied before the output projection.
    """

    def __init__(self, d_model, seq_len, dropout=0.1):
        super().__init__()
        self.norm = layers.LayerNormalization(epsilon=1e-6)
        # Expand channels 4x; the expanded tensor is later split into a
        # gated stream `u` and a value stream `v` of 2*d_model each.
        self.channel_proj = layers.Dense(d_model * 4, use_bias=True)
        self.dropout = layers.Dropout(dropout)

        # Spatial Gating Unit (SGU)
        self.sgu_norm = layers.LayerNormalization(epsilon=1e-6)
        # Mixes along the sequence axis (applied to the transposed tensor,
        # so the Dense acts on the seq_len dimension).
        self.sgu_proj = layers.Dense(seq_len, use_bias=False)
        # BUG FIX: the gate must have 2*d_model channels to match `u` in
        # `u * v_gate`; the previous Dense(d_model) caused a broadcast
        # shape mismatch for any d_model > 1.
        self.sgu_final = layers.Dense(d_model * 2, use_bias=True)

        # Contract back to d_model so the residual sum is well-shaped.
        self.out_proj = layers.Dense(d_model, use_bias=True)

    def call(self, x, training=False):
        # NOTE(review): assumes x is (batch, seq_len, d_model) — the
        # transpose perm=[0, 2, 1] below fixes rank 3; confirm callers
        # always pad/truncate to exactly seq_len.
        # 1. Channel projection (expansion), pre-norm style.
        residual = x
        x = self.norm(x)
        x = self.channel_proj(x)

        # 2. Split into gated (`u`) and value (`v`) streams.
        u, v = tf.split(x, 2, axis=-1)

        # 3. Spatial Gating Unit: the SGU normalizes `v`, then projects
        #    along the sequence axis to perform gating across positions.
        v_norm = self.sgu_norm(v)
        v_norm_T = tf.transpose(v_norm, perm=[0, 2, 1])
        v_proj = self.sgu_proj(v_norm_T)
        v_proj_T = tf.transpose(v_proj, perm=[0, 2, 1])
        v_gate = self.sgu_final(v_proj_T)

        # 4. Gating (element-wise multiplication).
        z = u * v_gate

        # 5. Output projection (contraction).
        z = self.dropout(z, training=training)
        out = self.out_proj(z)

        # 6. Residual connection.
        return residual + out
|
| 235 |
|
| 236 |
class LoU(layers.Layer):
|
| 237 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
|
|
|
| 314 |
self.dec_pos_embedding = layers.Embedding(max_len, d_model)
|
| 315 |
|
| 316 |
# EncoderBlock과 LoU는 기존 코드와 동일한 구조
|
| 317 |
+
self.enc_layers = [gMLPBlock(d_model, seq_len=max_len) for _ in range(num_layers)]
|
| 318 |
self.dec_layers = [LoU(d_model) for _ in range(num_layers)]
|
| 319 |
|
| 320 |
self.final_layer = layers.Dense(target_vocab_size, use_bias=False)
|