Update Test.py
Browse files
Test.py
CHANGED
|
@@ -66,13 +66,18 @@ dataset = tf.data.Dataset.from_generator(
|
|
| 66 |
class EncoderBlock(tf.keras.layers.Layer):
    """Encoder block: two SiLU-gated (GLU-style) feed-forward sub-blocks
    wrapped around a token-mixing step.

    The token mixer builds a row-softmaxed (B, L, L) similarity matrix and
    projects it back to (B, L, D) with a learned (seq_len, embed_dim)
    weight, so its output is shape-compatible with the residual stream.

    Args:
        embed_dim: model width D of the residual stream.
        ff_dim: inner width of the gated feed-forward layers (split in two
            halves, so the effective hidden size is ff_dim // 2).
        seq_len: fixed sequence length L required by ``w_proj``.
    """

    def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN):
        super().__init__()
        self.embed_dim = embed_dim
        self.seq_len = seq_len

        self.fc1 = layers.Dense(ff_dim)
        self.fc2 = layers.Dense(embed_dim)
        self.fc3 = layers.Dense(ff_dim)
        self.fc4 = layers.Dense(embed_dim)

        # BUG FIX: the original add_weight() call passed no name/shape, so
        # the variable could never be built, and the (B, L, L) mixing
        # result was later multiplied element-wise against a (B, L, D)
        # tensor — a runtime shape mismatch. w_proj (L, D) now projects
        # the sequence axis back to the embedding axis.
        self.w_proj = self.add_weight(
            name="w_proj_L_to_D",
            shape=(seq_len, embed_dim),
            initializer="glorot_uniform",
            trainable=True,
        )

        # NOTE(review): the visible diff hides a few __init__ lines here;
        # self.alpha2 is reconstructed as Dense(1) based on its (B, L, 1)
        # gate usage in call() — confirm against the full file.
        self.alpha2 = layers.Dense(1)

        self.ln = layers.LayerNormalization(epsilon=1e-5)
        self.ln1 = layers.LayerNormalization(epsilon=1e-5)
        self.ln2 = layers.LayerNormalization(epsilon=1e-5)

    def call(self, x):
        # x: (B, L, D)
        x_norm = self.ln(x)

        # First SiLU-gated feed-forward: (B, L, D) -> (B, L, D).
        h = self.fc1(x_norm)                       # (B, L, ff_dim)
        g, v = tf.split(h, 2, axis=-1)             # (B, L, ff_dim/2) each
        h = tf.nn.silu(g) * v
        h = self.fc2(h)                            # (B, L, D)

        # Token mixing: pairwise similarities over the sequence axis.
        sim = tf.matmul(h, h, transpose_b=True)    # (B, L, L)
        sim = tf.nn.softmax(sim, axis=-1)          # row-normalized (B, L, L)

        # (B, L, L) -> (B, L, D): contract sim's last axis (length L)
        # with w_proj's first axis (length L).
        h2 = tf.tensordot(sim, self.w_proj, axes=[[2], [0]])  # (B, L, D)

        # Sequence-axis gate; alpha2 output (B, L, 1) broadcasts over D.
        v_gate = tf.nn.softmax(self.alpha2(v), axis=1)        # (B, L, 1)
        v = v_gate * h2                                       # (B, L, D)

        x_norm = x_norm + self.ln2(v)

        # Second SiLU-gated feed-forward.
        z = self.fc3(x_norm)
        g, v = tf.split(z, 2, axis=-1)
        z = tf.nn.silu(g) * v
        z = self.fc4(z)

        return x_norm + self.ln1(z)
class L2NormLayer(layers.Layer):
|
|
|
|
class EncoderBlock(tf.keras.layers.Layer):
    """Gated feed-forward encoder block with a learned token-mixing step.

    Pipeline per call: pre-LayerNorm -> SiLU-gated MLP -> pairwise
    sequence similarity (B, L, L) -> learned (L, D) projection back to
    the embedding width -> gated residual add -> second SiLU-gated MLP
    with its own residual add.
    """

    def __init__(self, embed_dim=EMBED_DIM, ff_dim=1152, seq_len=MAX_LEN):
        super().__init__()
        self.embed_dim = embed_dim
        self.seq_len = seq_len

        self.fc1 = layers.Dense(ff_dim)
        self.fc2 = layers.Dense(embed_dim)
        self.fc3 = layers.Dense(ff_dim)
        self.fc4 = layers.Dense(embed_dim)

        # Declared as (seq_len, embed_dim) — reused as the L -> D projection.
        self.w_proj = self.add_weight(
            name="w_proj_L_to_D",
            shape=(seq_len, embed_dim),
            initializer="glorot_uniform",
            trainable=True
        )

        # NOTE(review): a few __init__ lines (incl. the definition of
        # self.alpha2) fall between the diff hunks and are not visible;
        # Dense(1) is assumed from its (B, L, 1) usage below — confirm.
        self.alpha2 = layers.Dense(1)

        self.ln = layers.LayerNormalization(epsilon=1e-5)
        self.ln1 = layers.LayerNormalization(epsilon=1e-5)
        self.ln2 = layers.LayerNormalization(epsilon=1e-5)

    def call(self, x):
        # x: (B, L, D)
        x_norm = self.ln(x)

        h = self.fc1(x_norm)                # (B, L, ff_dim)
        g, v = tf.split(h, 2, axis=-1)      # (B, L, ff_dim/2) each
        h = tf.nn.silu(g) * v
        h = self.fc2(h)                     # (B, L, D)

        # --- matmul -> (B, L, L) ---
        sim = tf.matmul(h, h, transpose_b=True)   # (B, L, L)
        # (optional) add normalization/scaling here if desired
        sim = tf.nn.softmax(sim, axis=-1)         # (B, L, L)

        # --- (B, L, L) -> (B, L, D): tensordot with matched axes ---
        # w_proj: (L, D); sim's last axis matches w_proj's first axis.
        h2 = tf.tensordot(sim, self.w_proj, axes=[[2], [0]])  # (B, L, D)

        # Shapes now agree -> element-wise product with the gate works.
        v_gate = tf.nn.softmax(self.alpha2(v), axis=1)  # (B, L, 1)
        v = v_gate * h2                                 # (B, L, D)

        x_norm = x_norm + self.ln2(v)

        z = self.fc3(x_norm)
        g, v = tf.split(z, 2, axis=-1)
        z = tf.nn.silu(g) * v
        z = self.fc4(z)

        return x_norm + self.ln1(z)
|
|
|
| 122 |
|
| 123 |
|
| 124 |
class L2NormLayer(layers.Layer):
|