Yuchan committed on
Commit
696475d
·
verified ·
1 Parent(s): 0d505a8

Update Mo.py

Browse files
Files changed (1) hide show
  1. Mo.py +2 -15
Mo.py CHANGED
@@ -134,40 +134,27 @@ class LoU(layers.Layer):
134
          self.K = layers.Dense(d_model, dtype='float32')
135
          self.V = layers.Dense(d_model, dtype='float32')
136
          self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
137
-         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
138
-         
139
          self.glu = SwiGLU(d_model, 320)
140
      def call(self, x):
141
          x_f32 = tf.cast(x, tf.float32)
142
          residual = x_f32
143
          x_f32 = self.norm1(x)
144
-
145
          q = self.Q(x_f32)
146
          k = self.K(x_f32)
147
          V = self.V(x_f32)
148
          g_q = (tf.nn.tanh(q) + 1.0) / 2.0
149
          g_k = (tf.nn.tanh(k) + 1.0) / 2.0
150
          score = g_q * g_k
151
-
152
-         score = tf.cumsum(score, axis=1) # (B, L, D)
153
-         
154
-         # 💡 수정된 부분: 현재 토큰까지의 누적합 평균으로 정규화
155
          seq_len = tf.shape(score)[1]
156
-         # [1, 2, 3, ..., L]을 D_model 차원으로 확장
157
          count_for_mean = tf.cast(tf.range(seq_len) + 1, score.dtype)
158
          count_for_mean = tf.reshape(count_for_mean, (1, seq_len, 1))
159
-         
160
-         # 누적합을 현재까지의 토큰 개수로 나누어 평균 누적합 계산 (B, L, D)
161
          score_mean = score / count_for_mean
162
-         
163
-         # 정규화 분모 설정
164
          denom = tf.maximum(score_mean, self.eps)
165
          score_norm = score / denom
166
-         # -----------------------------------------------
167
-
168
          score_clipped = tf.clip_by_value(score_norm, -self.clip_value, self.clip_value)
169
          x_comb = score_clipped * V
170
-         
171
          out = self.norm(x_comb + residual)
172
          out = self.glu(out)
173
          return tf.cast(out, x.dtype)
 
134
          self.K = layers.Dense(d_model, dtype='float32')
135
          self.V = layers.Dense(d_model, dtype='float32')
136
          self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
137
+         self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32') 
 
138
          self.glu = SwiGLU(d_model, 320)
139
      def call(self, x):
140
          x_f32 = tf.cast(x, tf.float32)
141
          residual = x_f32
142
          x_f32 = self.norm1(x)
 
143
          q = self.Q(x_f32)
144
          k = self.K(x_f32)
145
          V = self.V(x_f32)
146
          g_q = (tf.nn.tanh(q) + 1.0) / 2.0
147
          g_k = (tf.nn.tanh(k) + 1.0) / 2.0
148
          score = g_q * g_k
149
+         score = tf.cumsum(score, axis=1)
 
 
 
150
          seq_len = tf.shape(score)[1]
 
151
          count_for_mean = tf.cast(tf.range(seq_len) + 1, score.dtype)
152
          count_for_mean = tf.reshape(count_for_mean, (1, seq_len, 1))
 
 
153
          score_mean = score / count_for_mean
 
 
154
          denom = tf.maximum(score_mean, self.eps)
155
          score_norm = score / denom
 
 
156
          score_clipped = tf.clip_by_value(score_norm, -self.clip_value, self.clip_value)
157
          x_comb = score_clipped * V
 
158
          out = self.norm(x_comb + residual)
159
          out = self.glu(out)
160
          return tf.cast(out, x.dtype)