OpenLab-NLP committed on
Commit
52becc8
·
verified ·
1 Parent(s): aec1b8a

Update 연구중.py

Browse files
Files changed (1) hide show
  1. 연구중.py +5 -17
연구중.py CHANGED
@@ -135,18 +135,14 @@ class MixerBlock(layers.Layer):
135
  self.dim = dim
136
 
137
  self.ln_token = layers.LayerNormalization(epsilon=1e-6)
138
- self.ln_gate = layers.LayerNormalization(epsilon=1e-6) # 이름 변경
139
  self.ln_channel = layers.LayerNormalization(epsilon=1e-6)
140
 
141
  # Token Mixer
142
- self.token_fc1 = layers.Dense(seq_len * 2)
143
  self.token_fc2 = layers.Dense(seq_len)
144
 
145
- # Gating (Sigmoid) - Temperature 불필요
146
- self.gate_dense = layers.Dense(1)
147
-
148
  # Channel Mixer
149
- self.ch_fc1 = layers.Dense(self.dim * 4, activation='gelu')
150
  self.ch_fc2 = layers.Dense(self.dim)
151
 
152
  def call(self, x, training=None):
@@ -159,19 +155,11 @@ class MixerBlock(layers.Layer):
159
  y = tf.transpose(y_t, perm=[0, 2, 1])
160
  x = x + y
161
 
162
- # 2. Scalar Gating (수정됨)
163
- # Softmax의 1/N 희석 문제를 해결하기 위해 Sigmoid 사용
164
- y = self.ln_gate(x)
165
- gate = tf.nn.sigmoid(self.gate_dense(y)) # (B, L, 1) Range: 0~1
166
- y = y * gate
167
- x = x + y
168
-
169
- # 3. Channel Mixer
170
  y = self.ln_channel(x)
171
- y = self.ch_fc1(y)
172
- y = self.ch_fc2(y)
173
  x = x + y
174
-
175
  return x
176
 
177
 
 
135
  self.dim = dim
136
 
137
  self.ln_token = layers.LayerNormalization(epsilon=1e-6)
 
138
  self.ln_channel = layers.LayerNormalization(epsilon=1e-6)
139
 
140
  # Token Mixer
141
+ self.token_fc1 = layers.Dense(seq_len * 4)
142
  self.token_fc2 = layers.Dense(seq_len)
143
 
 
 
 
144
  # Channel Mixer
145
+ self.ch_fc1 = layers.Dense(self.dim * 4)
146
  self.ch_fc2 = layers.Dense(self.dim)
147
 
148
  def call(self, x, training=None):
 
155
  y = tf.transpose(y_t, perm=[0, 2, 1])
156
  x = x + y
157
 
 
 
 
 
 
 
 
 
158
  y = self.ln_channel(x)
159
+ a, b = tf.split(self.ch_fc1(y), 2, axis=-1)
160
+ y = self.ch_fc2(a * tf.nn.gelu(b))
161
  x = x + y
162
+
163
  return x
164
 
165