Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -137,7 +137,7 @@ class LoU(layers.Layer):
|
|
| 137 |
self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 138 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 139 |
|
| 140 |
-
self.glu = SwiGLU(d_model,
|
| 141 |
def call(self, x):
|
| 142 |
x_f32 = tf.cast(x, tf.float32)
|
| 143 |
residual = x_f32
|
|
@@ -242,8 +242,8 @@ def create_lr_schedule(initial_lr=5e-5, decay_steps=10000, decay_rate=0.9):
|
|
| 242 |
model = ReLM(
|
| 243 |
vocab_size=vocab_size,
|
| 244 |
max_seq_len=max_len,
|
| 245 |
-
d_model=
|
| 246 |
-
n_layers=
|
| 247 |
)
|
| 248 |
|
| 249 |
# 옵티마이저 설정
|
|
|
|
| 137 |
self.norm = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 138 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 139 |
|
| 140 |
+
self.glu = SwiGLU(d_model, 3500)
|
| 141 |
def call(self, x):
|
| 142 |
x_f32 = tf.cast(x, tf.float32)
|
| 143 |
residual = x_f32
|
|
|
|
| 242 |
model = ReLM(
|
| 243 |
vocab_size=vocab_size,
|
| 244 |
max_seq_len=max_len,
|
| 245 |
+
d_model=700,
|
| 246 |
+
n_layers=16
|
| 247 |
)
|
| 248 |
|
| 249 |
# 옵티마이저 설정
|