Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -99,7 +99,7 @@ def txt_stream(file_path):
|
|
| 99 |
)
|
| 100 |
|
| 101 |
|
| 102 |
-
LIMIT =
|
| 103 |
|
| 104 |
dataset = tf.data.Dataset.from_generator(
|
| 105 |
lambda: txt_stream(DATA_PATH),
|
|
@@ -141,7 +141,7 @@ class LoU(layers.Layer):
|
|
| 141 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 142 |
|
| 143 |
# 비선형 표현력
|
| 144 |
-
self.glu = SwiGLU(d_model,
|
| 145 |
|
| 146 |
# 학습 가능한 과거 토큰 가중치
|
| 147 |
self.alpha = self.add_weight(shape=(d_model,), initializer='ones', trainable=True)
|
|
@@ -241,7 +241,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 241 |
# 모델 생성 & 컴파일
|
| 242 |
# =======================
|
| 243 |
with strategy.scope():
|
| 244 |
-
model = CumaLM(vocab_size=vocab_size, max_seq_len=max_len, d_ff=
|
| 245 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 246 |
_ = model(dummy_input, training=False)
|
| 247 |
model.summary()
|
|
|
|
| 99 |
)
|
| 100 |
|
| 101 |
|
| 102 |
+
LIMIT = 2000000 # 원하는 만큼
|
| 103 |
|
| 104 |
dataset = tf.data.Dataset.from_generator(
|
| 105 |
lambda: txt_stream(DATA_PATH),
|
|
|
|
| 141 |
self.norm1 = layers.LayerNormalization(epsilon=1e-5, dtype='float32')
|
| 142 |
|
| 143 |
# 비선형 표현력
|
| 144 |
+
self.glu = SwiGLU(d_model, 320)
|
| 145 |
|
| 146 |
# 학습 가능한 과거 토큰 가중치
|
| 147 |
self.alpha = self.add_weight(shape=(d_model,), initializer='ones', trainable=True)
|
|
|
|
| 241 |
# 모델 생성 & 컴파일
|
| 242 |
# =======================
|
| 243 |
with strategy.scope():
|
| 244 |
+
model = CumaLM(vocab_size=vocab_size, max_seq_len=max_len, d_ff=256, n_layers=1)
|
| 245 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 246 |
_ = model(dummy_input, training=False)
|
| 247 |
model.summary()
|