Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -69,7 +69,7 @@ vocab_size = sp.get_piece_size()
|
|
| 69 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 70 |
|
| 71 |
max_len = 512
|
| 72 |
-
batch_size =
|
| 73 |
|
| 74 |
def text_to_ids(text):
|
| 75 |
return sp.encode(text, out_type=int)
|
|
@@ -117,7 +117,7 @@ with strategy.scope():
|
|
| 117 |
class SwiGLU(layers.Layer):
|
| 118 |
def __init__(self, d_model, d_ff):
|
| 119 |
super().__init__()
|
| 120 |
-
self.proj = layers.Dense(
|
| 121 |
self.out = layers.Dense(d_model)
|
| 122 |
def call(self, x):
|
| 123 |
x_proj = self.proj(x)
|
|
@@ -257,7 +257,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 257 |
# 모델 생성 & 컴파일
|
| 258 |
# =======================
|
| 259 |
with strategy.scope():
|
| 260 |
-
model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=
|
| 261 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 262 |
_ = model(dummy_input, training=False)
|
| 263 |
model.summary()
|
|
|
|
| 69 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 70 |
|
| 71 |
max_len = 512
|
| 72 |
+
batch_size = 256
|
| 73 |
|
| 74 |
def text_to_ids(text):
|
| 75 |
return sp.encode(text, out_type=int)
|
|
|
|
| 117 |
class SwiGLU(layers.Layer):
|
| 118 |
def __init__(self, d_model, d_ff):
|
| 119 |
super().__init__()
|
| 120 |
+
self.proj = layers.Dense(2048)
|
| 121 |
self.out = layers.Dense(d_model)
|
| 122 |
def call(self, x):
|
| 123 |
x_proj = self.proj(x)
|
|
|
|
| 257 |
# 모델 생성 & 컴파일
|
| 258 |
# =======================
|
| 259 |
with strategy.scope():
|
| 260 |
+
model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=512, n_layers=9)
|
| 261 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 262 |
_ = model(dummy_input, training=False)
|
| 263 |
model.summary()
|