Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -209,7 +209,7 @@ class Block(layers.Layer):
|
|
| 209 |
x = self.lo(x)
|
| 210 |
return x
|
| 211 |
|
| 212 |
-
class ReLM(tf.keras.Model):
|
| 213 |
def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
|
| 214 |
super().__init__()
|
| 215 |
self.token_embedding = layers.Embedding(vocab_size, d_model)
|
|
@@ -228,8 +228,6 @@ class ReLM(tf.keras.Model):
|
|
| 228 |
logits = tf.matmul(x, embedding_matrix, transpose_b=True)
|
| 229 |
return tf.cast(logits, tf.float32)
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
def smoothed_loss_keras(y_true, y_pred, eps=0.1):
|
| 234 |
y_true = tf.cast(y_true, tf.int32)
|
| 235 |
mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
|
|
@@ -257,7 +255,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 257 |
# 모델 생성 & 컴파일
|
| 258 |
# =======================
|
| 259 |
with strategy.scope():
|
| 260 |
-
model =
|
| 261 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 262 |
_ = model(dummy_input, training=False)
|
| 263 |
model.summary()
|
|
|
|
| 209 |
x = self.lo(x)
|
| 210 |
return x
|
| 211 |
|
| 212 |
+
class LaSLM(tf.keras.Model):
|
| 213 |
def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
|
| 214 |
super().__init__()
|
| 215 |
self.token_embedding = layers.Embedding(vocab_size, d_model)
|
|
|
|
| 228 |
logits = tf.matmul(x, embedding_matrix, transpose_b=True)
|
| 229 |
return tf.cast(logits, tf.float32)
|
| 230 |
|
|
|
|
|
|
|
| 231 |
def smoothed_loss_keras(y_true, y_pred, eps=0.1):
|
| 232 |
y_true = tf.cast(y_true, tf.int32)
|
| 233 |
mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
|
|
|
|
| 255 |
# 모델 생성 & 컴파일
|
| 256 |
# =======================
|
| 257 |
with strategy.scope():
|
| 258 |
+
model = LaSLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=384, n_layers=3)
|
| 259 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 260 |
_ = model(dummy_input, training=False)
|
| 261 |
model.summary()
|