Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -123,6 +123,7 @@ class SwiGLU(layers.Layer):
|
|
| 123 |
x_proj = self.proj(x)
|
| 124 |
x_val, x_gate = tf.split(x_proj, 2, axis=-1)
|
| 125 |
return self.out(x_val * tf.nn.silu(x_gate))
|
|
|
|
| 126 |
|
| 127 |
class LoU(layers.Layer):
|
| 128 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
|
@@ -196,7 +197,7 @@ class Block(layers.Layer):
|
|
| 196 |
x = self.lo(x)
|
| 197 |
return x
|
| 198 |
|
| 199 |
-
class CumaLM(tf.keras.Model):
|
| 200 |
def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
|
| 201 |
super().__init__()
|
| 202 |
self.token_embedding = layers.Embedding(vocab_size, d_model)
|
|
@@ -215,6 +216,8 @@ class CumaLM(tf.keras.Model):
|
|
| 215 |
logits = tf.matmul(x, embedding_matrix, transpose_b=True)
|
| 216 |
return tf.cast(logits, tf.float32)
|
| 217 |
|
|
|
|
|
|
|
| 218 |
def smoothed_loss_keras(y_true, y_pred, eps=0.1):
|
| 219 |
y_true = tf.cast(y_true, tf.int32)
|
| 220 |
mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
|
|
@@ -242,7 +245,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
|
|
| 242 |
# 모델 생성 & 컴파일
|
| 243 |
# =======================
|
| 244 |
with strategy.scope():
|
| 245 |
-
model =
|
| 246 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 247 |
_ = model(dummy_input, training=False)
|
| 248 |
model.summary()
|
|
|
|
| 123 |
x_proj = self.proj(x)
|
| 124 |
x_val, x_gate = tf.split(x_proj, 2, axis=-1)
|
| 125 |
return self.out(x_val * tf.nn.silu(x_gate))
|
| 126 |
+
|
| 127 |
|
| 128 |
class LoU(layers.Layer):
|
| 129 |
def __init__(self, d_model, clip_value=5.0, eps=1e-6):
|
|
|
|
| 197 |
x = self.lo(x)
|
| 198 |
return x
|
| 199 |
|
| 200 |
+
class ReLM(tf.keras.Model):
|
| 201 |
def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
|
| 202 |
super().__init__()
|
| 203 |
self.token_embedding = layers.Embedding(vocab_size, d_model)
|
|
|
|
| 216 |
logits = tf.matmul(x, embedding_matrix, transpose_b=True)
|
| 217 |
return tf.cast(logits, tf.float32)
|
| 218 |
|
| 219 |
+
|
| 220 |
+
|
| 221 |
def smoothed_loss_keras(y_true, y_pred, eps=0.1):
|
| 222 |
y_true = tf.cast(y_true, tf.int32)
|
| 223 |
mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
|
|
|
|
| 245 |
# 모델 생성 & 컴파일
|
| 246 |
# =======================
|
| 247 |
with strategy.scope():
|
| 248 |
+
model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=256, n_layers=1)
|
| 249 |
dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
|
| 250 |
_ = model(dummy_input, training=False)
|
| 251 |
model.summary()
|