Yuchan committed on
Commit
f2448de
·
verified ·
1 Parent(s): fc704cf

Update Mo.py

Browse files
Files changed (1) hide show
  1. Mo.py +5 -2
Mo.py CHANGED
@@ -123,6 +123,7 @@ class SwiGLU(layers.Layer):
123
  x_proj = self.proj(x)
124
  x_val, x_gate = tf.split(x_proj, 2, axis=-1)
125
  return self.out(x_val * tf.nn.silu(x_gate))
 
126
 
127
  class LoU(layers.Layer):
128
  def __init__(self, d_model, clip_value=5.0, eps=1e-6):
@@ -196,7 +197,7 @@ class Block(layers.Layer):
196
  x = self.lo(x)
197
  return x
198
 
199
- class CumaLM(tf.keras.Model):
200
  def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
201
  super().__init__()
202
  self.token_embedding = layers.Embedding(vocab_size, d_model)
@@ -215,6 +216,8 @@ class CumaLM(tf.keras.Model):
215
  logits = tf.matmul(x, embedding_matrix, transpose_b=True)
216
  return tf.cast(logits, tf.float32)
217
 
 
 
218
  def smoothed_loss_keras(y_true, y_pred, eps=0.1):
219
  y_true = tf.cast(y_true, tf.int32)
220
  mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
@@ -242,7 +245,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
242
  # 모델 생성 & 컴파일
243
  # =======================
244
  with strategy.scope():
245
- model = CumaLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=256, n_layers=1)
246
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
247
  _ = model(dummy_input, training=False)
248
  model.summary()
 
123
  x_proj = self.proj(x)
124
  x_val, x_gate = tf.split(x_proj, 2, axis=-1)
125
  return self.out(x_val * tf.nn.silu(x_gate))
126
+
127
 
128
  class LoU(layers.Layer):
129
  def __init__(self, d_model, clip_value=5.0, eps=1e-6):
 
197
  x = self.lo(x)
198
  return x
199
 
200
+ class ReLM(tf.keras.Model):
201
  def __init__(self, vocab_size, max_seq_len, d_model, n_layers, dropout_rate=0.1):
202
  super().__init__()
203
  self.token_embedding = layers.Embedding(vocab_size, d_model)
 
216
  logits = tf.matmul(x, embedding_matrix, transpose_b=True)
217
  return tf.cast(logits, tf.float32)
218
 
219
+
220
+
221
  def smoothed_loss_keras(y_true, y_pred, eps=0.1):
222
  y_true = tf.cast(y_true, tf.int32)
223
  mask = tf.cast(tf.not_equal(y_true, pad_id), tf.float32)
 
245
  # 모델 생성 & 컴파일
246
  # =======================
247
  with strategy.scope():
248
+ model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=256, n_layers=1)
249
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
250
  _ = model(dummy_input, training=False)
251
  model.summary()