Yuchan committed on
Commit
b3fccf3
·
verified ·
1 Parent(s): 650b1da

Update Mo.py

Browse files
Files changed (1) hide show
  1. Mo.py +3 -3
Mo.py CHANGED
@@ -69,7 +69,7 @@ vocab_size = sp.get_piece_size()
69
  print(f"✅ Vocabulary size: {vocab_size}")
70
 
71
  max_len = 512
72
- batch_size = 128
73
 
74
  def text_to_ids(text):
75
  return sp.encode(text, out_type=int)
@@ -117,7 +117,7 @@ with strategy.scope():
117
  class SwiGLU(layers.Layer):
118
  def __init__(self, d_model, d_ff):
119
  super().__init__()
120
- self.proj = layers.Dense(d_ff)
121
  self.out = layers.Dense(d_model)
122
  def call(self, x):
123
  x_proj = self.proj(x)
@@ -257,7 +257,7 @@ def masked_perplexity(y_true, y_pred, eps=0.1):
257
  # 모델 생성 & 컴파일
258
  # =======================
259
  with strategy.scope():
260
- model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=256, n_layers=1)
261
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
262
  _ = model(dummy_input, training=False)
263
  model.summary()
 
69
  print(f"✅ Vocabulary size: {vocab_size}")
70
 
71
  max_len = 512
72
+ batch_size = 256
73
 
74
  def text_to_ids(text):
75
  return sp.encode(text, out_type=int)
 
117
  class SwiGLU(layers.Layer):
118
  def __init__(self, d_model, d_ff):
119
  super().__init__()
120
+ self.proj = layers.Dense(2048)
121
  self.out = layers.Dense(d_model)
122
  def call(self, x):
123
  x_proj = self.proj(x)
 
257
  # 모델 생성 & 컴파일
258
  # =======================
259
  with strategy.scope():
260
+ model = ReLM(vocab_size=vocab_size, max_seq_len=max_len, d_model=512, n_layers=9)
261
  dummy_input = tf.zeros((batch_size, max_len), dtype=tf.int32)
262
  _ = model(dummy_input, training=False)
263
  model.summary()