Yuchan committed (verified)
Commit c41b0cf · Parent(s): cc8e480

Update AlphaS2S.py

Files changed (1):
  1. AlphaS2S.py +6 -6
AlphaS2S.py CHANGED

@@ -13,7 +13,7 @@ tf.get_logger().setLevel("ERROR")
 SEED = 42
 tf.random.set_seed(SEED)
 np.random.seed(SEED)
-max_len = 220 # was set to 200 in the original code
+max_len = 128 # was set to 200 in the original code
 batch_size = 48
 
 # TPU initialization (same as the original code)
@@ -180,7 +180,7 @@ class SwiGLU(layers.Layer):
 class EncoderBlock(layers.Layer):
     def __init__(self, d_model, num_heads, dff, dropout=0.1):
         super().__init__()
-        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
+        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model//num_heads)
         self.ffn = SwiGLU(d_model, dff)
         self.norm1 = layers.LayerNormalization(epsilon=1e-6)
         self.norm2 = layers.LayerNormalization(epsilon=1e-6)
@@ -195,8 +195,8 @@ class EncoderBlock(layers.Layer):
 class DecoderBlock(layers.Layer):
     def __init__(self, d_model, num_heads, dff, dropout=0.1):
         super().__init__()
-        self.self_mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
-        self.cross_mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
+        self.self_mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model//num_heads)
+        self.cross_mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model//num_heads)
         self.ffn = SwiGLU(d_model, dff)
         self.norm1 = layers.LayerNormalization(epsilon=1e-6)
         self.norm2 = layers.LayerNormalization(epsilon=1e-6)
@@ -213,7 +213,7 @@ class DecoderBlock(layers.Layer):
         return self.norm3(out2 + ffn_out)
 
 class Transformer(tf.keras.Model):
-    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, max_len=256, dropout=0.1):
+    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, max_len=128, dropout=0.1):
         super().__init__()
         self.max_len = max_len
         self.d_model = d_model
@@ -271,7 +271,7 @@ def create_lr_schedule(initial_lr=5e-5, decay_steps=10000, decay_rate=0.9):
 
 with strategy.scope():
     # ⚠️ Fix: use the defined vocab_size instead of chat_vocab_size
-    chat_model = Transformer(num_layers=2, d_model=256, num_heads=4, dff=768, input_vocab_size=vocab_size, target_vocab_size=evocab_size, max_len=256, dropout=0.1)
+    chat_model = Transformer(num_layers=2, d_model=160, num_heads=4, dff=640, input_vocab_size=vocab_size, target_vocab_size=evocab_size, max_len=128, dropout=0.1)
 
     dummy_input = {
         "enc_inputs": tf.zeros((1, max_len), dtype=tf.int32),
 