OpenLab-NLP
/

model-prototype

Model card · Files and versions

Yuchan committed on Nov 24, 2025

Commit

324b6bd

·

verified ·

1 Parent(s): ee9c45d

Update AlphaS2S.py

Files changed (1) hide show

AlphaS2S.py +2 -5

AlphaS2S.py CHANGED Viewed

@@ -255,16 +255,13 @@ def create_lr_schedule(initial_lr=5e-5, decay_steps=10000, decay_rate=0.9):
 with strategy.scope():
     # ⚠️ 수정: chat_vocab_size 대신 정의된 vocab_size 사용
-    chat_model = Transformer(num_layers=2, d_model=304, num_heads=4, dff=912, input_vocab_size=vocab_size, target_vocab_size=vocab_size, max_len=256, dropout=0.1)
     dummy_input = {
         "enc_inputs": tf.zeros((1, max_len), dtype=tf.int32),
         "dec_inputs": tf.zeros((1, max_len), dtype=tf.int32)
     }
     _ = chat_model(dummy_input)
-    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
     # 옵티마이저 설정
     optimizer = tf.keras.optimizers.Adam(
         learning_rate=create_lr_schedule(),
@@ -283,7 +280,7 @@ with strategy.scope():
 chat_model.save_weights("chat_model.weights.h5")
 print("\n✅ 모델 가중치 저장 완료!")
-def generate_text_topp(model, context, prompt, max_len=256, max_gen=100, p=0.9, temperature=0.8, min_len=20):
     # Encoder input: ID 레벨로 특수 토큰 삽입
     enc_ids = [context_s_id] + text_to_ids(context) + [context_e_id] + \
               [user_s_id] + text_to_ids(prompt) + [user_e_id]

 with strategy.scope():
     # ⚠️ 수정: chat_vocab_size 대신 정의된 vocab_size 사용
+    chat_model = Transformer(num_layers=2, d_model=256, num_heads=4, dff=768, input_vocab_size=vocab_size, target_vocab_size=vocab_size, max_len=256, dropout=0.1)
     dummy_input = {
         "enc_inputs": tf.zeros((1, max_len), dtype=tf.int32),
         "dec_inputs": tf.zeros((1, max_len), dtype=tf.int32)
     }
     _ = chat_model(dummy_input)
     # 옵티마이저 설정
     optimizer = tf.keras.optimizers.Adam(
         learning_rate=create_lr_schedule(),
 chat_model.save_weights("chat_model.weights.h5")
 print("\n✅ 모델 가중치 저장 완료!")
+def generate_text_topp(model, context, prompt, max_len=220, max_gen=100, p=0.9, temperature=0.8, min_len=20):
     # Encoder input: ID 레벨로 특수 토큰 삽입
     enc_ids = [context_s_id] + text_to_ids(context) + [context_e_id] + \
               [user_s_id] + text_to_ids(prompt) + [user_e_id]