Yuchan committed on
Commit
cc8e480
·
verified ·
1 Parent(s): ab6754e

Update AlphaS2S.py

Browse files
Files changed (1) hide show
  1. AlphaS2S.py +44 -42
AlphaS2S.py CHANGED
@@ -296,50 +296,52 @@ with strategy.scope():
296
# Persist the trained weights to an HDF5 file so inference can reload them later.
chat_model.save_weights("chat_model.weights.h5")
# NOTE(review): the literal below appears mojibake-garbled in this rendering
# (likely "모델 가중치 저장 완료" = "model weights saved"); verify encoding in the real file.
print("\nโœ… ๋ชจ๋ธ ๊ฐ€์ค‘์น˜ ์ €์žฅ ์™„๋ฃŒ!")
298
 
299
def generate_text_topp(model, context, prompt, max_len=220, max_gen=100, p=0.9, temperature=0.8, min_len=20):
    """Generate a reply with nucleus (top-p) sampling.

    Args:
        model: seq2seq model called as model({"enc_inputs", "dec_inputs"}),
            returning per-position vocabulary logits.
        context: conversation context text, wrapped in context markers.
        prompt: user utterance text, wrapped in user markers.
        max_len: fixed encoder/decoder length; longer id sequences are
            truncated from the left, shorter ones right-padded with pad_id.
        max_gen: maximum number of tokens to sample.
        p: nucleus probability mass for top-p filtering.
        temperature: softmax temperature applied to the logits.
        min_len: minimum number of generated tokens before <eos> may stop
            generation.

    Returns:
        The decoded generated text (without <sos>/<eos>).
    """
    # Encoder input: insert special marker tokens at the id level.
    enc_ids = [context_s_id] + text_to_ids(context) + [context_e_id] + \
              [user_s_id] + text_to_ids(prompt) + [user_e_id]
    enc_ids = enc_ids[-max_len:]  # enforce length limit (keep most recent ids)
    enc_tensor = tf.convert_to_tensor(
        [np.pad(enc_ids, (0, max_len - len(enc_ids)), constant_values=pad_id)],
        dtype=tf.int32)

    # Decoder input: start with <sos>.
    generated = [start_id]

    for _ in range(max_gen):
        dec_input = generated[-max_len:]  # keep within max_len
        dec_tensor = tf.convert_to_tensor(
            [np.pad(dec_input, (0, max_len - len(dec_input)), constant_values=pad_id)],
            dtype=tf.int32)

        # Model inference; use the logits at the last real decoder position.
        logits = model({"enc_inputs": enc_tensor, "dec_inputs": dec_tensor}, training=False)
        next_token_logits = logits[0, len(dec_input) - 1].numpy()

        # Suppress special tokens so they are rarely sampled.
        next_token_logits[pad_id] -= 10.0
        next_token_logits[context_s_id] -= 5.0
        next_token_logits[context_e_id] -= 5.0
        next_token_logits[user_s_id] -= 5.0
        next_token_logits[user_e_id] -= 5.0

        # BUGFIX: while below min_len, hard-mask <eos> instead of letting it
        # be sampled. The original appended a sampled <eos> to the output and
        # kept generating, leaking the end token id into the decoded text.
        if len(generated) - 1 < min_len:
            next_token_logits[end_id] = -1e9

        # Temperature softmax, then top-p (nucleus) filtering.
        probs = tf.nn.softmax(next_token_logits / temperature).numpy()
        sorted_indices = np.argsort(probs)[::-1]
        sorted_probs = probs[sorted_indices]
        cumulative_probs = np.cumsum(sorted_probs)
        cutoff = np.searchsorted(cumulative_probs, p)
        top_indices = sorted_indices[:cutoff + 1]
        top_probs = sorted_probs[:cutoff + 1]
        top_probs /= np.sum(top_probs)  # renormalize the nucleus

        next_token_id = int(np.random.choice(top_indices, p=top_probs))
        if next_token_id == end_id:
            break  # never append <eos> itself
        generated.append(next_token_id)

    # Drop the leading <sos> and decode to text.
    return ids_to_text(generated[1:])
 
342
 
343
# Example usage: sample one reply with nucleus (top-p) decoding.
print("\n\n===== ์ƒ์„ฑ ๊ฒฐ๊ณผ =====")
sampled_reply = generate_text_topp(chat_model, "๋Œ€ํ™” ์‹œ์ž‘", "์•ˆ๋…•ํ•˜์„ธ์š”! ์–ด๋–ป๊ฒŒ ์ง€๋‚ด์…จ๋‚˜์š”?", p=0.9)
print(sampled_reply)
 
 
296
# Save the model weights (HDF5) for later reuse.
chat_model.save_weights("chat_model.weights.h5")
# NOTE(review): string literal shows encoding mojibake in this diff rendering;
# confirm it is valid Korean text in the source file.
print("\nโœ… ๋ชจ๋ธ ๊ฐ€์ค‘์น˜ ์ €์žฅ ์™„๋ฃŒ!")
298
 
299
+
300
def generate_translation_beam(model, input_text, max_len=220, beam_width=5):
    """Decode a translation for input_text with beam search.

    Args:
        model: seq2seq model called as model({"enc_inputs", "dec_inputs"}),
            returning per-position vocabulary logits.
        input_text: source sentence to translate.
        max_len: fixed encoder/decoder length; longer id sequences are
            truncated from the left, shorter ones right-padded with pad_id.
        beam_width: number of hypotheses kept per decoding step.

    Returns:
        Decoded text of the highest-scoring hypothesis (without <sos>/<eos>).
    """
    # Encoder input: tokenize, truncate from the left, pad to max_len.
    enc_ids = text_to_ids(input_text)[-max_len:]
    enc_tensor = tf.convert_to_tensor(
        [np.pad(enc_ids, (0, max_len - len(enc_ids)), constant_values=pad_id)],
        dtype=tf.int32)

    # Each beam is (token id sequence, cumulative log-probability).
    beams = [([start_id], 0.0)]

    for _ in range(max_len):
        all_candidates = []

        for seq, score in beams:
            # Finished hypotheses are carried over unchanged.
            if seq[-1] == end_id:
                all_candidates.append((seq, score))
                continue

            dec_input = seq[-max_len:]
            dec_tensor = tf.convert_to_tensor(
                [np.pad(dec_input, (0, max_len - len(dec_input)), constant_values=pad_id)],
                dtype=tf.int32)

            logits = model({"enc_inputs": enc_tensor, "dec_inputs": dec_tensor}, training=False)
            next_logits = logits[0, len(dec_input) - 1].numpy()
            next_logits[pad_id] = -1e9  # never emit padding

            # BUGFIX: normalize over the FULL vocabulary before taking the
            # top-k. The original softmax-ed only the k selected logits, so
            # beam scores were not true log-probabilities and cross-beam
            # comparison was skewed.
            log_probs = tf.nn.log_softmax(next_logits).numpy()
            top_indices = np.argsort(log_probs)[-beam_width:][::-1]

            for token_id in top_indices:
                all_candidates.append(
                    (seq + [int(token_id)], score + float(log_probs[token_id])))

        # Keep the beam_width best hypotheses by cumulative score.
        beams = sorted(all_candidates, key=lambda cand: cand[1], reverse=True)[:beam_width]

        # Stop early once every beam has produced <eos>.
        if all(seq[-1] == end_id for seq, _ in beams):
            break

    # Best hypothesis; strip a trailing <eos> (if any) and the leading <sos>.
    best_seq = beams[0][0]
    if best_seq[-1] == end_id:
        best_seq = best_seq[:-1]
    # BUGFIX: the original called the undefined name `eids_to_text`
    # (NameError at runtime); the project helper is `ids_to_text`.
    return ids_to_text(best_seq[1:])
343
 
344
# Example usage: translate one sample sentence with beam-search decoding.
src_text = "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๋Š” ์–ด๋•Œ์š”?"
translation = generate_translation_beam(chat_model, src_text, max_len=220, beam_width=5)
print("๋ฒˆ์—ญ ๊ฒฐ๊ณผ:", translation)