OpenLab-NLP
/

model-prototype

Model card Files Files and versions

xet

Community

Yuchan committed on Nov 24, 2025

Commit

dd6e662

verified ·

1 Parent(s): a638654

Update AlphaS2S.py

Browse files

Files changed (1) hide show

AlphaS2S.py +3 -14

AlphaS2S.py CHANGED Viewed

@@ -306,40 +306,32 @@ def generate_text_topp(model, prompt, max_len=150, max_gen=100, p=0.9, temperatu
     # 인코더 입력은 <start> Prompt <sep> 만 사용
     model_input = text_to_ids(f"<start> {prompt} <sep>")
     model_input = model_input[:max_len]
-    generated = list(model_input)
     for step in range(max_gen):
         current_len = len(generated)
         # 현재까지 생성된 시퀀스를 입력으로 사용
         if current_len > max_len:
             input_seq = generated[-max_len:]
         else:
-            input_seq = generated
         # 패딩
         input_padded = np.pad(input_seq, (0, max_len - len(input_seq)), constant_values=pad_id)
         input_tensor = tf.convert_to_tensor([input_padded])
         # 모델 추론 (enc_inputs, dec_inputs 모두 동일한 시퀀스를 사용)
         dummy_input = {
             "enc_inputs": input_tensor,
             "dec_inputs": input_tensor
         }
         logits = model(dummy_input, training=False)
         # 다음 토큰의 로짓은 시퀀스의 마지막 토큰 위치에서 가져옴 (0-based index: current_len - 1)
         # 하지만 패딩 후 input_tensor의 실제 시퀀스 길이는 len(input_seq)
         next_token_logits = logits[0, len(input_seq) - 1].numpy()
         # 특수 토큰 생성 억제
         next_token_logits[end_id] -= 5.0
         next_token_logits[pad_id] -= 10.0
         probs = tf.nn.softmax(next_token_logits / temperature).numpy()
         sorted_indices = np.argsort(probs)[::-1]
         sorted_probs = probs[sorted_indices]
         # Top-p (Nucleus) Sampling
         cumulative_probs = np.cumsum(sorted_probs)
         cutoff = np.searchsorted(cumulative_probs, p)
@@ -347,12 +339,9 @@ def generate_text_topp(model, prompt, max_len=150, max_gen=100, p=0.9, temperatu
         top_probs = sorted_probs[:cutoff + 1]
         top_probs /= np.sum(top_probs)
         next_token_id = np.random.choice(top_indices, p=top_probs)
         if next_token_id == end_id and len(generated) >= min_len:
-            break
         generated.append(int(next_token_id))
     # <start> 토큰 제거 및 <sep> 이전 부분 제거
     try:
         sep_index = generated.index(sep_id)

     # 인코더 입력은 <start> Prompt <sep> 만 사용
     model_input = text_to_ids(f"<start> {prompt} <sep>")
     model_input = model_input[:max_len]
+    generated = list(model_input)
     for step in range(max_gen):
         current_len = len(generated)
         # 현재까지 생성된 시퀀스를 입력으로 사용
         if current_len > max_len:
             input_seq = generated[-max_len:]
         else:
+            input_seq = generated
         # 패딩
         input_padded = np.pad(input_seq, (0, max_len - len(input_seq)), constant_values=pad_id)
         input_tensor = tf.convert_to_tensor([input_padded])
         # 모델 추론 (enc_inputs, dec_inputs 모두 동일한 시퀀스를 사용)
         dummy_input = {
             "enc_inputs": input_tensor,
             "dec_inputs": input_tensor
         }
         logits = model(dummy_input, training=False)
         # 다음 토큰의 로짓은 시퀀스의 마지막 토큰 위치에서 가져옴 (0-based index: current_len - 1)
         # 하지만 패딩 후 input_tensor의 실제 시퀀스 길이는 len(input_seq)
         next_token_logits = logits[0, len(input_seq) - 1].numpy()
         # 특수 토큰 생성 억제
         next_token_logits[end_id] -= 5.0
         next_token_logits[pad_id] -= 10.0
         probs = tf.nn.softmax(next_token_logits / temperature).numpy()
         sorted_indices = np.argsort(probs)[::-1]
         sorted_probs = probs[sorted_indices]
         # Top-p (Nucleus) Sampling
         cumulative_probs = np.cumsum(sorted_probs)
         cutoff = np.searchsorted(cumulative_probs, p)
         top_probs = sorted_probs[:cutoff + 1]
         top_probs /= np.sum(top_probs)
         next_token_id = np.random.choice(top_indices, p=top_probs)
         if next_token_id == end_id and len(generated) >= min_len:
+            break
         generated.append(int(next_token_id))
     # <start> 토큰 제거 및 <sep> 이전 부분 제거
     try:
         sep_index = generated.index(sep_id)