Spaces:

yiming-0120
/

Interactive_AI_Debate_Bot

Sleeping

App Files Files Community

yiming-0120 committed on Apr 16, 2025

Commit

8fee980

verified ·

1 Parent(s): 8fac0d8

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -44

app.py CHANGED Viewed

@@ -115,7 +115,7 @@ def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
         # 將用戶原始語音(路徑)加入歷史
         history.append(((processed_audio_path,), None)) # 用戶音訊路徑作為輸入顯示
         if user_text and not user_text.startswith("["):
-             # 成功辨識，在下面會統一加入 history
              pass
         elif user_text.startswith("["): # 辨識失敗
              history.append((user_text, None)) # 顯示錯誤訊息
@@ -127,66 +127,73 @@ def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
         if user_input_text:
              user_text = user_input_text
              # 將用戶文字輸入加入 history，這會是這一輪的開始
-             history.append((user_text, None))
         else:
-            # 如果兩者都無效 (沒有輸入文字，且語音無效/未提供)
             if not processed_audio_path: # 僅當連音訊都沒嘗試時才報錯
                 history.append(("[錯誤：請提供文字或語音論點]", None))
-                return history, None, ""
-            # 如果是語音辨識失敗導致的 user_text 為空，前面已記錄錯誤，這裡不用重複加
     # 確保 user_text 是有效的字串才繼續
     if not isinstance(user_text, str) or user_text.startswith("["):
         print("Invalid user text, stopping turn.")
-        return history, None, "" # 可能前面語音辨識失敗，終止這輪
     # --- 準備呼叫 LLM ---
-    # 從 history 整理出 LLM 需要的 messages 格式
     llm_messages = []
     for i, turn in enumerate(history):
         user_msg, ai_msg = turn
-        # --- 處理用戶訊息 ---
         user_content = None
-        if isinstance(user_msg, str): # 純文字輸入 或 之前的錯誤/辨識訊息
              if not user_msg.startswith("[") and not user_msg.startswith("(語音辨識結果:"):
                  user_content = user_msg
-        elif isinstance(user_msg, tuple): # 音訊輸入元組 (路徑,)
-            # 需要找到對應的辨識結果 (如果有的話)
-            # 簡單處理：如果這是最新一輪的音訊，user_text 應該已經包含辨識結果
-            if i == len(history) - 1 and not user_text.startswith("["):
                  user_content = user_text
-            # (更健壯的作法是檢查 history[i+1] 是否為辨識結果，但目前結構下直接用 user_text 更直接)
         if user_content:
             llm_messages.append({"role": "user", "content": user_content})
-        # --- 處理 AI 訊息 ---
         ai_content = None
-        if isinstance(ai_msg, str): # 純文字回應 或 之前的錯誤訊息
             if not ai_msg.startswith("["):
                 ai_content = ai_msg
-        elif isinstance(ai_msg, tuple) and len(ai_msg) > 0: # (文字, (音訊路徑,))
-            if isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
-                ai_content = ai_msg[0] # 取元組中的文字部分
         if ai_content:
             llm_messages.append({"role": "assistant", "content": ai_content})
-    # 確保最後一條訊息是 user 的輸入 (如果 llm_messages 非空)
-    if llm_messages and llm_messages[-1]["role"] != "user":
-         # 這種情況理論上不應發生在正確流程中，但以防萬一
-         print("Warning: Last message before LLM call is not from user. Appending current user text.")
-         if not user_text.startswith("["):
-             llm_messages.append({"role": "user", "content": user_text})
-         else:
-             print("Skipping LLM call due to invalid user text.")
-             return history, None, "" # 無法呼叫 LLM
-    elif not llm_messages and not user_text.startswith("["):
-         # 如果是第一輪
-         llm_messages.append({"role": "user", "content": user_text})
     # --- 呼叫 LLM ---
     ai_response_text = call_llm(topic, user_stance, llm_messages)
@@ -198,21 +205,27 @@ def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
     # 找到用戶最新輸入的那一條記錄（它還沒有 AI 回應）
     last_user_turn_index = -1
     for i in range(len(history) - 1, -1, -1):
-        if history[i][1] is None: # 找到最後一個 AI 回應為 None 的地方
             last_user_turn_index = i
             break
     if last_user_turn_index != -1:
-        if ai_response_audio_path:
-            # 如果 TTS 成功，組合文字和 Audio 元件
-             history[last_user_turn_index] = (history[last_user_turn_index][0], (ai_response_text, (ai_response_audio_path,)))
-        else:
-            # 如果 TTS 失敗，只顯示文字
-            history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
     else:
-        # 理論上不應該發生，表示找不到用戶的輸入回合
-        print("Error: Could not find user's turn in history to append AI response.")
-        history.append(("[內部錯誤]", ai_response_text)) # 至少顯示 AI 回應
     # 清空輸入框
     return history, None, ""

         # 將用戶原始語音(路徑)加入歷史
         history.append(((processed_audio_path,), None)) # 用戶音訊路徑作為輸入顯示
         if user_text and not user_text.startswith("["):
+             # 成功辨識，在下面會統一處理
              pass
         elif user_text.startswith("["): # 辨識失敗
              history.append((user_text, None)) # 顯示錯誤訊息
         if user_input_text:
              user_text = user_input_text
              # 將用戶文字輸入加入 history，這會是這一輪的開始
+             # 如果前面已經因為 audio 加入了 history，這裡就不重複加
+             if not processed_audio_path:
+                history.append((user_text, None))
+             # 如果 audio 辨識失敗，且有文字輸入，用文字覆蓋辨識結果 (或者提示用戶?)
+             # 這裡選擇使用文字輸入（如果有的話）
+             elif user_input_text:
+                 user_text = user_input_text
+                 # 替換掉 history 中 audio 辨識失敗的訊息，或者追加？ 這裡選擇追加文字輸入
+                 history.append((user_text, None))
         else:
+            # 如果兩者都無效 (沒有輸入文字，且音訊無效/未提供/辨識失敗)
             if not processed_audio_path: # 僅當連音訊都沒嘗試時才報錯
                 history.append(("[錯誤：請提供文字或語音論點]", None))
+            # 如果是語音辨識失敗，前面已加入錯誤訊息
+            return history, None, ""
     # 確保 user_text 是有效的字串才繼續
     if not isinstance(user_text, str) or user_text.startswith("["):
         print("Invalid user text, stopping turn.")
+        # 如果 history 最後一筆是 audio tuple，且辨識失敗，避免錯誤繼續
+        if history and isinstance(history[-1][0], tuple) and history[-1][1] is None:
+             history.append((f"[無法處理用戶輸入: {user_text}]", None))
+        return history, None, ""
     # --- 準備呼叫 LLM ---
     llm_messages = []
+    # 從 history 整理出 LLM 需要的 messages 格式 (保持不變)
     for i, turn in enumerate(history):
         user_msg, ai_msg = turn
         user_content = None
+        if isinstance(user_msg, str):
              if not user_msg.startswith("[") and not user_msg.startswith("(語音辨識結果:"):
                  user_content = user_msg
+        elif isinstance(user_msg, tuple):
+            # 如果是最新一輪的音訊，用 call_asr 的結果
+            # 這裡需要更可靠的方式找到辨識結果，但為了簡化，先假設 user_text 是對的
+            if i == len(history) - 1 and not user_text.startswith("["): # 假設 user_text 是剛辨識的
                  user_content = user_text
+            # 注意：如果歷史中有多次語音，這裡的邏輯需要加強才能正確配對
         if user_content:
             llm_messages.append({"role": "user", "content": user_content})
         ai_content = None
+        if isinstance(ai_msg, str):
             if not ai_msg.startswith("["):
                 ai_content = ai_msg
+        elif isinstance(ai_msg, tuple) and len(ai_msg) > 0:
+             # 之前的結構是 (文字, (音訊,))，現在已修改，但先保留兼容舊結構的解析
+             if isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
+                ai_content = ai_msg[0]
+             # 新結構是 (音訊路徑,)
+             elif isinstance(ai_msg[0], str) and ai_msg[0].endswith(".mp3"):
+                 pass # 這是音訊，不是文本內容
         if ai_content:
             llm_messages.append({"role": "assistant", "content": ai_content})
+    # 確保最後是 user message
+    if not llm_messages or llm_messages[-1]["role"] == "assistant":
+        if not user_text.startswith("["):
+            llm_messages.append({"role": "user", "content": user_text})
+        else:
+            print("Skipping LLM call due to invalid final user text.")
+            return history, None, ""
     # --- 呼叫 LLM ---
     ai_response_text = call_llm(topic, user_stance, llm_messages)
     # 找到用戶最新輸入的那一條記錄（它還沒有 AI 回應）
     last_user_turn_index = -1
     for i in range(len(history) - 1, -1, -1):
+        if history[i][1] is None and not history[i][0] is None: # 找到最後一個用戶有輸入但AI無回應的地方
             last_user_turn_index = i
             break
     if last_user_turn_index != -1:
+        # ****** 修改核心 ******
+        # 1. 更新找到的那一回合，填入 AI 的文字回應
+        history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
+        # 2. 如果 TTS 成功，*追加*一個新的回合，只包含 AI 的音訊
+        if ai_response_audio_path and not ai_response_text.startswith("["):
+            # AI 的音訊回應格式應該是 (filepath,)
+            history.append((None, (ai_response_audio_path,))) # User 設為 None, AI 設為音訊元組
+        # ****** 修改結束 ******
     else:
+        # 如果找不到用戶回合（理論上不該發生），直接追加
+        print("Warning: Could not find user's turn. Appending AI response.")
+        history.append(("[用戶回合丟失?]", ai_response_text))
+        if ai_response_audio_path and not ai_response_text.startswith("["):
+            history.append((None, (ai_response_audio_path,)))
     # 清空輸入框
     return history, None, ""