Spaces:

yiming-0120
/

Interactive_AI_Debate_Bot

Sleeping

App Files Files Community

yiming-0120 committed on Apr 16, 2025

Commit

a52b880

verified ·

1 Parent(s): 8fee980

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -109

app.py CHANGED Viewed

@@ -1,50 +1,46 @@
 import gradio as gr
 import os
-import openai # Use the OpenAI library
 import time
 from pathlib import Path
-# from pydub import AudioSegment # Still potentially needed if input format varies
 # --- OpenAI API 設定 ---
-# **強烈建議** 將 API Key 設為環境變數 OPENAI_API_KEY
-# 或在使用 Hugging Face Spaces 部署時設為 Secret
-# 不要直接寫在程式碼中！
 openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise ValueError("請設定 OpenAI API Key 環境變數 OPENAI_API_KEY")
-# Initialize OpenAI client (new SDK style)
 client = openai.OpenAI()
 # --- 模型選擇 (OpenAI Models) ---
-LLM_MODEL = "gpt-3.5-turbo" # Or "gpt-4", "gpt-4-turbo-preview", etc.
 ASR_MODEL = "whisper-1"
-TTS_MODEL = "tts-1" # Or "tts-1-hd"
-TTS_VOICE = "alloy" # Choose from: alloy, echo, fable, onyx, nova, shimmer
-# --- 預設辯論主題 ---
-TOPICS = [
-    "AI 是否會大規模取代人類工作？",
-    "社群媒體對社會利大於弊嗎？",
-    "是否應該推行全民基本收入 (UBI)？",
-    "全球化對本地文化是威脅還是機遇？",
-    "基因編輯技術應受更嚴格的倫理限制嗎？"
 ]
-# --- Helper 函數：呼叫 OpenAI API ---
 def call_asr(audio_filepath):
     """將音訊檔案轉換為文字 (使用 OpenAI Whisper)"""
     if not audio_filepath:
         return ""
     try:
-        # OpenAI Whisper API 需要文件句柄
         with open(audio_filepath, "rb") as audio_file:
             transcript = client.audio.transcriptions.create(
               model=ASR_MODEL,
               file=audio_file
             )
-        return transcript.text # 返回辨識出的文字
     except Exception as e:
         print(f"ASR Error (OpenAI): {e}")
         return f"[語音辨識失敗: {e}]"
@@ -52,15 +48,14 @@ def call_asr(audio_filepath):
 def call_llm(topic, user_stance, messages):
     """呼叫 OpenAI LLM 進行辯論回應"""
     ai_stance = "反方" if user_stance == "正方" else "正方"
     system_prompt = f"你正在參與一場關於「{topic}」的辯論。你扮演的是堅定的「{ai_stance}」。請根據對話歷史，針對使用者的最新論點，提出具有批判性、質疑性或反駁性的回應。保持簡潔有力，專注於論證，字數控制在150字以內。"
-    # 準備 OpenAI messages 格式
     openai_messages = [{"role": "system", "content": system_prompt}]
     for msg in messages:
-         # 確保 role 是 'user' 或 'assistant'
-         role = msg.get("role") if msg.get("role") in ["user", "assistant"] else "user" # 預設為 user
          content = msg.get("content", "")
-         if content: # 確保內容不為空
             openai_messages.append({"role": role, "content": content})
     try:
@@ -79,95 +74,85 @@ def call_llm(topic, user_stance, messages):
 def call_tts(text):
     """將文字轉換為語音 (使用 OpenAI TTS)"""
     try:
-        # 檢查文字是否有效
         if not text or not isinstance(text, str) or text.startswith("["):
              print(f"Skipping TTS for invalid text: {text}")
              return None
-        # 產生唯一的暫存檔案路徑
-        speech_file_path = Path(f"/tmp/speech_{int(time.time() * 1000)}.mp3") # OpenAI TTS 輸出 mp3
         response = client.audio.speech.create(
             model=TTS_MODEL,
             voice=TTS_VOICE,
             input=text
         )
-        # 將音訊流式寫入檔案
         response.stream_to_file(speech_file_path)
-        return str(speech_file_path) # 返回檔案路徑
     except Exception as e:
         print(f"TTS Error (OpenAI): {e}")
         return None
-# --- Gradio 主函數 (與之前版本基本相同，只是調用的 helper 不同) ---
-def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
-    """處理一輪辯論"""
     user_text = ""
-    processed_audio_path = None # 用來記錄實際處理的音訊路徑
-    # 優先處理語音輸入
     if user_input_audio:
         print(f"Processing audio input: {user_input_audio}")
-        processed_audio_path = user_input_audio # 記錄路徑
         user_text = call_asr(processed_audio_path)
-        # 將用戶原始語音(路徑)加入歷史
-        history.append(((processed_audio_path,), None)) # 用戶音訊路徑作為輸入顯示
-        if user_text and not user_text.startswith("["):
-             # 成功辨識，在下面會統一處理
-             pass
-        elif user_text.startswith("["): # 辨識失敗
-             history.append((user_text, None)) # 顯示錯誤訊息
-             # 清空輸入並返回
-             return history, None, ""
-    # 如果沒有有效語音輸入 或 語音辨識失敗，則使用文字輸入
     if not user_text or user_text.startswith("["):
         if user_input_text:
              user_text = user_input_text
-             # 將用戶文字輸入加入 history，這會是這一輪的開始
-             # 如果前面已經因為 audio 加入了 history，這裡就不重複加
-             if not processed_audio_path:
                 history.append((user_text, None))
-             # 如果 audio 辨識失敗，且有文字輸入，用文字覆蓋辨識結果 (或者提示用戶?)
-             # 這裡選擇使用文字輸入（如果有的話）
-             elif user_input_text:
                  user_text = user_input_text
-                 # 替換掉 history 中 audio 辨識失敗的訊息，或者追加？ 這裡選擇追加文字輸入
-                 history.append((user_text, None))
         else:
-            # 如果兩者都無效 (沒有輸入文字，且音訊無效/未提供/辨識失敗)
-            if not processed_audio_path: # 僅當連音訊都沒嘗試時才報錯
                 history.append(("[錯誤：請提供文字或語音論點]", None))
             # 如果是語音辨識失敗，前面已加入錯誤訊息
-            return history, None, ""
-    # 確保 user_text 是有效的字串才繼續
     if not isinstance(user_text, str) or user_text.startswith("["):
         print("Invalid user text, stopping turn.")
-        # 如果 history 最後一筆是 audio tuple，且辨識失敗，避免錯誤繼續
         if history and isinstance(history[-1][0], tuple) and history[-1][1] is None:
              history.append((f"[無法處理用戶輸入: {user_text}]", None))
-        return history, None, ""
     # --- 準備呼叫 LLM ---
     llm_messages = []
-    # 從 history 整理出 LLM 需要的 messages 格式 (保持不變)
     for i, turn in enumerate(history):
         user_msg, ai_msg = turn
         user_content = None
         if isinstance(user_msg, str):
-             if not user_msg.startswith("[") and not user_msg.startswith("(語音辨識結果:"):
                  user_content = user_msg
         elif isinstance(user_msg, tuple):
-            # 如果是最新一輪的音訊，用 call_asr 的結果
-            # 這裡需要更可靠的方式找到辨識結果，但為了簡化，先假設 user_text 是對的
-            if i == len(history) - 1 and not user_text.startswith("["): # 假設 user_text 是剛辨識的
                  user_content = user_text
-            # 注意：如果歷史中有多次語音，這裡的邏輯需要加強才能正確配對
         if user_content:
             llm_messages.append({"role": "user", "content": user_content})
@@ -177,12 +162,10 @@ def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
             if not ai_msg.startswith("["):
                 ai_content = ai_msg
         elif isinstance(ai_msg, tuple) and len(ai_msg) > 0:
-             # 之前的結構是 (文字, (音訊,))，現在已修改，但先保留兼容舊結構的解析
-             if isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
-                ai_content = ai_msg[0]
-             # 新結構是 (音訊路徑,)
-             elif isinstance(ai_msg[0], str) and ai_msg[0].endswith(".mp3"):
-                 pass # 這是音訊，不是文本內容
         if ai_content:
             llm_messages.append({"role": "assistant", "content": ai_content})
@@ -193,75 +176,89 @@ def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
             llm_messages.append({"role": "user", "content": user_text})
         else:
             print("Skipping LLM call due to invalid final user text.")
-            return history, None, ""
-    # --- 呼叫 LLM ---
-    ai_response_text = call_llm(topic, user_stance, llm_messages)
     # --- 呼叫 TTS ---
     ai_response_audio_path = call_tts(ai_response_text)
-    # --- 格式化 AI 回應並更新歷史 ---
-    # 找到用戶最新輸入的那一條記錄（它還沒有 AI 回應）
     last_user_turn_index = -1
     for i in range(len(history) - 1, -1, -1):
-        if history[i][1] is None and not history[i][0] is None: # 找到最後一個用戶有輸入但AI無回應的地方
             last_user_turn_index = i
             break
     if last_user_turn_index != -1:
-        # ****** 修改核心 ******
-        # 1. 更新找到的那一回合，填入 AI 的文字回應
         history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
-        # 2. 如果 TTS 成功，*追加*一個新的回合，只包含 AI 的音訊
         if ai_response_audio_path and not ai_response_text.startswith("["):
-            # AI 的音訊回應格式應該是 (filepath,)
-            history.append((None, (ai_response_audio_path,))) # User 設為 None, AI 設為音訊元組
-        # ****** 修改結束 ******
     else:
-        # 如果找不到用戶回合（理論上不該發生），直接追加
         print("Warning: Could not find user's turn. Appending AI response.")
         history.append(("[用戶回合丟失?]", ai_response_text))
         if ai_response_audio_path and not ai_response_text.startswith("["):
             history.append((None, (ai_response_audio_path,)))
-    # 清空輸入框
-    return history, None, ""
-# --- Gradio UI (與之前版本相同) ---
-with gr.Blocks(theme=gr.themes.Soft(), title="時事觀點對對碰 (OpenAI)") as demo:
-    gr.Markdown("## 🗣️ 時事觀點對對碰 (OpenAI 版本)")
-    gr.Markdown("選擇一個議題和你的立場，用文字或語音提出論點，AI 將扮演對手與你辯論！")
     chat_history = gr.State([]) # 儲存對話歷史
     with gr.Row():
-        topic_dd = gr.Dropdown(TOPICS, label="選擇辯論主題", value=TOPICS[0])
         stance_radio = gr.Radio(["正方", "反方"], label="選擇你的立場", value="正方")
-    # Chatbot 現在可以顯示 Audio 元件
     chatbot_ui = gr.Chatbot(label="辯論區", height=500, render_markdown=True, bubble_full_width=False)
     with gr.Row():
         with gr.Column(scale=7):
             user_txt = gr.Textbox(label="輸入你的論點 (文字)", placeholder="在此輸入文字...")
         with gr.Column(scale=3):
-             # 讓 Audio 元件接收麥克風輸入，輸出為檔案路徑
              user_audio = gr.Audio(sources=["microphone"], type="filepath", label="或錄製你的論點 (語音)")
     submit_btn = gr.Button("送出論點", variant="primary")
-    # --- 事件綁定 ---
     submit_btn.click(
         fn=debate_turn,
-        inputs=[topic_dd, stance_radio, user_txt, user_audio, chatbot_ui], # chatbot_ui 作為 history 輸入
-        outputs=[chatbot_ui, user_audio, user_txt] # chatbot_ui 作為 history 輸出，並清空 audio 和 text 輸入
     )
 if __name__ == "__main__":
-    # 注意：在 Hugging Face Spaces 上部署時不需要 share=True
-    # 在 Colab 或本地 Jupyter Notebook 可能需要
-    demo.launch(debug=True) # 開啟 debug 可以在控制台看到更詳細的錯誤

 import gradio as gr
 import os
+import openai
 import time
 from pathlib import Path
+# from pydub import AudioSegment
 # --- OpenAI API 設定 ---
 openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise ValueError("請設定 OpenAI API Key 環境變數 OPENAI_API_KEY")
 client = openai.OpenAI()
 # --- 模型選擇 (OpenAI Models) ---
+LLM_MODEL = "gpt-3.5-turbo"
 ASR_MODEL = "whisper-1"
+TTS_MODEL = "tts-1"
+TTS_VOICE = "nova" # 改用聽起來可能較清晰的聲音
+# --- 預設辯論主題 (更新為台灣時事) ---
+CURRENT_TW_TOPICS = [
+    "台灣是否應提高核能發電比例以應對能源需求？",
+    "面對高房價，興建社會住宅是最佳解方嗎？",
+    "如何平衡經濟發展與環境保護（例如：能源轉型、生態保育）？",
+    "少子化下，延後退休年齡或引進更多外籍移工哪個更急迫？",
+    "改善台灣交通亂象，應優先加強執法還是改善道路設計？",
+    "健保制度改革：提高保費、部分負擔，或有其他永續方案？",
+    # 可以再加入其他近期熱門議題
 ]
+# --- Helper 函數：呼叫 OpenAI API (call_asr, call_llm, call_tts 保持不變) ---
 def call_asr(audio_filepath):
     """將音訊檔案轉換為文字 (使用 OpenAI Whisper)"""
     if not audio_filepath:
         return ""
     try:
         with open(audio_filepath, "rb") as audio_file:
             transcript = client.audio.transcriptions.create(
               model=ASR_MODEL,
               file=audio_file
             )
+        return transcript.text
     except Exception as e:
         print(f"ASR Error (OpenAI): {e}")
         return f"[語音辨識失敗: {e}]"
 def call_llm(topic, user_stance, messages):
     """呼叫 OpenAI LLM 進行辯論回應"""
     ai_stance = "反方" if user_stance == "正方" else "正方"
+    # System prompt 保持不變，因為 topic 會動態傳入
     system_prompt = f"你正在參與一場關於「{topic}」的辯論。你扮演的是堅定的「{ai_stance}」。請根據對話歷史，針對使用者的最新論點，提出具有批判性、質疑性或反駁性的回應。保持簡潔有力，專注於論證，字數控制在150字以內。"
     openai_messages = [{"role": "system", "content": system_prompt}]
     for msg in messages:
+         role = msg.get("role") if msg.get("role") in ["user", "assistant"] else "user"
          content = msg.get("content", "")
+         if content:
             openai_messages.append({"role": role, "content": content})
     try:
 def call_tts(text):
     """將文字轉換為語音 (使用 OpenAI TTS)"""
     try:
         if not text or not isinstance(text, str) or text.startswith("["):
              print(f"Skipping TTS for invalid text: {text}")
              return None
+        speech_file_path = Path(f"/tmp/speech_{int(time.time() * 1000)}.mp3")
         response = client.audio.speech.create(
             model=TTS_MODEL,
             voice=TTS_VOICE,
             input=text
         )
+        # 使用 stream_to_file (即使有 DeprecationWarning，目前仍可用)
         response.stream_to_file(speech_file_path)
+        return str(speech_file_path)
     except Exception as e:
         print(f"TTS Error (OpenAI): {e}")
         return None
+# --- Gradio 主函數 (修改 inputs 和 topic 決定邏輯) ---
+def debate_turn(topic_from_dropdown, custom_topic, user_stance, user_input_text, user_input_audio, history):
+    """處理一輪辯論，增加自訂主題處理"""
+    # --- 決定最終辯論主題 ---
+    final_topic = ""
+    if custom_topic and custom_topic.strip(): # 檢查自訂主題是否非空
+        final_topic = custom_topic.strip()
+        print(f"Using custom topic: {final_topic}")
+    elif topic_from_dropdown:
+        final_topic = topic_from_dropdown
+        print(f"Using dropdown topic: {final_topic}")
+    else:
+        # 如果兩者都無效，給一個預設或錯誤提示
+        history.append(("[錯誤：請選擇或輸入一個辯論主題]", None))
+        return history, None, "", "" # 返回時也要對應 outputs 數量
+    # --- 處理用戶輸入 (文字或語音) ---
     user_text = ""
+    processed_audio_path = None
     if user_input_audio:
         print(f"Processing audio input: {user_input_audio}")
+        processed_audio_path = user_input_audio
         user_text = call_asr(processed_audio_path)
+        history.append(((processed_audio_path,), None))
+        if user_text.startswith("["):
+             history.append((user_text, None))
+             return history, None, "", final_topic # 返回 history, 清空 audio, 清空 text, 保持 custom_topic 不變
     if not user_text or user_text.startswith("["):
         if user_input_text:
              user_text = user_input_text
+             if not processed_audio_path: # 僅當沒有語音輸入時才新增文字輸入歷史
                 history.append((user_text, None))
+             elif user_input_text: # 如果語音辨識失敗但有文字輸入
                  user_text = user_input_text
+                 history.append((f"(改用文字輸入: {user_text})", None)) # 標註一下
         else:
+            if not processed_audio_path:
                 history.append(("[錯誤：請提供文字或語音論點]", None))
             # 如果是語音辨識失敗，前面已加入錯誤訊息
+            return history, None, "", final_topic
     if not isinstance(user_text, str) or user_text.startswith("["):
         print("Invalid user text, stopping turn.")
         if history and isinstance(history[-1][0], tuple) and history[-1][1] is None:
              history.append((f"[無法處理用戶輸入: {user_text}]", None))
+        return history, None, "", final_topic
     # --- 準備呼叫 LLM ---
     llm_messages = []
+    # (從 history 整理出 LLM messages 的邏輯保持不變)
     for i, turn in enumerate(history):
         user_msg, ai_msg = turn
         user_content = None
         if isinstance(user_msg, str):
+             if not user_msg.startswith("[") and not user_msg.startswith("(改用文字輸入:") and not user_msg.startswith("(語音辨識結果:"): # 避免將提示文字加入對話
                  user_content = user_msg
         elif isinstance(user_msg, tuple):
+            # 假設 user_text 是剛辨識的結果
+            if i == len(history) - 1 and not user_text.startswith("["):
                  user_content = user_text
         if user_content:
             llm_messages.append({"role": "user", "content": user_content})
             if not ai_msg.startswith("["):
                 ai_content = ai_msg
         elif isinstance(ai_msg, tuple) and len(ai_msg) > 0:
+             if isinstance(ai_msg[0], str) and ai_msg[0].endswith(".mp3"):
+                 pass # 音訊檔，略過
+             elif isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
+                 ai_content = ai_msg[0] # 假設是 (文字, (音訊路徑,)) 結構中的文字
         if ai_content:
             llm_messages.append({"role": "assistant", "content": ai_content})
             llm_messages.append({"role": "user", "content": user_text})
         else:
             print("Skipping LLM call due to invalid final user text.")
+            return history, None, "", final_topic
+    # --- 呼叫 LLM (使用 final_topic) ---
+    ai_response_text = call_llm(final_topic, user_stance, llm_messages)
     # --- 呼叫 TTS ---
     ai_response_audio_path = call_tts(ai_response_text)
+    # --- 格式化 AI 回應並更新歷史 (使用之前修正的邏輯) ---
     last_user_turn_index = -1
     for i in range(len(history) - 1, -1, -1):
+        if history[i][1] is None and not history[i][0] is None:
             last_user_turn_index = i
             break
     if last_user_turn_index != -1:
+        # 1. 更新找到的回合，填入 AI 文字回應
         history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
+        # 2. 如果 TTS 成功，追加一個只包含 AI 音訊的回合
         if ai_response_audio_path and not ai_response_text.startswith("["):
+            history.append((None, (ai_response_audio_path,)))
     else:
         print("Warning: Could not find user's turn. Appending AI response.")
         history.append(("[用戶回合丟失?]", ai_response_text))
         if ai_response_audio_path and not ai_response_text.startswith("["):
             history.append((None, (ai_response_audio_path,)))
+    # 清空本輪的文字和語音輸入，不清空自訂主題輸入框
+    return history, None, "", final_topic # 返回 history, 清空 audio, 清空 text, custom_topic 保持不變
+# --- Gradio UI (增加自訂主題輸入框) ---
+with gr.Blocks(theme=gr.themes.Soft(), title="時事觀點對對碰 (OpenAI + 自訂主題)") as demo:
+    gr.Markdown("## 🗣️ 時事觀點對對碰 (OpenAI + 自訂主題)")
+    gr.Markdown("選擇預設議題或輸入自訂議題，選擇立場，用文字或語音提出論點，AI 將扮演對手與你辯論！")
     chat_history = gr.State([]) # 儲存對話歷史
     with gr.Row():
+        # 選項一：下拉選單
+        topic_dd = gr.Dropdown(CURRENT_TW_TOPICS, label="選擇預設辯論主題", value=CURRENT_TW_TOPICS[0])
+        # 選項二：文字輸入框
+        custom_topic_txt = gr.Textbox(label="或輸入自訂辯論主題", placeholder="若此處輸入，將優先使用此主題...")
+        # 立場選擇
         stance_radio = gr.Radio(["正方", "反方"], label="選擇你的立場", value="正方")
     chatbot_ui = gr.Chatbot(label="辯論區", height=500, render_markdown=True, bubble_full_width=False)
     with gr.Row():
         with gr.Column(scale=7):
             user_txt = gr.Textbox(label="輸入你的論點 (文字)", placeholder="在此輸入文字...")
         with gr.Column(scale=3):
              user_audio = gr.Audio(sources=["microphone"], type="filepath", label="或錄製你的論點 (語音)")
     submit_btn = gr.Button("送出論點", variant="primary")
+    # --- 事件綁定 (更新 inputs 和 outputs) ---
     submit_btn.click(
         fn=debate_turn,
+        # inputs 順序需要與 debate_turn 函數參數對應
+        inputs=[topic_dd, custom_topic_txt, stance_radio, user_txt, user_audio, chatbot_ui],
+        # outputs: 更新 chatbot、清空 audio 輸入、清空 text 輸入，並將 debate_turn 回傳的第四個值寫回 custom_topic_txt
+        outputs=[chatbot_ui, user_audio, user_txt, custom_topic_txt] # custom_topic_txt 也作為輸出，使其保持不變
     )
+    # 當選擇下拉選單時，清空自訂輸入框 (可選，提升體驗)
+    def clear_custom_topic(dropdown_value):
+        # 只有當下拉選單有值被選擇時才觸發清空
+        if dropdown_value:
+             return ""
+        return gr.Skip() # 如果 dropdown 被清空，則不改變 custom_topic_txt
+    topic_dd.change(fn=clear_custom_topic, inputs=[topic_dd], outputs=[custom_topic_txt])
+    # 當在自訂輸入框打字時，清除下拉選單的選擇 (可選，提升體驗)
+    def clear_dropdown(custom_text):
+        # 只有當 custom_text 有內容時才觸發清空
+        if custom_text and custom_text.strip():
+            return None # 返回 None 會清除 Dropdown 的選擇
+        return gr.Skip() # 如果 custom_text 被清空，不改變 dropdown
+    custom_topic_txt.change(fn=clear_dropdown, inputs=[custom_topic_txt], outputs=[topic_dd])
 if __name__ == "__main__":
+    demo.launch(debug=True)