Spaces:

yiming-0120
/

Interactive_AI_Debate_Bot

Sleeping

App Files Files Community

yiming-0120 committed on Apr 16, 2025

Commit

350bd0d

verified ·

1 Parent(s): c2f09a8

Create app.py

Browse files

Files changed (1) hide show

app.py +254 -0

app.py ADDED Viewed

	@@ -0,0 +1,254 @@

+import gradio as gr
+import os
+import openai # Use the OpenAI library
+import time
+from pathlib import Path
+# from pydub import AudioSegment # Still potentially needed if input format varies
# --- OpenAI API configuration ---
# The API key is read from the OPENAI_API_KEY environment variable. When
# deploying on Hugging Face Spaces, store it as a Secret. Never hard-code the
# key in source.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Fail fast at import time so a missing key is reported immediately.
    raise ValueError("請設定 OpenAI API Key 環境變數 OPENAI_API_KEY")

# Client object for the v1-style OpenAI SDK; used by every helper below.
client = openai.OpenAI()

# --- Model selection (OpenAI models) ---
LLM_MODEL = "gpt-3.5-turbo"  # alternatives: "gpt-4", "gpt-4-turbo-preview", ...
ASR_MODEL = "whisper-1"
TTS_MODEL = "tts-1"  # or "tts-1-hd"
TTS_VOICE = "alloy"  # one of: alloy, echo, fable, onyx, nova, shimmer

# --- Preset debate topics (shown in the topic dropdown) ---
TOPICS = [
    "AI 是否會大規模取代人類工作？",
    "社群媒體對社會利大於弊嗎？",
    "是否應該推行全民基本收入 (UBI)？",
    "全球化對本地文化是威脅還是機遇？",
    "基因編輯技術應受更嚴格的倫理限制嗎？",
]
# --- Helper functions: OpenAI API calls ---
def call_asr(audio_filepath):
    """Transcribe an audio file to text with the OpenAI Whisper API.

    Args:
        audio_filepath: Path of the recorded audio file. A falsy value
            (``None`` or ``""``) means "no recording".

    Returns:
        The transcription with surrounding whitespace removed. Returns ``""``
        when no file was given or nothing was recognised. On failure, returns
        a bracketed marker of the form ``"[語音辨識失敗: <reason>]"`` instead of
        raising, so callers can detect errors via the leading ``"["``.
    """
    if not audio_filepath:
        return ""

    try:
        # The transcription endpoint needs an open binary file handle.
        with open(audio_filepath, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model=ASR_MODEL,
                file=audio_file,
            )
    except Exception as e:
        # Boundary handler: any I/O or API failure becomes an error marker so
        # the UI can show it instead of crashing the request.
        print(f"ASR Error (OpenAI): {e}")
        return f"[語音辨識失敗: {e}]"

    # Normalise a missing or whitespace-only transcription to "" so callers
    # do not mistake it for a real argument.
    return (getattr(transcript, "text", None) or "").strip()
def call_llm(topic, user_stance, messages):
    """Ask the OpenAI chat model for the AI debater's next rebuttal.

    Args:
        topic: The debate topic, inserted into the system prompt.
        user_stance: The user's side. ``"正方"`` makes the AI argue ``"反方"``;
            any other value makes the AI argue ``"正方"``.
        messages: Conversation so far as an iterable of dicts with ``"role"``
            and ``"content"`` keys. ``None`` is treated as an empty history.
            Non-dict entries and entries with empty content are skipped, and
            unknown roles are sent as ``"user"``.

    Returns:
        The model's reply with surrounding whitespace removed. On failure or
        an empty reply, returns a bracketed marker of the form
        ``"[AI 回應生成失敗: <reason>]"``. This function never raises.
    """
    ai_stance = "反方" if user_stance == "正方" else "正方"
    system_prompt = f"你正在參與一場關於「{topic}」的辯論。你扮演的是堅定的「{ai_stance}」。請根據對話歷史，針對使用者的最新論點，提出具有批判性、質疑性或反駁性的回應。保持簡潔有力，專注於論證，字數控制在150字以內。"

    # Build the payload in the OpenAI chat format, starting with the system prompt.
    openai_messages = [{"role": "system", "content": system_prompt}]
    for msg in messages or []:
        if not isinstance(msg, dict):
            continue
        content = msg.get("content", "")
        if not content:
            continue
        role = msg.get("role")
        if role not in ("user", "assistant"):
            role = "user"
        openai_messages.append({"role": role, "content": content})

    try:
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=openai_messages,
            max_tokens=250,
            temperature=0.7,
        )
        # ``content`` may be None; treat that as an empty reply rather than
        # letting ``.strip()`` raise a confusing AttributeError.
        ai_response = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Boundary handler: report the failure as a marker string.
        print(f"LLM Error (OpenAI): {e}")
        return f"[AI 回應生成失敗: {e}]"

    if not ai_response:
        print("LLM Error (OpenAI): empty response")
        return "[AI 回應生成失敗: 模型未回傳內容]"
    return ai_response
def call_tts(text):
    """Synthesise speech for *text* with the OpenAI TTS API.

    Args:
        text: The text to speak. Empty values, non-strings and bracketed
            status markers (strings starting with ``"["``) are not spoken.

    Returns:
        The path (``str``) of a newly created ``.mp3`` file in the system
        temporary directory, or ``None`` when the text was skipped or
        synthesis failed. This function never raises.
    """
    # Error markers and empty text must not be read aloud.
    if not text or not isinstance(text, str) or text.startswith("["):
        print(f"Skipping TTS for invalid text: {text}")
        return None

    import tempfile  # local import keeps this block self-contained

    speech_file_path = None
    try:
        # mkstemp gives a unique name even when two requests arrive in the
        # same millisecond, and honours the platform's temp directory.
        fd, tmp_name = tempfile.mkstemp(prefix="speech_", suffix=".mp3")
        os.close(fd)
        speech_file_path = Path(tmp_name)

        response = client.audio.speech.create(
            model=TTS_MODEL,
            voice=TTS_VOICE,
            input=text,
        )
        # Write the returned audio (mp3) to the temporary file.
        response.stream_to_file(speech_file_path)
        return str(speech_file_path)
    except Exception as e:
        # Boundary handler: TTS is optional, so fall back to text-only output.
        print(f"TTS Error (OpenAI): {e}")
        if speech_file_path is not None:
            # Do not leave an empty or partial file behind.
            try:
                speech_file_path.unlink()
            except OSError:
                pass
        return None
# --- Gradio main handler ---
def _history_to_messages(history):
    """Convert Chatbot ``(user, assistant)`` rows into OpenAI-style message dicts."""
    messages = []
    for row in history:
        if not isinstance(row, (list, tuple)) or len(row) != 2:
            continue
        user_msg, ai_msg = row
        # Only plain strings are text; tuples/lists are media rows and None
        # means "no message". Bracketed strings are status/error markers.
        if (
            isinstance(user_msg, str)
            and user_msg
            and not user_msg.startswith(("[", "(語音辨識結果:"))
        ):
            messages.append({"role": "user", "content": user_msg})
        if isinstance(ai_msg, str) and ai_msg and not ai_msg.startswith("["):
            messages.append({"role": "assistant", "content": ai_msg})
    return messages


def _resolve_user_text(user_input_text, user_input_audio, history):
    """Pick this turn's argument (speech first, then typed text); log media/errors to *history*."""
    typed_text = user_input_text.strip() if isinstance(user_input_text, str) else ""
    if not user_input_audio:
        if not typed_text:
            history.append(("[錯誤：請提供文字或語音論點]", None))
        return typed_text

    print(f"Processing audio input: {user_input_audio}")
    # Show the user's recording as its own media row.
    history.append(((user_input_audio,), None))
    transcript = call_asr(user_input_audio)
    if isinstance(transcript, str) and transcript.startswith("["):
        # call_asr signals failure with a bracketed marker; show it.
        history.append((transcript, None))
        return typed_text
    spoken_text = transcript.strip() if isinstance(transcript, str) else ""
    if spoken_text:
        return spoken_text
    if not typed_text:
        # Nothing recognised and nothing typed: tell the user instead of
        # calling the model without a new argument.
        history.append(("[語音辨識失敗: 未辨識到任何內容]", None))
    return typed_text


def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
    """Handle one debate round: read the user's argument, get and voice the AI rebuttal.

    Speech input takes priority over typed text. If transcription fails or
    recognises nothing, the typed text is used as a fallback when present.

    History rows are ``(user, assistant)`` pairs in Gradio's tuple format:
    plain strings are text, a 1-tuple ``(filepath,)`` is a media message, and
    strings starting with ``"["`` are status/error markers that are shown but
    never sent to the model. A tuple message is treated as a file by the
    Chatbot, so text and audio are emitted as separate rows. Spoken arguments
    are stored as their transcription so later rounds keep them in context.

    Args:
        topic: Debate topic passed to ``call_llm``.
        user_stance: The user's side, passed to ``call_llm``.
        user_input_text: Typed argument; may be empty or ``None``.
        user_input_audio: File path of the recorded argument, or a falsy value.
        history: Current Chatbot history; ``None`` is treated as empty. The
            caller's list is not mutated.

    Returns:
        ``(history, None, "")``: the updated history, plus the values that
        clear the audio and text inputs.
    """
    history = list(history) if history else []

    user_text = _resolve_user_text(user_input_text, user_input_audio, history)
    if not user_text:
        # The helper has already appended the explanatory marker row.
        return history, None, ""

    # Build the context from earlier rounds, then add the current argument
    # explicitly so the last message sent is always the user's latest point.
    llm_messages = _history_to_messages(history)
    llm_messages.append({"role": "user", "content": user_text})
    history.append((user_text, None))

    ai_response_text = call_llm(topic, user_stance, llm_messages)
    ai_response_audio_path = call_tts(ai_response_text)

    # Attach the reply to the row just added for this round.
    history[-1] = (user_text, ai_response_text)
    if ai_response_audio_path:
        # The spoken reply goes on its own row as a media message.
        history.append((None, (ai_response_audio_path,)))

    # Clear both input widgets.
    return history, None, ""
# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="時事觀點對對碰 (OpenAI)") as demo:
    # Page header and short usage instructions.
    gr.Markdown("## 🗣️ 時事觀點對對碰 (OpenAI 版本)")
    gr.Markdown("選擇一個議題和你的立場，用文字或語音提出論點，AI 將扮演對手與你辯論！")

    # State holder for the conversation. It is created here but not wired to
    # any event below; the Chatbot value itself is passed as the history.
    chat_history = gr.State([])

    # Debate settings: topic and the side the user argues.
    with gr.Row():
        topic_dd = gr.Dropdown(
            choices=TOPICS,
            value=TOPICS[0],
            label="選擇辯論主題",
        )
        stance_radio = gr.Radio(
            choices=["正方", "反方"],
            value="正方",
            label="選擇你的立場",
        )

    # Conversation display.
    chatbot_ui = gr.Chatbot(
        label="辯論區",
        height=500,
        render_markdown=True,
        bubble_full_width=False,
    )

    # Input row: wide text box on the left, microphone recorder on the right.
    with gr.Row():
        with gr.Column(scale=7):
            user_txt = gr.Textbox(
                label="輸入你的論點 (文字)",
                placeholder="在此輸入文字...",
            )
        with gr.Column(scale=3):
            # type="filepath" hands the recording to the handler as a file path.
            user_audio = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="或錄製你的論點 (語音)",
            )

    submit_btn = gr.Button("送出論點", variant="primary")

    # --- Event wiring ---
    # The Chatbot is both the history input and the history output; the audio
    # and text widgets are outputs so the handler can clear them.
    turn_inputs = [topic_dd, stance_radio, user_txt, user_audio, chatbot_ui]
    turn_outputs = [chatbot_ui, user_audio, user_txt]
    submit_btn.click(fn=debate_turn, inputs=turn_inputs, outputs=turn_outputs)

if __name__ == "__main__":
    # share=True is not needed on Hugging Face Spaces; it may be needed in
    # Colab or a local Jupyter notebook.
    # debug=True prints more detailed errors to the console.
    demo.launch(debug=True)