Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import openai # Use the OpenAI library
|
| 4 |
+
import time
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
# from pydub import AudioSegment # Still potentially needed if input format varies
|
| 7 |
+
|
| 8 |
+
# --- OpenAI API configuration ---
# Strongly recommended: supply the API key via the OPENAI_API_KEY environment
# variable (or a Secret when deploying on Hugging Face Spaces).
# Never hard-code the key in source.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("請設定 OpenAI API Key 環境變數 OPENAI_API_KEY")

# Initialize OpenAI client (new SDK style); it also reads OPENAI_API_KEY.
client = openai.OpenAI()

# --- Model selection (OpenAI models) ---
LLM_MODEL = "gpt-3.5-turbo"  # Or "gpt-4", "gpt-4-turbo-preview", etc.
ASR_MODEL = "whisper-1"
TTS_MODEL = "tts-1"  # Or "tts-1-hd"
TTS_VOICE = "alloy"  # Choose from: alloy, echo, fable, onyx, nova, shimmer

# --- Default debate topics (shown in the UI dropdown) ---
TOPICS = [
    "AI 是否會大規模取代人類工作?",
    "社群媒體對社會利大於弊嗎?",
    "是否應該推行全民基本收入 (UBI)?",
    "全球化對本地文化是威脅還是機遇?",
    "基因編輯技術應受更嚴格的倫理限制嗎?"
]
|
| 33 |
+
|
| 34 |
+
# --- Helper 函數:呼叫 OpenAI API ---
|
| 35 |
+
|
| 36 |
+
def call_asr(audio_filepath):
    """Transcribe an audio file to text with OpenAI Whisper.

    Args:
        audio_filepath: path to the recorded audio, or a falsy value when
            no recording was provided.

    Returns:
        The transcript string; "" when no file was given; or a bracketed
        error marker (callers treat strings starting with "[" as errors).
    """
    if not audio_filepath:
        return ""
    try:
        # The Whisper endpoint expects an open binary file handle.
        with open(audio_filepath, "rb") as audio_handle:
            result = client.audio.transcriptions.create(
                model=ASR_MODEL,
                file=audio_handle,
            )
        return result.text
    except Exception as e:
        print(f"ASR Error (OpenAI): {e}")
        # In-band error signal consumed by debate_turn().
        return f"[語音辨識失敗: {e}]"
|
| 51 |
+
|
| 52 |
+
def call_llm(topic, user_stance, messages):
    """Ask the OpenAI chat model for the next debate rebuttal.

    Args:
        topic: debate topic chosen in the UI.
        user_stance: "正方" or "反方"; the AI argues the opposite side.
        messages: list of {"role", "content"} dicts built from the chat
            history (roles other than user/assistant are coerced to user).

    Returns:
        The assistant's reply text, or a bracketed error marker on failure.
    """
    ai_stance = "正方" if user_stance != "正方" else "反方"
    system_prompt = f"你正在參與一場關於「{topic}」的辯論。你扮演的是堅定的「{ai_stance}」。請根據對話歷史,針對使用者的最新論點,提出具有批判性、質疑性或反駁性的回應。保持簡潔有力,專注於論證,字數控制在150字以內。"

    # Build the OpenAI messages payload: system prompt first, then the
    # sanitized conversation history.
    openai_messages = [{"role": "system", "content": system_prompt}]
    for entry in messages:
        role = entry.get("role")
        if role not in ("user", "assistant"):
            role = "user"  # unknown roles default to user
        content = entry.get("content", "")
        if content:  # drop empty turns
            openai_messages.append({"role": role, "content": content})

    try:
        completion = client.chat.completions.create(
            model=LLM_MODEL,
            messages=openai_messages,
            max_tokens=250,
            temperature=0.7,
        )
    except Exception as e:
        print(f"LLM Error (OpenAI): {e}")
        # In-band error signal consumed by debate_turn().
        return f"[AI 回應生成失敗: {e}]"
    return completion.choices[0].message.content.strip()
|
| 78 |
+
|
| 79 |
+
def call_tts(text):
    """Convert text to speech with OpenAI TTS.

    Args:
        text: the text to speak. Strings starting with "[" are the app's
            in-band error markers and are skipped.

    Returns:
        Path (str) of the generated mp3 file, or None when the text is
        invalid or synthesis fails.
    """
    import tempfile  # local import keeps this fix self-contained

    try:
        # Skip empty/non-string text and in-band error markers.
        if not text or not isinstance(text, str) or text.startswith("["):
            print(f"Skipping TTS for invalid text: {text}")
            return None

        # Use the platform temp directory (the previous hard-coded "/tmp"
        # broke on Windows) and let NamedTemporaryFile guarantee a unique
        # name — millisecond timestamps can collide under concurrent use.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            speech_file_path = Path(tmp.name)

        response = client.audio.speech.create(
            model=TTS_MODEL,
            voice=TTS_VOICE,
            input=text
        )

        # NOTE(review): stream_to_file is deprecated in newer openai SDKs
        # in favour of with_streaming_response; kept for compatibility.
        response.stream_to_file(speech_file_path)

        return str(speech_file_path)
    except Exception as e:
        print(f"TTS Error (OpenAI): {e}")
        return None
|
| 103 |
+
|
| 104 |
+
# --- Gradio 主函數 (與之前版本基本相同,只是調用的 helper 不同) ---
|
| 105 |
+
def debate_turn(topic, user_stance, user_input_text, user_input_audio, history):
    """Run one debate round: ingest the user's argument (text or audio),
    build the LLM context from *history*, and attach the AI's text/audio
    rebuttal to the chat history.

    Args:
        topic: debate topic string chosen in the UI.
        user_stance: "正方" or "反方"; the AI takes the opposite side.
        user_input_text: argument typed by the user (may be empty).
        user_input_audio: filepath of a recorded argument (may be None).
        history: Gradio chatbot history, mutated in place. Entries are
            (user_msg, ai_msg) pairs where either side may be a plain
            string, an (audio_path,) tuple, a (text, (audio_path,)) tuple,
            or None while the AI reply is pending. Strings starting with
            "[" are in-band error markers.

    Returns:
        (updated history, None, "") — the last two clear the audio and
        text inputs, matching the click handler's three outputs.
    """
    user_text = ""
    processed_audio_path = None  # records the audio path actually transcribed

    # Audio input takes priority over text input.
    if user_input_audio:
        print(f"Processing audio input: {user_input_audio}")
        processed_audio_path = user_input_audio
        user_text = call_asr(processed_audio_path)
        # Show the user's raw recording in the chat (AI reply pending).
        history.append(((processed_audio_path,), None))
        if user_text and not user_text.startswith("["):
            # Transcription succeeded; the text joins the LLM context below.
            pass
        elif user_text.startswith("["):  # transcription failed
            history.append((user_text, None))  # surface the error marker
            # Clear both inputs and end the turn.
            return history, None, ""

    # Fall back to the typed text when there was no (valid) audio.
    if not user_text or user_text.startswith("["):
        if user_input_text:
            user_text = user_input_text
            # The typed argument starts this round in the history.
            history.append((user_text, None))
        else:
            # Neither input was usable.
            if not processed_audio_path:  # only error if audio wasn't even tried
                history.append(("[錯誤:請提供文字或語音論點]", None))
                return history, None, ""
            # An ASR failure was already recorded above; don't double-report.

    # Guard: only continue with a valid user argument string.
    if not isinstance(user_text, str) or user_text.startswith("["):
        print("Invalid user text, stopping turn.")
        return history, None, ""  # likely an earlier ASR failure; abort turn

    # --- Build the LLM message list from the chat history ---
    llm_messages = []
    for i, turn in enumerate(history):
        user_msg, ai_msg = turn

        # --- User side of the pair ---
        user_content = None
        if isinstance(user_msg, str):  # plain text, or an earlier error marker
            # Skip error markers and transcript-echo entries.
            if not user_msg.startswith("[") and not user_msg.startswith("(語音辨識結果:"):
                user_content = user_msg
        elif isinstance(user_msg, tuple):  # audio entry: (path,)
            # For the newest audio entry, user_text already holds the
            # transcript; older audio entries are skipped here.
            if i == len(history) - 1 and not user_text.startswith("["):
                user_content = user_text
            # NOTE(review): older audio turns therefore drop out of the LLM
            # context — presumably acceptable, but worth confirming.

        if user_content:
            llm_messages.append({"role": "user", "content": user_content})

        # --- AI side of the pair ---
        ai_content = None
        if isinstance(ai_msg, str):  # plain text reply, or an error marker
            if not ai_msg.startswith("["):
                ai_content = ai_msg
        elif isinstance(ai_msg, tuple) and len(ai_msg) > 0:  # (text, (audio_path,))
            if isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
                ai_content = ai_msg[0]  # use only the text part

        if ai_content:
            llm_messages.append({"role": "assistant", "content": ai_content})

    # The LLM call expects the final message to come from the user.
    if llm_messages and llm_messages[-1]["role"] != "user":
        # Shouldn't happen in the normal flow; repair defensively.
        print("Warning: Last message before LLM call is not from user. Appending current user text.")
        if not user_text.startswith("["):
            llm_messages.append({"role": "user", "content": user_text})
        else:
            print("Skipping LLM call due to invalid user text.")
            return history, None, ""  # cannot call the LLM
    elif not llm_messages and not user_text.startswith("["):
        # First round: seed the context with the user's argument.
        llm_messages.append({"role": "user", "content": user_text})

    # --- Generate the AI rebuttal ---
    ai_response_text = call_llm(topic, user_stance, llm_messages)

    # --- Synthesize speech for the rebuttal (None on failure/skip) ---
    ai_response_audio_path = call_tts(ai_response_text)

    # --- Attach the AI response to the pending user turn ---
    # Find the most recent history entry still awaiting an AI reply.
    last_user_turn_index = -1
    for i in range(len(history) - 1, -1, -1):
        if history[i][1] is None:  # last entry whose AI slot is empty
            last_user_turn_index = i
            break

    if last_user_turn_index != -1:
        if ai_response_audio_path:
            # TTS succeeded: pair the text with a playable audio tuple.
            history[last_user_turn_index] = (history[last_user_turn_index][0], (ai_response_text, (ai_response_audio_path,)))
        else:
            # TTS failed: show text only.
            history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
    else:
        # Should be unreachable: no pending user turn was found.
        print("Error: Could not find user's turn in history to append AI response.")
        history.append(("[內部錯誤]", ai_response_text))  # still show the AI reply

    # Clear the audio and text inputs.
    return history, None, ""
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
# --- Gradio UI (與之前版本相同) ---
|
| 222 |
+
# --- Gradio UI: layout and event wiring ---
with gr.Blocks(theme=gr.themes.Soft(), title="時事觀點對對碰 (OpenAI)") as demo:
    gr.Markdown("## 🗣️ 時事觀點對對碰 (OpenAI 版本)")
    gr.Markdown("選擇一個議題和你的立場,用文字或語音提出論點,AI 將扮演對手與你辯論!")

    # NOTE(review): this State is never wired into any event handler below —
    # the chatbot component itself carries the history. Likely dead code.
    chat_history = gr.State([])

    with gr.Row():
        topic_dd = gr.Dropdown(TOPICS, label="選擇辯論主題", value=TOPICS[0])
        stance_radio = gr.Radio(["正方", "反方"], label="選擇你的立場", value="正方")

    # The chatbot renders both text and audio entries produced by debate_turn.
    chatbot_ui = gr.Chatbot(label="辯論區", height=500, render_markdown=True, bubble_full_width=False)

    with gr.Row():
        with gr.Column(scale=7):
            user_txt = gr.Textbox(label="輸入你的論點 (文字)", placeholder="在此輸入文字...")
        with gr.Column(scale=3):
            # Microphone input delivered to debate_turn as a filepath.
            user_audio = gr.Audio(sources=["microphone"], type="filepath", label="或錄製你的論點 (語音)")

    submit_btn = gr.Button("送出論點", variant="primary")

    # --- Event wiring ---
    submit_btn.click(
        fn=debate_turn,
        inputs=[topic_dd, stance_radio, user_txt, user_audio, chatbot_ui],  # chatbot doubles as the history input
        outputs=[chatbot_ui, user_audio, user_txt]  # updated history; audio and text inputs are cleared
    )
|
| 250 |
+
|
| 251 |
+
if __name__ == "__main__":
    # share=True is not needed when deployed on Hugging Face Spaces;
    # it may be required in Colab or a local Jupyter notebook.
    # debug=True surfaces detailed tracebacks in the console.
    demo.launch(debug=True)
|