"""時事觀點對對碰: a Gradio debate-practice app.

Pick or type a debate topic, choose a stance, and argue by text or voice; the app
transcribes the argument, has an OpenAI chat model argue the opposite stance, and
reads the rebuttal back with text-to-speech.
"""

import os
import time
from pathlib import Path

import gradio as gr
import openai

# The client reads OPENAI_API_KEY from the environment; passing it explicitly keeps
# that dependency visible.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Models for each stage of the pipeline.
LLM_MODEL = "gpt-4o"              # chat model that argues the opposing stance
ASR_MODEL = "gpt-4o-transcribe"   # speech-to-text for recorded arguments
TTS_MODEL = "gpt-4o-mini-tts"     # text-to-speech for the AI's rebuttal
TTS_VOICE = "nova"

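# The model and voice choices above are easy to swap. A minimal sketch, assuming you
# want them configurable at deploy time via (hypothetical) environment variables:
#
#   LLM_MODEL = os.getenv("DEBATE_LLM_MODEL", "gpt-4o")
#   ASR_MODEL = os.getenv("DEBATE_ASR_MODEL", "gpt-4o-transcribe")
#   TTS_MODEL = os.getenv("DEBATE_TTS_MODEL", "gpt-4o-mini-tts")
#   TTS_VOICE = os.getenv("DEBATE_TTS_VOICE", "nova")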

# Preset debate topics (Traditional Chinese), drawn from current Taiwanese policy debates.
CURRENT_TW_TOPICS = [
    "台灣是否應提高核能發電比例以應對能源需求?",  # Should Taiwan raise the share of nuclear power to meet energy demand?
    "面對高房價,興建社會住宅是最佳解方嗎?",  # Facing high housing prices, is building social housing the best solution?
    "台灣是否應放寬代理孕母的限制? (涉及生育權、倫理爭議、法律規範)",  # Should Taiwan relax surrogacy restrictions? (reproductive rights, ethics, law)
    "少子化下,延後退休年齡或引進更多外籍移工哪個更急迫?",  # With a falling birth rate, is raising the retirement age or admitting more migrant workers more urgent?
    "台灣是否應積極推動雙語教育(中英文)作為主要教學語言?",  # Should Taiwan actively push bilingual (Chinese-English) instruction as the main teaching language?
    "健保制度改革:提高保費、部分負擔,或有其他永續方案?",  # NHI reform: higher premiums, higher copayments, or another sustainable option?
    "台灣是否應對網路言論進行更嚴格的管制?",  # Should Taiwan regulate online speech more strictly?
    "台灣的死刑制度是否應廢除?",  # Should Taiwan abolish the death penalty?
]


def call_asr(audio_filepath):
    """Transcribe the recorded argument with OpenAI speech-to-text.

    Returns the transcript, an empty string when no audio was provided, or a
    bracketed error message that downstream code treats as a failure.
    """
    if not audio_filepath:
        return ""
    try:
        with open(audio_filepath, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(model=ASR_MODEL, file=audio_file)
        return transcript.text
    except Exception as e:
        print(f"ASR Error (OpenAI): {e}")
        return f"[語音辨識失敗: {e}]"  # "speech recognition failed"


def call_llm(topic, user_stance, messages):
    """Ask the chat model to argue the stance opposite to the user's.

    `messages` is a list of {"role", "content"} dicts built from the debate so far.
    """
    # The AI always takes the side the user did not pick (正方 = affirmative, 反方 = negative).
    ai_stance = "反方" if user_stance == "正方" else "正方"
    system_prompt = (
        f"你正在參與一場關於「{topic}」的辯論。你扮演的是堅定的「{ai_stance}」。"
        "請根據對話歷史,針對使用者的最新論點,提出具有批判性、質疑性或反駁性的回應。"
        "保持簡潔有力,專注於論證,字數控制在150字以內。"
    )
    openai_messages = [{"role": "system", "content": system_prompt}]
    for msg in messages:
        # Anything that is not an assistant turn is treated as a user turn.
        role = msg.get("role") if msg.get("role") in ["user", "assistant"] else "user"
        content = msg.get("content", "")
        if content:
            openai_messages.append({"role": role, "content": content})
    try:
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=openai_messages,
            max_tokens=250,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"LLM Error (OpenAI): {e}")
        return f"[AI 回應生成失敗: {e}]"  # "AI response generation failed"


def call_tts(text):
    """Synthesize the AI's rebuttal to an mp3 file and return its path (or None)."""
    try:
        # Skip placeholder/error strings such as "[AI 回應生成失敗: ...]".
        if not text or not isinstance(text, str) or text.startswith("["):
            print(f"Skipping TTS for invalid text: {text}")
            return None
        speech_file_path = Path(f"/tmp/speech_{int(time.time() * 1000)}.mp3")
        response = client.audio.speech.create(model=TTS_MODEL, voice=TTS_VOICE, input=text)
        # Newer openai-python releases prefer client.audio.speech.with_streaming_response,
        # but stream_to_file still writes the returned audio to disk.
        response.stream_to_file(speech_file_path)
        return str(speech_file_path)
    except Exception as e:
        print(f"TTS Error (OpenAI): {e}")
        return None
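
# A minimal sketch of how the three helpers chain together outside the Gradio UI, assuming
# OPENAI_API_KEY is set and "my_argument.wav" is a hypothetical local recording:
#
#   user_text = call_asr("my_argument.wav")
#   reply = call_llm(CURRENT_TW_TOPICS[0], "正方", [{"role": "user", "content": user_text}])
#   reply_audio = call_tts(reply)  # path to an mp3 of the spoken rebuttal, or None on failure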


def debate_turn(topic_from_dropdown, custom_topic, user_stance, user_input_text, user_input_audio, history):
    """Run one debate round: resolve the topic, read the user's argument, append the AI's rebuttal.

    Returns (updated chat history, cleared audio input, cleared text input, resolved topic)
    to match the outputs wired to the submit button.
    """
    # A non-empty custom topic takes priority over the dropdown selection.
    final_topic = ""
    if custom_topic and custom_topic.strip():
        final_topic = custom_topic.strip()
        print(f"Using custom topic: {final_topic}")
    elif topic_from_dropdown:
        final_topic = topic_from_dropdown
        print(f"Using dropdown topic: {final_topic}")
    else:
        history.append(("[錯誤:請選擇或輸入一個辯論主題]", None))  # "please choose or enter a debate topic"
        return history, None, "", ""

    # --- Collect the user's argument: audio first, then typed text as fallback/override ---
    user_text = ""
    processed_audio_path = None
    if user_input_audio:
        print(f"Processing audio input: {user_input_audio}")
        processed_audio_path = user_input_audio
        user_text = call_asr(processed_audio_path)
        # Show the recording itself as the user's chat bubble.
        history.append(((processed_audio_path,), None))
        if user_text.startswith("["):
            # ASR failed; surface the error and end the turn.
            history.append((user_text, None))
            return history, None, "", final_topic

    if not user_text or user_text.startswith("["):
        # No usable transcript: fall back to the typed argument, if any.
        if user_input_text:
            user_text = user_input_text
            if not processed_audio_path:
                history.append((user_text, None))
    elif user_input_text:
        # Both audio and text were provided: the typed text takes priority.
        user_text = user_input_text
        history.append((f"(改用文字輸入: {user_text})", None))  # "(using the typed argument instead)"
    else:
        # Defensive: a transcript exists but nothing else was provided.
        if not processed_audio_path:
            history.append(("[錯誤:請提供文字或語音論點]", None))  # "please provide a written or spoken argument"
            return history, None, "", final_topic

    # Final guard: stop the turn if there is still no usable argument text.
    if not isinstance(user_text, str) or not user_text.strip() or user_text.startswith("["):
        print("Invalid user text, stopping turn.")
        if history and isinstance(history[-1][0], tuple) and history[-1][1] is None:
            # The user's audio bubble is still waiting for a reply; attach the error to the chat.
            history.append((f"[無法處理用戶輸入: {user_text}]", None))  # "could not process user input"
        else:
            history.append(("[錯誤:請提供文字或語音論點]", None))
        return history, None, "", final_topic

    # --- Rebuild the conversation for the LLM from the chat history, skipping
    # --- placeholders, error strings, and audio-only bubbles ---
    llm_messages = []
    for i, turn in enumerate(history):
        user_msg, ai_msg = turn

        user_content = None
        if isinstance(user_msg, str):
            # Ignore error messages and display-only prefixes.
            if (not user_msg.startswith("[")
                    and not user_msg.startswith("(改用文字輸入:")
                    and not user_msg.startswith("(語音辨識結果:")):
                user_content = user_msg
        elif isinstance(user_msg, tuple):
            # An audio bubble: only the current (last) turn has a transcript available.
            if i == len(history) - 1 and not user_text.startswith("["):
                user_content = user_text
        if user_content:
            llm_messages.append({"role": "user", "content": user_content})

        ai_content = None
        if isinstance(ai_msg, str):
            if not ai_msg.startswith("["):
                ai_content = ai_msg
        elif isinstance(ai_msg, tuple) and len(ai_msg) > 0:
            if isinstance(ai_msg[0], str) and ai_msg[0].endswith(".mp3"):
                pass  # AI audio bubble; the matching text turn is already in the history
            elif isinstance(ai_msg[0], str) and not ai_msg[0].startswith("["):
                ai_content = ai_msg[0]
        if ai_content:
            llm_messages.append({"role": "assistant", "content": ai_content})

    # Make sure the conversation ends with the user's current argument.
    if not llm_messages or llm_messages[-1]["role"] == "assistant":
        if not user_text.startswith("["):
            llm_messages.append({"role": "user", "content": user_text})
        else:
            print("Skipping LLM call due to invalid final user text.")
            return history, None, "", final_topic

    # --- Generate the AI's rebuttal (text, then speech) ---
    ai_response_text = call_llm(final_topic, user_stance, llm_messages)
    ai_response_audio_path = call_tts(ai_response_text)

    # Attach the rebuttal to the most recent unanswered user bubble.
    last_user_turn_index = -1
    for i in range(len(history) - 1, -1, -1):
        if history[i][1] is None and history[i][0] is not None:
            last_user_turn_index = i
            break

    if last_user_turn_index != -1:
        history[last_user_turn_index] = (history[last_user_turn_index][0], ai_response_text)
        if ai_response_audio_path and not ai_response_text.startswith("["):
            history.append((None, (ai_response_audio_path,)))
    else:
        print("Warning: Could not find user's turn. Appending AI response.")
        history.append(("[用戶回合丟失?]", ai_response_text))  # "user turn missing?"
        if ai_response_audio_path and not ai_response_text.startswith("["):
            history.append((None, (ai_response_audio_path,)))

    # Clear the audio and text inputs, and echo the resolved topic back into the
    # custom-topic box so the next turn keeps using the same topic.
    return history, None, "", final_topic
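
# The Chatbot below uses Gradio's tuple-style history: each entry is a (user, ai) pair, and
# either side may be a one-element tuple holding a file path, which Gradio renders as playable
# audio. A hypothetical history after one spoken turn might look like:
#
#   [(("/tmp/gradio/recording.wav",), "AI 的反駁..."),
#    (None, ("/tmp/speech_1700000000000.mp3",))]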

with gr.Blocks(theme=gr.themes.Soft(), title="時事觀點對對碰 (OpenAI + 自訂主題)") as demo:
    gr.Markdown("## 🗣️ 時事觀點對對碰")
    gr.Markdown("選擇預設議題或輸入自訂議題,選擇立場,用文字或語音提出論點,AI 將扮演對手與你辯論!")

    chat_history = gr.State([])  # currently unused: the Chatbot component itself carries the history

    with gr.Row():
        topic_dd = gr.Dropdown(CURRENT_TW_TOPICS, label="選擇預設辯論主題", value=CURRENT_TW_TOPICS[0])
        custom_topic_txt = gr.Textbox(label="或輸入自訂辯論主題", placeholder="若此處輸入,將優先使用此主題...")
        stance_radio = gr.Radio(["正方", "反方"], label="選擇你的立場", value="正方")  # 正方 = affirmative, 反方 = negative

    chatbot_ui = gr.Chatbot(label="辯論區", height=500, render_markdown=True, bubble_full_width=False)

    with gr.Row():
        with gr.Column(scale=7):
            user_txt = gr.Textbox(label="輸入你的論點 (文字)", placeholder="在此輸入文字...")
        with gr.Column(scale=3):
            user_audio = gr.Audio(sources=["microphone"], type="filepath", label="或錄製你的論點 (語音)")

    submit_btn = gr.Button("送出論點", variant="primary")

    # Outputs line up with debate_turn's return tuple:
    # (chat history, cleared audio, cleared text, resolved topic).
    submit_btn.click(
        fn=debate_turn,
        inputs=[topic_dd, custom_topic_txt, stance_radio, user_txt, user_audio, chatbot_ui],
        outputs=[chatbot_ui, user_audio, user_txt, custom_topic_txt],
    )

    # Keep the two topic inputs mutually exclusive: picking from the dropdown clears
    # the custom textbox, and typing a custom topic clears the dropdown.
    def clear_custom_topic(dropdown_value):
        if dropdown_value:
            return ""
        return gr.skip()

    topic_dd.change(fn=clear_custom_topic, inputs=[topic_dd], outputs=[custom_topic_txt])

    def clear_dropdown(custom_text):
        if custom_text and custom_text.strip():
            return None
        return gr.skip()

    custom_topic_txt.change(fn=clear_dropdown, inputs=[custom_topic_txt], outputs=[topic_dd])


if __name__ == "__main__":
    demo.launch(debug=True)
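
# To run the app locally (a sketch, assuming the standard PyPI package names and that this
# file is saved as app.py):
#   pip install gradio openai
#   export OPENAI_API_KEY=sk-...
#   python app.py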