| | import os |
| | import tempfile |
| | import requests |
| | import gradio as gr |
| | import torch |
| | import whisper |
| | from gtts import gTTS |
| | from pydub import AudioSegment |
| | from sentence_transformers import SentenceTransformer, util |
| | from transformers import AutoTokenizer, AutoModelForCausalLM |
| | from bs4 import BeautifulSoup |
| | from duckduckgo_search import DDGS |
| | import time |
| |
|
| | |
| | |
| | |
# Module-level model setup, run once at import time.
# `device`, `whisper_model`, `embedder`, `tokenizer`, and `lm_model` are
# module-level names used by the functions below — do not rename.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Whisper "base" model for speech-to-text transcription.
whisper_model = whisper.load_model("base", device=device)
# Sentence embedder; NOTE(review): not referenced anywhere in this file —
# presumably intended for semantic matching; confirm before removing.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# GPT-2 tokenizer + causal LM used by ai_answer().
tokenizer = AutoTokenizer.from_pretrained("gpt2")
lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
| |
|
| | |
| | |
| | |
# Whitelist of trusted sources for answers.
# NOTE(review): this constant is not referenced anywhere in the visible code —
# presumably meant to restrict web_search() results; confirm intended use.
TRUSTED_SITES = [
    "https://www.dgpa.gov.tw",      # Directorate-General of Personnel Administration
    "https://www.cna.com.tw",       # Central News Agency
    "https://www.stust.edu.tw",     # Southern Taiwan University of Science and Technology
    "https://www.moi.gov.tw",       # Ministry of the Interior
    "https://www.taiwan.net.tw",    # Taiwan tourism bureau
    "https://publicholidays.tw",
    "https://zh.wikipedia.org",
]
| |
|
| | |
| | |
| | |
def text_to_speech(text):
    """Synthesize Mandarin speech for *text* and return the path of an MP3 file.

    The file is created with delete disabled; the caller (or the OS temp
    cleaner) is responsible for removing it.

    Parameters
    ----------
    text : str
        Text to speak (gTTS, lang="zh").

    Returns
    -------
    str
        Filesystem path to the generated MP3.
    """
    # Create the temp path and close our handle BEFORE gTTS writes to it:
    # the original saved through a second handle while the
    # NamedTemporaryFile was still open, which fails on Windows (the file
    # cannot be reopened while held) and is fragile in general.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    gTTS(text=text, lang="zh").save(path)
    return path
| |
|
| | |
| | |
| | |
def crawl_official_calendar(query):
    """Scan the DGPA holiday-calendar page for a table row mentioning *query*.

    Parameters
    ----------
    query : str
        Substring to look for in each ``<tr>``'s text.

    Returns
    -------
    str | None
        The stripped text of the first matching row, or ``None`` when nothing
        matches or the page cannot be fetched (best-effort: callers fall back
        to web search on ``None``).
    """
    url = "https://www.dgpa.gov.tw/holidaycalendar"
    try:
        resp = requests.get(url, timeout=10)
        # Treat HTTP error statuses the same as network failures; the
        # original silently parsed error pages.
        resp.raise_for_status()
    except requests.RequestException:
        # Deliberate best-effort: network problems mean "no answer here",
        # and qa_system() falls through to web_search().
        return None
    soup = BeautifulSoup(resp.text, "html.parser")
    for row in soup.find_all("tr"):
        if query in row.text:
            return row.text.strip()
    return None
| |
|
| | |
| | |
| | |
def web_search(query):
    """Run a DuckDuckGo text search and format up to three results.

    Parameters
    ----------
    query : str
        Search query string.

    Returns
    -------
    str
        One ``"title - body"`` line per result (each newline-terminated),
        or a Chinese "no data" / "search failed" message. Never raises.
    """
    try:
        # Use DDGS as a context manager so its HTTP session is closed;
        # the original leaked the session.
        with DDGS() as ddgs:
            # Some duckduckgo_search versions return None on no results —
            # guard so the loop doesn't crash into the failure branch.
            results = ddgs.text(query, max_results=3) or []
            answer = "".join(f"{r['title']} - {r['body']}\n" for r in results)
        return answer if answer else "查無資料。"
    except Exception:
        # Deliberate best-effort boundary: any search failure degrades to a
        # user-facing message instead of crashing the QA pipeline.
        return "網路搜尋失敗。"
| |
|
| | |
| | |
| | |
def ai_answer(question):
    """Generate a sampled GPT-2 continuation of *question*.

    Parameters
    ----------
    question : str
        Prompt text fed verbatim to the model.

    Returns
    -------
    str
        Decoded model output (includes the prompt; sampling makes it
        non-deterministic).
    """
    inputs = tokenizer(question, return_tensors="pt").to(device)
    outputs = lm_model.generate(
        inputs["input_ids"],
        # Pass the attention mask explicitly — omitting it triggers a
        # transformers warning and can mis-mask padded input.
        attention_mask=inputs["attention_mask"],
        max_length=150,
        do_sample=True,
        top_k=50,
        # GPT-2 defines no pad token; use EOS to silence the per-call warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
| |
|
| | |
| | |
| | |
def qa_system(audio, text_input):
    """Answer a question supplied as typed text or a recorded audio file.

    Parameters
    ----------
    audio : str | None
        Path to an audio file — the ``gr.Audio`` input is configured with
        ``type="filepath"``, so Gradio hands this function a path string.
    text_input : str | None
        Typed question; takes priority over *audio* when non-blank.

    Returns
    -------
    tuple[str, str, str | None]
        ``(recognized/typed question, answer text, path to TTS mp3 reply)``.
    """
    if text_input and text_input.strip():
        query = text_input.strip()
    elif audio is not None:
        # `audio` is already a file path (gr.Audio type="filepath"), so
        # transcribe it directly.  The original called audio.export(...),
        # a pydub AudioSegment method, which raised AttributeError on every
        # voice query.
        query = whisper_model.transcribe(audio)["text"]
    else:
        # Keep values aligned with the (transcript, answer, audio) output
        # slots; the original put the error message in the transcript box
        # and passed "" as an audio filepath.
        return "", "請提供語音或文字輸入。", None

    if "南台" in query or "南臺" in query:
        # School-related questions go to the local language model.
        start_time = time.time()
        answer = ai_answer(query)
        elapsed = time.time() - start_time
        # NOTE(review): this appends a web search only when generation took
        # over two minutes — a post-hoc timing heuristic whose intent is
        # unclear (a timeout fallback?).  Behavior preserved; confirm intent.
        if elapsed > 120:
            web_ans = web_search(query)
            answer += f"\n(補充搜尋結果){web_ans}"
    else:
        # Everything else: try the official holiday calendar first.
        start_time = time.time()
        answer = crawl_official_calendar(query)
        if not answer:
            elapsed = time.time() - start_time
            # NOTE(review): same post-hoc timing heuristic as above — the
            # web fallback fires only if the crawl itself was slow.
            if elapsed > 60:
                answer = web_search(query)
            else:
                answer = "查無相關資料(可信網站)。"

    # Speak the answer back to the user.
    audio_file = text_to_speech(answer)
    return query, answer, audio_file
| |
|
| | |
| | |
| | |
# Gradio UI: left column collects the question (text or voice), right column
# shows the transcript, the answer, and the spoken reply.
with gr.Blocks() as demo:
    gr.Markdown("# 南臺科技大學語音文字問答系統")
    gr.Markdown("你可以輸入文字或錄音提問,系統會找出答案,並用語音回覆。")

    with gr.Row():
        with gr.Column():
            question_box = gr.Textbox(label="文字輸入(可選)", placeholder="請輸入你的問題...")
            voice_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="請上傳語音檔或錄音")
            ask_button = gr.Button("Submit")
        with gr.Column():
            transcript_box = gr.Textbox(label="語音辨識文字")
            reply_box = gr.Textbox(label="AI 回答")
            speech_out = gr.Audio(label="語音回覆", type="filepath")

    # Wire the button to the QA pipeline: (audio, text) in, three outputs back.
    ask_button.click(
        qa_system,
        inputs=[voice_in, question_box],
        outputs=[transcript_box, reply_box, speech_out],
    )

if __name__ == "__main__":
    demo.launch()
| |
|