import os
import shutil

from gradio_client import Client, handle_file

import config

# Gradio endpoints of the hosted speech services.
_ASR_URL = "https://ai-labs.ilrdf.org.tw/sapolita-kaldi/"  # ASR engine (sapolita-kaldi)
_TTS_URL = "https://ai-labs.ilrdf.org.tw/hnang-kari-ai-asi-sluhay/"  # TTS engine (hnang-kari-ai-asi-sluhay)


def _connect(url):
    """Return a Gradio client for *url*, or ``None`` if the connection fails.

    The failure is printed rather than raised so that importing this module
    never aborts the application when a speech service is unreachable.
    """
    try:
        return Client(url)
    except Exception as e:
        print(f"語音模組初始化失敗: {e}")
        return None


# Each client is created independently: one endpoint being down must not
# prevent the other from being used. A failed client is bound to None (instead
# of being left undefined) so the functions below can detect it explicitly.
asr_client = _connect(_ASR_URL)
tts_client = _connect(_TTS_URL)


def get_clean_value(res):
    """Normalise an API result to its plain payload.

    * a dict containing a ``'value'`` key -> that value
    * a non-empty list -> its first element
    * anything else -> returned unchanged
    """
    if isinstance(res, dict) and 'value' in res:
        return res['value']
    if isinstance(res, list) and res:
        return res[0]
    return res


def speech_to_text(audio_path, tribe_name):
    """Transcribe an audio file to text (ASR).

    Used for recognising voice messages and video audio tracks.

    Args:
        audio_path: Path of the audio file to upload for recognition.
        tribe_name: Key into ``config.TRIBE_MAP``; its ``"asr_code"`` entry
            selects the dialect. Falls back to ``"formosan_ami"`` when the
            tribe or the entry is missing.

    Returns:
        The cleaned recognition result, or ``None`` when the ASR client is
        unavailable or the request fails (the error is printed).
    """
    if asr_client is None:
        print("ASR 辨識失敗: ASR 用戶端尚未初始化")
        return None

    # Look up the ASR dialect code for this language (e.g. formosan_ami).
    asr_code = config.TRIBE_MAP.get(tribe_name, {}).get("asr_code", "formosan_ami")

    try:
        result_raw = asr_client.predict(
            dialect_id=asr_code,
            audio_data=handle_file(audio_path),
            api_name="/automatic_speech_recognition",
        )
        return get_clean_value(result_raw)
    except Exception as e:
        print(f"ASR 辨識失敗: {e}")
        return None


def text_to_speech(text, tribe_name, filename):
    """Synthesise speech for *text* (TTS) and store it under ``static/``.

    Args:
        text: Text to synthesise.
        tribe_name: Ethnicity name passed to the TTS service to pick a speaker.
        filename: Base name (without extension) of the output file.

    Returns:
        ``"static/<filename>.wav"`` on success, or ``None`` when the TTS client
        is unavailable or any step fails (the error is printed).
    """
    if tts_client is None:
        print("TTS 合成失敗: TTS 用戶端尚未初始化")
        return None

    os.makedirs("static", exist_ok=True)
    save_path = f"static/{filename}.wav"

    try:
        # 1. Ask the service for the speaker code matching this ethnicity.
        # NOTE(review): for "阿美" the result is overwritten below; the call is
        # kept in case the endpoint has server-side effects - confirm and skip
        # it if it is stateless.
        speaker = get_clean_value(
            tts_client.predict(ethnicity=tribe_name, api_name="/lambda")
        )

        # 2. For Amis, always use one specific female voice (kept from the
        #    3.0 behaviour).
        if tribe_name == "阿美":
            speaker = "阿美_秀姑巒_女聲1"

        # 3. Run the synthesis.
        temp_file = tts_client.predict(
            ref=speaker,
            gen_text_input=text,
            api_name="/default_speaker_tts",
        )

        # 4. Move the temporary file returned by the client into static/.
        shutil.move(temp_file, save_path)
        return save_path
    except Exception as e:
        print(f"TTS 合成失敗: {e}")
        return None