| import os |
| import shutil |
| from gradio_client import Client, handle_file |
| import config |
|
|
| |
# Module-level Gradio clients for the remote speech services.
# Both names are bound up front so they always exist, even when a service
# cannot be reached at import time; a client that failed to initialise stays
# None and the functions using it report the failure through their own
# exception handlers.
asr_client = None
tts_client = None

# Each client is created in its own try block so that a failure of one
# service does not prevent the other from being initialised.
try:
    asr_client = Client("https://ai-labs.ilrdf.org.tw/sapolita-kaldi/")
except Exception as e:
    print(f"語音模組初始化失敗: {e}")

try:
    tts_client = Client("https://ai-labs.ilrdf.org.tw/hnang-kari-ai-asi-sluhay/")
except Exception as e:
    print(f"語音模組初始化失敗: {e}")
|
|
def get_clean_value(res):
    """Unwrap an API result down to its primary value.

    Intended to reduce the results returned by the API to plain text.

    Args:
        res: The raw result object.

    Returns:
        ``res['value']`` when ``res`` is a dict containing the key
        ``'value'``; the first element when ``res`` is a non-empty list;
        otherwise ``res`` itself, unchanged.
    """
    if isinstance(res, dict):
        # Dicts without a 'value' key are passed through untouched.
        return res['value'] if 'value' in res else res
    if isinstance(res, list):
        # An empty list has no first element, so it is returned as-is.
        return res[0] if res else res
    return res
|
|
def speech_to_text(audio_path, tribe_name):
    """Convert an audio file to text (ASR).

    Used for recognising voice messages and video audio tracks.

    Args:
        audio_path: Path of the audio file; it is wrapped with
            ``handle_file`` before being sent.
        tribe_name: Key looked up in ``config.TRIBE_MAP``. The entry's
            ``"asr_code"`` is sent as the dialect id; ``"formosan_ami"`` is
            used when the tribe or the key is missing.

    Returns:
        The prediction result passed through ``get_clean_value``, or
        ``None`` if the request raised an exception (the error is printed).
    """
    tribe_entry = config.TRIBE_MAP.get(tribe_name, {})
    dialect = tribe_entry.get("asr_code", "formosan_ami")

    try:
        return get_clean_value(
            asr_client.predict(
                dialect_id=dialect,
                audio_data=handle_file(audio_path),
                api_name="/automatic_speech_recognition",
            )
        )
    except Exception as err:
        print(f"ASR 辨識失敗: {err}")
    return None
|
|
def text_to_speech(text, tribe_name, filename):
    """Convert text to an audio file (TTS).

    Args:
        text: The text to synthesise; sent as ``gen_text_input``.
        tribe_name: Ethnicity name used to pick the speaker. ``"阿美"`` uses
            a fixed speaker; any other value is resolved through the
            service's ``/lambda`` endpoint.
        filename: Base name (without extension) of the output file.

    Returns:
        The path ``static/<filename>.wav`` on success, or ``None`` if the
        synthesis or the file move raised an exception (the error is
        printed).
    """
    os.makedirs("static", exist_ok=True)
    save_path = f"static/{filename}.wav"

    try:
        if tribe_name == "阿美":
            # This tribe always uses a fixed speaker, so the remote speaker
            # lookup is skipped: its result would be discarded anyway, and a
            # failure of that call should not abort the synthesis.
            # NOTE(review): assumes the "/lambda" call has no server-side
            # effect that "/default_speaker_tts" relies on — confirm.
            speaker = "阿美_秀姑巒_女聲1"
        else:
            speaker = get_clean_value(
                tts_client.predict(ethnicity=tribe_name, api_name="/lambda")
            )

        temp_file = tts_client.predict(
            ref=speaker,
            gen_text_input=text,
            api_name="/default_speaker_tts",
        )

        # Move the file returned by the client to its final location.
        shutil.move(temp_file, save_path)
        return save_path
    except Exception as e:
        print(f"TTS 合成失敗: {e}")
        return None