File size: 2,317 Bytes
1c9f1ca
 
df4782a
 
1c9f1ca
df4782a
1c9f1ca
df4782a
1c9f1ca
df4782a
d298129
1c9f1ca
 
 
d298129
df4782a
 
 
 
 
d298129
 
df4782a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c9f1ca
df4782a
 
 
1c9f1ca
d298129
df4782a
1c9f1ca
df4782a
d298129
df4782a
 
 
 
 
 
 
 
 
 
 
 
 
1c9f1ca
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import shutil
from gradio_client import Client, handle_file
import config

# Initialise the speech API connections.
# Both names are bound up front so that a failed start-up leaves them as None
# rather than undefined (which would only surface later as a NameError at
# call time, far away from the real cause).
asr_client = None
tts_client = None
try:
    # ASR recognition engine (sapolita-kaldi)
    asr_client = Client("https://ai-labs.ilrdf.org.tw/sapolita-kaldi/")
    # TTS synthesis engine (hnang-kari-ai-asi-sluhay)
    tts_client = Client("https://ai-labs.ilrdf.org.tw/hnang-kari-ai-asi-sluhay/")
except Exception as e:
    # Best-effort start-up: report the problem and keep the module importable.
    print(f"語音模組初始化失敗: {e}")

def get_clean_value(res):
    """Unwrap an API result so callers get the bare payload.

    * A dict that has a ``'value'`` key yields ``res['value']``.
    * A non-empty list yields its first element.
    * Anything else (including an empty list or a dict without ``'value'``)
      is returned unchanged.
    """
    if isinstance(res, dict):
        if 'value' in res:
            return res['value']
    elif isinstance(res, list) and res:
        return res[0]
    return res

def speech_to_text(audio_path, tribe_name):
    """Transcribe an audio file to text (ASR).

    Used for voice messages and for the audio track of videos.

    Args:
        audio_path: Path of the audio file handed to ``handle_file``.
        tribe_name: Key looked up in ``config.TRIBE_MAP`` to pick the
            ASR dialect code.

    Returns:
        The recognition result passed through ``get_clean_value``, or
        ``None`` if anything in the request raises.
    """
    # Resolve the dialect code from config.TRIBE_MAP; unknown tribes (or
    # entries without "asr_code") fall back to "formosan_ami".
    tribe_entry = config.TRIBE_MAP.get(tribe_name, {})
    dialect = tribe_entry.get("asr_code", "formosan_ami")

    try:
        # Call the remote recognition endpoint.
        recognise = asr_client.predict
        upload = handle_file(audio_path)
        raw_result = recognise(
            dialect_id=dialect,
            audio_data=upload,
            api_name="/automatic_speech_recognition",
        )
        cleaned = get_clean_value(raw_result)
        return cleaned
    except Exception as e:
        print(f"ASR 辨識失敗: {e}")
        return None

def text_to_speech(text, tribe_name, filename):
    """Synthesise speech for ``text`` and store it as a WAV file (TTS).

    Args:
        text: Text sent to the synthesis endpoint.
        tribe_name: Ethnicity name used to look up a speaker; ``"阿美"``
            always uses a fixed speaker instead.
        filename: Base name (without extension) of the output file inside
            the ``static`` directory.

    Returns:
        The path ``static/<filename>.wav`` on success, or ``None`` if the
        speaker lookup, the synthesis call or the file move raises.
    """
    os.makedirs("static", exist_ok=True)
    save_path = f"static/{filename}.wav"

    try:
        # 1. Pick the speaker code.
        if tribe_name == "阿美":
            # Amis is pinned to one specific female voice (kept from the
            # 3.0 behaviour), so the speaker lookup is skipped entirely:
            # its result would be discarded, and a failure of that call
            # should not prevent synthesis.
            # NOTE(review): assumes the "/lambda" lookup is a stateless
            # query on the service side - confirm.
            speaker = "阿美_秀姑巒_女聲1"
        else:
            speaker = get_clean_value(
                tts_client.predict(ethnicity=tribe_name, api_name="/lambda")
            )

        # 2. Run the synthesis.
        temp_file = tts_client.predict(
            ref=speaker,
            gen_text_input=text,
            api_name="/default_speaker_tts",
        )

        # 3. Move the temporary result into the static folder.
        shutil.move(temp_file, save_path)
        return save_path
    except Exception as e:
        print(f"TTS 合成失敗: {e}")
        return None