File size: 6,197 Bytes
62f57ec
f1a0148
 
 
 
62f57ec
 
f1a0148
 
 
62f57ec
 
 
 
 
f1a0148
62f57ec
 
 
 
f1a0148
62f57ec
 
f1a0148
 
62f57ec
 
 
 
 
 
f1a0148
 
 
 
62f57ec
 
 
 
 
f1a0148
62f57ec
 
 
 
f1a0148
62f57ec
 
 
 
f1a0148
62f57ec
 
 
459ac7b
f1a0148
 
 
62f57ec
 
 
 
 
 
 
 
 
f1a0148
62f57ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a0148
62f57ec
 
 
 
 
 
 
 
 
 
 
 
 
f1a0148
62f57ec
 
 
 
 
 
 
 
 
 
 
 
f1a0148
62f57ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a0148
62f57ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a0148
 
 
62f57ec
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# 한국어 TTS Arena - TTS Router
import os
import json
import base64
import tempfile
import requests
from dotenv import load_dotenv

load_dotenv()

# Provider mapping for TTS engines with Korean support
# - Channel Talk: its own API
# - ElevenLabs: direct API
# - OpenAI: API
# - Google: API

# Base endpoint of the Channel Talk TTS service; overridable via the environment.
CHANNEL_TTS_URL = os.environ.get(
    "CHANNEL_TTS_URL",
    "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech",
)

# ElevenLabs credentials are read once, at import time.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
# Falls back to the "Rachel" voice ID when no override is configured.
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")

# Model ID -> provider name plus the provider-specific model/voice settings
# that predict_tts() forwards to the matching predict_*_tts() function.
model_mapping = {
    # Channel Talk TTS (Korean-specialized)
    "channel-hana": {"provider": "channel", "voice": "hana"},
    # ElevenLabs (multilingual) - called directly through its API
    "eleven-multilingual-v2": {"provider": "elevenlabs", "model": "eleven_multilingual_v2"},
    # OpenAI TTS
    "openai-tts-1": {"provider": "openai", "model": "tts-1", "voice": "alloy"},
    "openai-tts-1-hd": {"provider": "openai", "model": "tts-1-hd", "voice": "alloy"},
    # Google Cloud TTS
    "google-wavenet": {"provider": "google", "voice": "ko-KR-Wavenet-A"},
    "google-neural2": {"provider": "google", "voice": "ko-KR-Neural2-A"},
}


def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Synthesize ``text`` through the Channel Talk TTS endpoint.

    Args:
        text: Text to synthesize.
        voice: Voice name, appended to ``CHANNEL_TTS_URL`` as the final
            path segment.

    Returns:
        Path of a temporary ``.wav`` file containing the response body. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        requests.HTTPError: If the service responds with an error status.
    """
    endpoint = "{}/{}".format(CHANNEL_TTS_URL, voice)
    request_headers = {"Content-Type": "application/json"}
    request_body = {"text": text, "output_format": "wav_24000"}

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=request_body,
        timeout=30,
    )
    reply.raise_for_status()

    # Persist the audio to a temp file that outlives this call.
    wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with wav_file:
        wav_file.write(reply.content)
    return wav_file.name


def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Synthesize ``text`` by calling the ElevenLabs text-to-speech API directly.

    The API key and voice ID come from the module-level
    ``ELEVENLABS_API_KEY`` and ``ELEVENLABS_VOICE_ID`` values.

    Args:
        text: Text to synthesize.
        model: ElevenLabs model identifier sent as ``model_id``.

    Returns:
        Path of a temporary ``.mp3`` file containing the response body. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If ``ELEVENLABS_API_KEY`` is empty or unset.
        requests.HTTPError: If the service responds with an error status.
    """
    if not ELEVENLABS_API_KEY:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    request_headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    request_body = {
        "text": text,
        "model_id": model,
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
    }

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()

    # Persist the audio to a temp file that outlives this call.
    mp3_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    with mp3_file:
        mp3_file.write(reply.content)
    return mp3_file.name


def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Synthesize ``text`` with the OpenAI speech endpoint.

    Args:
        text: Text to synthesize (sent as ``input``).
        model: OpenAI TTS model name.
        voice: OpenAI voice name.

    Returns:
        Path of a temporary ``.wav`` file containing the response body. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is empty
            or unset.
        requests.HTTPError: If the service responds with an error status.
    """
    token = os.environ.get("OPENAI_API_KEY")
    if not token:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": model,
        "input": text,
        "voice": voice,
        "response_format": "wav",
    }

    reply = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=request_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()

    # Persist the audio to a temp file that outlives this call.
    wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with wav_file:
        wav_file.write(reply.content)
    return wav_file.name


def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Synthesize ``text`` with the Google Cloud Text-to-Speech REST API.

    The request asks for ``LINEAR16`` audio at 24000 Hz with language code
    ``ko-KR``; the base64 ``audioContent`` field of the JSON response is
    decoded and written to disk.

    Args:
        text: Text to synthesize.
        voice: Google voice name.

    Returns:
        Path of a temporary ``.wav`` file containing the decoded audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is empty
            or unset, or if the response carries no ``audioContent``.
        requests.HTTPError: If the service responds with an error status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        headers={
            "Content-Type": "application/json",
            # Pass the key as a header instead of a `?key=` query parameter:
            # HTTP error messages include the request URL, so a key in the
            # URL would leak into every printed or logged exception.
            "X-Goog-Api-Key": api_key,
        },
        json={
            "input": {"text": text},
            "voice": {
                "languageCode": "ko-KR",
                "name": voice,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "sampleRateHertz": 24000,
            },
        },
        timeout=30,
    )
    response.raise_for_status()

    audio_content = response.json().get("audioContent")
    if not audio_content:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")

    # The API returns the audio base64-encoded inside the JSON payload.
    audio_bytes = base64.b64decode(audio_content)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
        return f.name


def predict_tts(text: str, model: str) -> str:
    """Generate speech for ``text`` with the provider configured for ``model``.

    Looks ``model`` up in ``model_mapping`` and forwards the call to the
    matching provider function, passing the configured model/voice settings
    (or that provider's defaults when a setting is absent).

    Args:
        text: Text to synthesize.
        model: Model ID; must be a key of ``model_mapping``.

    Returns:
        Path of the generated audio file.

    Raises:
        ValueError: If ``model`` is not in ``model_mapping`` or its
            configured provider is not recognized.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    settings = model_mapping[model]
    backend = settings["provider"]

    if backend == "channel":
        chosen_voice = settings.get("voice", "hana")
        return predict_channel_tts(text, chosen_voice)

    if backend == "openai":
        chosen_model = settings.get("model", "tts-1")
        chosen_voice = settings.get("voice", "alloy")
        return predict_openai_tts(text, chosen_model, chosen_voice)

    if backend == "google":
        chosen_voice = settings.get("voice", "ko-KR-Wavenet-A")
        return predict_google_tts(text, chosen_voice)

    if backend == "elevenlabs":
        chosen_model = settings.get("model", "eleven_multilingual_v2")
        return predict_elevenlabs_tts(text, chosen_model)

    # No known provider matched the configuration entry.
    raise ValueError(f"알 수 없는 provider: {backend}")


if __name__ == "__main__":
    # Manual smoke test: synthesize one Korean sentence with the Channel Talk
    # provider and report either the output path or the error.
    sample_sentence = "안녕하세요, 채널톡 TTS 테스트입니다."

    print("Testing Channel TTS...")
    try:
        audio_path = predict_channel_tts(sample_sentence)
        print(f"  Success: {audio_path}")
    except Exception as exc:
        print(f"  Error: {exc}")