Spaces:
Sleeping
Sleeping
File size: 6,197 Bytes
62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec 459ac7b f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec f1a0148 62f57ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
# 한국어 TTS Arena - TTS Router
import os
import json
import base64
import tempfile
import requests
from dotenv import load_dotenv
load_dotenv()
# TTS providers with Korean support that this router maps to:
# - Channel Talk: in-house API
# - ElevenLabs: direct API
# - OpenAI: API
# - Google: API

# Base endpoint of the Channel Talk TTS service; can be overridden through the
# CHANNEL_TTS_URL environment variable.
CHANNEL_TTS_URL = os.environ.get(
    "CHANNEL_TTS_URL",
    "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech",
)

# ElevenLabs settings are read once, when this module is imported.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
# The fallback voice ID is "Rachel" (the default voice).
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
# Maps each public model ID to the provider that serves it plus the
# provider-specific options ("model" and/or "voice") passed to that provider.
model_mapping = {
    # Channel Talk TTS (specialized for Korean)
    "channel-hana": {"provider": "channel", "voice": "hana"},
    # ElevenLabs (multilingual) - called directly through its API
    "eleven-multilingual-v2": {"provider": "elevenlabs", "model": "eleven_multilingual_v2"},
    # OpenAI TTS
    "openai-tts-1": {"provider": "openai", "model": "tts-1", "voice": "alloy"},
    "openai-tts-1-hd": {"provider": "openai", "model": "tts-1-hd", "voice": "alloy"},
    # Google Cloud TTS
    "google-wavenet": {"provider": "google", "voice": "ko-KR-Wavenet-A"},
    "google-neural2": {"provider": "google", "voice": "ko-KR-Neural2-A"},
}
def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Call the Channel Talk TTS API and store the returned audio in a file.

    Args:
        text: Text to synthesize.
        voice: Voice name; appended to ``CHANNEL_TTS_URL`` as the final path
            segment.

    Returns:
        Path of a temporary ``.wav`` file containing the response body. The
        file is created with ``delete=False``, so removing it is left to the
        caller.

    Raises:
        requests.HTTPError: If the service responds with an error status.
    """
    endpoint = f"{CHANNEL_TTS_URL}/{voice}"
    payload = {"text": text, "output_format": "wav_24000"}

    reply = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=30,
    )
    reply.raise_for_status()

    # Persist the audio bytes to a temporary file that outlives this call.
    wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with wav_file:
        wav_file.write(reply.content)
    return wav_file.name
def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Call the ElevenLabs text-to-speech API directly and save the audio.

    The API key and voice ID come from the module-level
    ``ELEVENLABS_API_KEY`` and ``ELEVENLABS_VOICE_ID`` values.

    Args:
        text: Text to synthesize.
        model: ElevenLabs model ID, sent as ``model_id`` in the request body.

    Returns:
        Path of a temporary ``.mp3`` file containing the response body. The
        file is created with ``delete=False``, so removing it is left to the
        caller.

    Raises:
        ValueError: If no ElevenLabs API key is configured.
        requests.HTTPError: If the service responds with an error status.
    """
    if not ELEVENLABS_API_KEY:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    request_headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    request_body = {
        "text": text,
        "model_id": model,
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
    }

    reply = requests.post(endpoint, headers=request_headers, json=request_body, timeout=60)
    reply.raise_for_status()

    # Persist the audio bytes to a temporary file that outlives this call.
    mp3_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    with mp3_file:
        mp3_file.write(reply.content)
    return mp3_file.name
def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Call the OpenAI speech endpoint and save the returned audio.

    The API key is read from the ``OPENAI_API_KEY`` environment variable on
    every call.

    Args:
        text: Text to synthesize, sent as ``input``.
        model: OpenAI TTS model name.
        voice: OpenAI voice name.

    Returns:
        Path of a temporary ``.wav`` file containing the response body. The
        file is created with ``delete=False``, so removing it is left to the
        caller.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        requests.HTTPError: If the service responds with an error status.
    """
    token = os.getenv("OPENAI_API_KEY")
    if not token:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": model,
        "input": text,
        "voice": voice,
        "response_format": "wav",
    }

    reply = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=request_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()

    # Persist the audio bytes to a temporary file that outlives this call.
    wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with wav_file:
        wav_file.write(reply.content)
    return wav_file.name
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Call the Google Cloud Text-to-Speech REST API and save the audio.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable on
    every call. The request always uses language code ``ko-KR``, LINEAR16
    encoding and a 24000 Hz sample rate.

    Args:
        text: Text to synthesize.
        voice: Google voice name (for example ``ko-KR-Wavenet-A``).

    Returns:
        Path of a temporary ``.wav`` file containing the decoded audio. The
        file is created with ``delete=False``, so removing it is left to the
        caller.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or empty, or if the
            response carries no ``audioContent``.
        requests.HTTPError: If the service responds with an error status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        headers={
            "Content-Type": "application/json",
            # Send the key as a header rather than a `?key=` query parameter:
            # the request URL is embedded in HTTPError messages (and commonly
            # in logs), so a key placed in the URL would be leaked there.
            "X-Goog-Api-Key": api_key,
        },
        json={
            "input": {"text": text},
            "voice": {
                "languageCode": "ko-KR",
                "name": voice,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "sampleRateHertz": 24000,
            },
        },
        timeout=30,
    )
    response.raise_for_status()

    # The audio arrives base64-encoded inside the JSON body.
    audio_content = response.json().get("audioContent")
    if not audio_content:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")
    audio_bytes = base64.b64decode(audio_content)

    # Persist the audio bytes to a temporary file that outlives this call.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
    return f.name
def predict_tts(text: str, model: str) -> str:
    """Generate speech for ``text`` with the provider registered for ``model``.

    Args:
        text: Text to synthesize.
        model: Model ID; must be a key of ``model_mapping``.

    Returns:
        Path of the generated audio file, as returned by the provider-specific
        function.

    Raises:
        ValueError: If ``model`` is not in ``model_mapping`` or its entry names
            an unknown provider.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    settings = model_mapping[model]
    backend = settings["provider"]

    # Each branch forwards the per-model options, falling back to the same
    # defaults the provider functions use.
    if backend == "channel":
        return predict_channel_tts(text, settings.get("voice", "hana"))

    if backend == "openai":
        openai_model = settings.get("model", "tts-1")
        openai_voice = settings.get("voice", "alloy")
        return predict_openai_tts(text, openai_model, openai_voice)

    if backend == "google":
        return predict_google_tts(text, settings.get("voice", "ko-KR-Wavenet-A"))

    if backend == "elevenlabs":
        return predict_elevenlabs_tts(text, settings.get("model", "eleven_multilingual_v2"))

    raise ValueError(f"알 수 없는 provider: {backend}")
if __name__ == "__main__":
    # Manual smoke test: synthesize one Korean sentence with Channel Talk TTS
    # and report either the output path or the error.
    sample_text = "안녕하세요, 채널톡 TTS 테스트입니다."

    print("Testing Channel TTS...")
    try:
        audio_path = predict_channel_tts(sample_text)
        print(f" Success: {audio_path}")
    except Exception as err:
        print(f" Error: {err}")
|