# Korean TTS Arena - TTS Router
import os
import json
import base64
import tempfile
import requests
from dotenv import load_dotenv

load_dotenv()

# Korean-capable TTS provider mapping
# - Channel Talk: in-house API
# - ElevenLabs: direct API
# - OpenAI: API
# - Google: API

CHANNEL_TTS_URL = os.getenv(
    "CHANNEL_TTS_URL",
    "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech",
)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
ELEVENLABS_VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")  # Rachel (default)

model_mapping = {
    # Channel Talk TTS (Korean-specialized)
    "channel-hana": {
        "provider": "channel",
        "voice": "hana",
    },
    # ElevenLabs (multilingual) - called directly over HTTP
    "eleven-multilingual-v2": {
        "provider": "elevenlabs",
        "model": "eleven_multilingual_v2",
    },
    # OpenAI TTS
    "openai-tts-1": {
        "provider": "openai",
        "model": "tts-1",
        "voice": "alloy",
    },
    "openai-tts-1-hd": {
        "provider": "openai",
        "model": "tts-1-hd",
        "voice": "alloy",
    },
    # Google Cloud TTS
    "google-wavenet": {
        "provider": "google",
        "voice": "ko-KR-Wavenet-A",
    },
    "google-neural2": {
        "provider": "google",
        "voice": "ko-KR-Neural2-A",
    },
}


def _save_audio(audio: bytes, suffix: str) -> str:
    """Write audio bytes to a new temporary file and return its path.

    The file is created with ``delete=False`` so it outlives this call; the
    caller owns the returned path. If the write fails, the partially written
    file is removed so failed requests do not leave orphans behind.
    """
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        with tmp:
            tmp.write(audio)
    except BaseException:
        # Best-effort cleanup; the original error is what matters.
        try:
            os.remove(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name


def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Call the Channel Talk TTS API and return the path of the saved WAV file.

    Args:
        text: Text to synthesize.
        voice: Voice name appended to ``CHANNEL_TTS_URL`` as a path segment.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    # Tolerate a configured base URL that ends with a slash.
    url = f"{CHANNEL_TTS_URL.rstrip('/')}/{voice}"
    response = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        json={"text": text, "output_format": "wav_24000"},
        timeout=30,
    )
    response.raise_for_status()
    return _save_audio(response.content, ".wav")


def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Call the ElevenLabs TTS API directly and return the path of the saved MP3.

    Args:
        text: Text to synthesize.
        model: Value sent as ``model_id`` in the request body.

    Raises:
        ValueError: If no ElevenLabs API key is configured.
        requests.HTTPError: If the API responds with an error status.
    """
    # Fall back to the live environment so a key set after import is honored,
    # matching how the OpenAI and Google helpers read their keys.
    api_key = ELEVENLABS_API_KEY or os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    voice_id = ELEVENLABS_VOICE_ID
    response = requests.post(
        f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
        headers={
            "xi-api-key": api_key,
            "Content-Type": "application/json",
            "Accept": "audio/mpeg",
        },
        json={
            "text": text,
            "model_id": model,
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.75,
            },
        },
        timeout=60,
    )
    response.raise_for_status()
    return _save_audio(response.content, ".mp3")


def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Call the OpenAI speech API and return the path of the saved WAV file.

    Args:
        text: Text to synthesize.
        model: Value sent as ``model`` in the request body.
        voice: Value sent as ``voice`` in the request body.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is not set.
        requests.HTTPError: If the API responds with an error status.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": model,
            "input": text,
            "voice": voice,
            "response_format": "wav",
        },
        timeout=60,
    )
    response.raise_for_status()
    return _save_audio(response.content, ".wav")


def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Call the Google Cloud TTS API and return the path of the saved WAV file.

    Args:
        text: Text to synthesize.
        voice: Voice name sent as ``voice.name``; the language code is fixed
            to ``ko-KR``.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is not set, or the response has no
            ``audioContent`` field.
        requests.HTTPError: If the API responds with an error status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        headers={
            "Content-Type": "application/json",
            # Send the key as a header rather than a ?key= query parameter:
            # HTTPError messages include the request URL, so a key in the URL
            # would leak into logs and printed errors.
            "X-Goog-Api-Key": api_key,
        },
        json={
            "input": {"text": text},
            "voice": {
                "languageCode": "ko-KR",
                "name": voice,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "sampleRateHertz": 24000,
            },
        },
        timeout=30,
    )
    response.raise_for_status()

    audio_content = response.json().get("audioContent")
    if not audio_content:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")

    # The API returns the audio as a base64-encoded string.
    audio_bytes = base64.b64decode(audio_content)
    return _save_audio(audio_bytes, ".wav")


def predict_tts(text: str, model: str) -> str:
    """Synthesize speech with the provider configured for ``model``.

    Args:
        text: Text to synthesize.
        model: Model ID (a key of ``model_mapping``).

    Returns:
        Path of the generated audio file.

    Raises:
        ValueError: If ``model`` is not in ``model_mapping`` or its provider
            is not recognized.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    config = model_mapping[model]
    provider = config["provider"]

    if provider == "channel":
        return predict_channel_tts(text, config.get("voice", "hana"))
    elif provider == "openai":
        return predict_openai_tts(
            text,
            config.get("model", "tts-1"),
            config.get("voice", "alloy"),
        )
    elif provider == "google":
        return predict_google_tts(text, config.get("voice", "ko-KR-Wavenet-A"))
    elif provider == "elevenlabs":
        return predict_elevenlabs_tts(text, config.get("model", "eleven_multilingual_v2"))
    else:
        raise ValueError(f"알 수 없는 provider: {provider}")


if __name__ == "__main__":
    # Manual smoke test against the Channel Talk endpoint.
    test_text = "안녕하세요, 채널톡 TTS 테스트입니다."

    print("Testing Channel TTS...")
    try:
        path = predict_channel_tts(test_text)
        print(f"  Success: {path}")
    except Exception as e:
        print(f"  Error: {e}")