# "Spaces: Sleeping" — Hugging Face Spaces status banner captured by the page
# scrape; not part of this source file.
# Korean TTS Arena - TTS Router
#
# Routes a public model ID to the matching TTS provider backend.

import os
import json
import base64
import tempfile
import requests
from dotenv import load_dotenv

# Load provider credentials/overrides from a local .env file, if present.
load_dotenv()

# Korean-capable TTS providers handled by this router:
# - Channel Talk: in-house API
# - ElevenLabs: direct API
# - OpenAI: API
# - Google: API

# Base endpoint for the Channel Talk streaming TTS service; the voice name is
# appended as a path segment by predict_channel_tts().
CHANNEL_TTS_URL = os.getenv(
    "CHANNEL_TTS_URL",
    "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech"
)

# ElevenLabs credentials and voice selection come from the environment.
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
ELEVENLABS_VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")  # Rachel (default)

# Maps a model ID (the key callers pass to predict_tts) to its routing config.
# "provider" selects the backend; the remaining keys are provider-specific and
# are read with .get() defaults by the dispatch code, so they may be omitted.
model_mapping = {
    # Channel Talk TTS (Korean-specialized)
    "channel-hana": {
        "provider": "channel",
        "voice": "hana",
    },
    # ElevenLabs (multilingual) - direct API call
    "eleven-multilingual-v2": {
        "provider": "elevenlabs",
        "model": "eleven_multilingual_v2",
    },
    # OpenAI TTS
    "openai-tts-1": {
        "provider": "openai",
        "model": "tts-1",
        "voice": "alloy",
    },
    "openai-tts-1-hd": {
        "provider": "openai",
        "model": "tts-1-hd",
        "voice": "alloy",
    },
    # Google Cloud TTS
    "google-wavenet": {
        "provider": "google",
        "voice": "ko-KR-Wavenet-A",
    },
    "google-neural2": {
        "provider": "google",
        "voice": "ko-KR-Neural2-A",
    },
}
def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Synthesize speech through the Channel Talk TTS API.

    Args:
        text: Text to synthesize.
        voice: Channel Talk voice name, appended to the endpoint path.

    Returns:
        Path to a temporary 24 kHz WAV file holding the audio.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    endpoint = f"{CHANNEL_TTS_URL}/{voice}"
    payload = {"text": text, "output_format": "wav_24000"}

    resp = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()

    # delete=False: the caller owns (and must eventually remove) the file.
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with out:
        out.write(resp.content)
    return out.name
def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Synthesize speech by calling the ElevenLabs TTS API directly.

    Args:
        text: Text to synthesize.
        model: ElevenLabs model ID, sent as ``model_id``.

    Returns:
        Path to a temporary MP3 file holding the audio.

    Raises:
        ValueError: If ELEVENLABS_API_KEY is not configured.
        requests.HTTPError: If the API responds with an error status.
    """
    if not ELEVENLABS_API_KEY:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    request_headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    body = {
        "text": text,
        "model_id": model,
        # Fixed synthesis settings; adjust here if output sounds unstable.
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }

    resp = requests.post(endpoint, headers=request_headers, json=body, timeout=60)
    resp.raise_for_status()

    # delete=False: the caller owns (and must eventually remove) the file.
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    with out:
        out.write(resp.content)
    return out.name
def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Synthesize speech via the OpenAI TTS endpoint.

    Args:
        text: Text to synthesize.
        model: OpenAI TTS model name.
        voice: OpenAI voice preset.

    Returns:
        Path to a temporary WAV file holding the audio.

    Raises:
        ValueError: If OPENAI_API_KEY is not configured.
        requests.HTTPError: If the API responds with an error status.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    body = {
        "model": model,
        "input": text,
        "voice": voice,
        "response_format": "wav",
    }
    resp = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json=body,
        timeout=60,
    )
    resp.raise_for_status()

    # delete=False: the caller owns (and must eventually remove) the file.
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with out:
        out.write(resp.content)
    return out.name
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Synthesize speech via the Google Cloud Text-to-Speech REST API.

    Args:
        text: Text to synthesize.
        voice: Google voice name (language is fixed to ko-KR).

    Returns:
        Path to a temporary WAV file (24 kHz LINEAR16 audio).

    Raises:
        ValueError: If GOOGLE_API_KEY is missing or the API returns no audio.
        requests.HTTPError: If the API responds with an error status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    # NOTE(review): the API key travels as a URL query parameter — avoid
    # logging this URL anywhere.
    endpoint = f"https://texttospeech.googleapis.com/v1/text:synthesize?key={api_key}"
    payload = {
        "input": {"text": text},
        "voice": {
            "languageCode": "ko-KR",
            "name": voice,
        },
        "audioConfig": {
            "audioEncoding": "LINEAR16",
            "sampleRateHertz": 24000,
        },
    }

    resp = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()

    # The REST API returns base64-encoded audio in the JSON body.
    encoded = resp.json().get("audioContent")
    if not encoded:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")

    # delete=False: the caller owns (and must eventually remove) the file.
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with out:
        out.write(base64.b64decode(encoded))
    return out.name
def predict_tts(text: str, model: str) -> str:
    """Main TTS generation entry point: dispatch to the provider backend.

    Args:
        text: Text to synthesize.
        model: Model ID (a key of ``model_mapping``).

    Returns:
        Path to the generated audio file.

    Raises:
        ValueError: If the model ID or its provider is unknown.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    config = model_mapping[model]
    provider = config["provider"]

    # Guard-clause dispatch; each branch fills provider-specific defaults.
    if provider == "channel":
        return predict_channel_tts(text, config.get("voice", "hana"))
    if provider == "elevenlabs":
        return predict_elevenlabs_tts(text, config.get("model", "eleven_multilingual_v2"))
    if provider == "openai":
        return predict_openai_tts(
            text,
            config.get("model", "tts-1"),
            config.get("voice", "alloy"),
        )
    if provider == "google":
        return predict_google_tts(text, config.get("voice", "ko-KR-Wavenet-A"))

    raise ValueError(f"알 수 없는 provider: {provider}")
if __name__ == "__main__":
    # Smoke test: synthesize one short Korean sentence via Channel Talk TTS.
    sample_text = "안녕하세요, 채널톡 TTS 테스트입니다."
    print("Testing Channel TTS...")
    try:
        audio_path = predict_channel_tts(sample_text)
    except Exception as e:
        print(f" Error: {e}")
    else:
        print(f" Success: {audio_path}")