Spaces:
Sleeping
Sleeping
Ko-TTS-Arena Contributors
committed on
Commit
·
1af3f4d
1
Parent(s):
c741502
feat: Add Typecast TTS (JaeYi voice) - https://typecast.ai/
Browse files
- models.py +10 -0
- requirements.txt +2 -1
- tts.py +44 -0
models.py
CHANGED
|
@@ -565,6 +565,7 @@ def insert_initial_models():
|
|
| 565 |
has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
|
|
|
| 568 |
|
| 569 |
tts_models = [
|
| 570 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
@@ -638,6 +639,15 @@ def insert_initial_models():
|
|
| 638 |
is_active=has_humelo,
|
| 639 |
model_url="https://humelo.com/",
|
| 640 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
]
|
| 642 |
|
| 643 |
for model in tts_models:
|
|
|
|
| 565 |
has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 568 |
+
has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
|
| 569 |
|
| 570 |
tts_models = [
|
| 571 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
|
|
| 639 |
is_active=has_humelo,
|
| 640 |
model_url="https://humelo.com/",
|
| 641 |
),
|
| 642 |
+
# Typecast TTS (한국어 특화) - API 키 있을 때만 활성화
|
| 643 |
+
Model(
|
| 644 |
+
id="typecast-jaeyi",
|
| 645 |
+
name="Typecast (JaeYi)",
|
| 646 |
+
model_type=ModelType.TTS,
|
| 647 |
+
is_open=False,
|
| 648 |
+
is_active=has_typecast,
|
| 649 |
+
model_url="https://typecast.ai/",
|
| 650 |
+
),
|
| 651 |
]
|
| 652 |
|
| 653 |
for model in tts_models:
|
requirements.txt
CHANGED
|
@@ -13,4 +13,5 @@ waitress
|
|
| 13 |
huggingface-hub
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
-
pydub
|
|
|
|
|
|
| 13 |
huggingface-hub
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
+
pydub
|
| 17 |
+
typecast-python
|
tts.py
CHANGED
|
@@ -52,6 +52,9 @@ CLOVA_API_KEY = os.getenv("CLOVA_API_KEY")
|
|
| 52 |
HUMELO_API_KEY = os.getenv("HUMELO_API_KEY")
|
| 53 |
HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-synthesize-v1"
|
| 54 |
|
|
|
|
|
|
|
|
|
|
| 55 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 56 |
"""
|
| 57 |
Resample a WAV file to 16kHz for fair comparison.
|
|
@@ -204,6 +207,12 @@ model_mapping = {
|
|
| 204 |
"voice": "리아",
|
| 205 |
"emotion": "neutral",
|
| 206 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
}
|
| 208 |
|
| 209 |
|
|
@@ -402,6 +411,33 @@ def predict_humelo_tts(text: str, voice: str = "리아", emotion: str = "neutral
|
|
| 402 |
return f.name
|
| 403 |
|
| 404 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
|
| 406 |
"""Google Cloud TTS API 호출"""
|
| 407 |
api_key = os.getenv("GOOGLE_API_KEY")
|
|
@@ -494,6 +530,14 @@ def predict_tts(text: str, model: str) -> str:
|
|
| 494 |
# Humelo might return MP3 or WAV, check extension
|
| 495 |
is_mp3 = audio_path.endswith(".mp3")
|
| 496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
else:
|
| 498 |
raise ValueError(f"알 수 없는 provider: {provider}")
|
| 499 |
|
|
|
|
| 52 |
HUMELO_API_KEY = os.getenv("HUMELO_API_KEY")
|
| 53 |
HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-synthesize-v1"
|
| 54 |
|
| 55 |
+
# Typecast TTS
|
| 56 |
+
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
+
|
| 58 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 59 |
"""
|
| 60 |
Resample a WAV file to 16kHz for fair comparison.
|
|
|
|
| 207 |
"voice": "리아",
|
| 208 |
"emotion": "neutral",
|
| 209 |
},
|
| 210 |
+
# Typecast TTS (한국어 특화)
|
| 211 |
+
"typecast-jaeyi": {
|
| 212 |
+
"provider": "typecast",
|
| 213 |
+
"voice_id": "tc_612ed01c7eb720fddd3ddedf",
|
| 214 |
+
"model": "ssfm-v21",
|
| 215 |
+
},
|
| 216 |
}
|
| 217 |
|
| 218 |
|
|
|
|
| 411 |
return f.name
|
| 412 |
|
| 413 |
|
| 414 |
+
def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf", model: str = "ssfm-v21") -> str:
    """Synthesize speech through the Typecast TTS API.

    Args:
        text: Text to synthesize.
        voice_id: Typecast voice identifier forwarded in the request.
        model: Typecast model name forwarded in the request.

    Returns:
        Path of a temporary ``.wav`` file containing the audio bytes from the
        response. The file is created with ``delete=False``, so it is not
        removed automatically when closed.

    Raises:
        ValueError: If ``TYPECAST_API_KEY`` is not set, if the ``typecast``
            package cannot be imported, or if the request or the file write
            fails. The underlying exception is attached as ``__cause__``.
    """
    api_key = TYPECAST_API_KEY
    if not api_key:
        raise ValueError("TYPECAST_API_KEY 환경 변수가 설정되지 않았습니다.")

    # The SDK is imported lazily inside the function. This guard covers only
    # the import itself, so an ImportError raised later inside the SDK is not
    # misreported as "package not installed".
    try:
        from typecast.client import Typecast
        from typecast.models import TTSRequest
    except ImportError as exc:
        raise ValueError("typecast-python 패키지가 설치되지 않았습니다. pip install typecast-python") from exc

    try:
        cli = Typecast(api_key=api_key)
        response = cli.text_to_speech(TTSRequest(
            text=text,
            model=model,
            voice_id=voice_id,
        ))

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            f.write(response.audio_data)
            return f.name
    except Exception as exc:
        # Callers receive ValueError as before; chaining keeps the original
        # traceback available for debugging.
        raise ValueError(f"Typecast TTS API 오류: {exc}") from exc
|
| 439 |
+
|
| 440 |
+
|
| 441 |
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
|
| 442 |
"""Google Cloud TTS API 호출"""
|
| 443 |
api_key = os.getenv("GOOGLE_API_KEY")
|
|
|
|
| 530 |
# Humelo might return MP3 or WAV, check extension
|
| 531 |
is_mp3 = audio_path.endswith(".mp3")
|
| 532 |
|
| 533 |
+
elif provider == "typecast":
|
| 534 |
+
audio_path = predict_typecast_tts(
|
| 535 |
+
text,
|
| 536 |
+
config.get("voice_id", "tc_612ed01c7eb720fddd3ddedf"),
|
| 537 |
+
config.get("model", "ssfm-v21"),
|
| 538 |
+
)
|
| 539 |
+
# Typecast returns WAV
|
| 540 |
+
|
| 541 |
else:
|
| 542 |
raise ValueError(f"알 수 없는 provider: {provider}")
|
| 543 |
|