Spaces:
Sleeping
Sleeping
Ko-TTS-Arena Contributors
committed on
Commit
·
45eb847
1
Parent(s):
6662253
fix: Require service account for Gemini TTS and disable legacy Typecast model
Browse files
- models.py +7 -1
- requirements.txt +2 -1
- tts.py +51 -32
models.py
CHANGED
|
@@ -566,7 +566,7 @@ def insert_initial_models():
|
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 568 |
has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
|
| 569 |
-
has_gemini_tts = bool(os.getenv("
|
| 570 |
|
| 571 |
tts_models = [
|
| 572 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
@@ -676,6 +676,12 @@ def insert_initial_models():
|
|
| 676 |
|
| 677 |
db.session.commit()
|
| 678 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
|
| 680 |
def get_top_voters(limit=10):
|
| 681 |
"""
|
|
|
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 568 |
has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
|
| 569 |
+
has_gemini_tts = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON"))
|
| 570 |
|
| 571 |
tts_models = [
|
| 572 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
|
|
| 676 |
|
| 677 |
db.session.commit()
|
| 678 |
|
| 679 |
+
# Deactivate legacy Typecast model (JaeYi) if it still exists
|
| 680 |
+
legacy_typecast = Model.query.filter_by(id="typecast-jaeyi").first()
|
| 681 |
+
if legacy_typecast and legacy_typecast.is_active:
|
| 682 |
+
legacy_typecast.is_active = False
|
| 683 |
+
db.session.commit()
|
| 684 |
+
|
| 685 |
|
| 686 |
def get_top_voters(limit=10):
|
| 687 |
"""
|
requirements.txt
CHANGED
|
@@ -14,4 +14,5 @@ huggingface-hub
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
-
typecast-python
|
|
|
|
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
+
typecast-python
|
| 18 |
+
google-cloud-texttospeech
|
tts.py
CHANGED
|
@@ -55,10 +55,20 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
-
# Gemini TTS (Google Cloud) -
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 64 |
"""
|
|
@@ -218,6 +228,12 @@ model_mapping = {
|
|
| 218 |
"voice_id": "tc_5c789c337ad86500073a02cd",
|
| 219 |
"model": "ssfm-v21",
|
| 220 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
# Gemini TTS (Google Cloud - 다국어 지원)
|
| 222 |
"gemini-tts-aoede": {
|
| 223 |
"provider": "gemini",
|
|
@@ -450,43 +466,46 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
|
|
| 450 |
|
| 451 |
|
| 452 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 453 |
-
"""Gemini TTS API 호출 (
|
| 454 |
-
if not
|
| 455 |
-
raise ValueError(
|
|
|
|
|
|
|
| 456 |
|
| 457 |
try:
|
| 458 |
-
|
|
|
|
| 459 |
|
| 460 |
-
|
| 461 |
-
"
|
| 462 |
-
|
| 463 |
-
"prompt": "친절하고 자연스러운 톤으로 말해주세요"
|
| 464 |
-
},
|
| 465 |
-
"voice": {
|
| 466 |
-
"languageCode": "ko-kr",
|
| 467 |
-
"name": voice,
|
| 468 |
-
"modelName": model
|
| 469 |
-
},
|
| 470 |
-
"audioConfig": {
|
| 471 |
-
"audioEncoding": "LINEAR16",
|
| 472 |
-
"sampleRateHertz": 24000
|
| 473 |
-
}
|
| 474 |
-
}
|
| 475 |
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
| 478 |
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
-
audio_bytes = base64.b64decode(audio_content)
|
| 484 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 485 |
-
f.write(
|
| 486 |
return f.name
|
| 487 |
|
| 488 |
-
except
|
| 489 |
-
raise ValueError(
|
|
|
|
|
|
|
| 490 |
except Exception as e:
|
| 491 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 492 |
|
|
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
+
# Gemini TTS (Google Cloud) - requires service account JSON
|
| 59 |
+
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
| 60 |
+
_GEMINI_CREDENTIALS_PATH = None
|
| 61 |
+
if GOOGLE_APPLICATION_CREDENTIALS_JSON:
|
| 62 |
+
try:
|
| 63 |
+
_GEMINI_CREDENTIALS_PATH = os.path.join(
|
| 64 |
+
tempfile.gettempdir(), "gemini_tts_credentials.json"
|
| 65 |
+
)
|
| 66 |
+
with open(_GEMINI_CREDENTIALS_PATH, "w") as f:
|
| 67 |
+
f.write(GOOGLE_APPLICATION_CREDENTIALS_JSON)
|
| 68 |
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _GEMINI_CREDENTIALS_PATH
|
| 69 |
+
print("[Gemini TTS] Service account credentials loaded")
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"[Gemini TTS] Failed to save credentials: {e}")
|
| 72 |
|
| 73 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 74 |
"""
|
|
|
|
| 228 |
"voice_id": "tc_5c789c337ad86500073a02cd",
|
| 229 |
"model": "ssfm-v21",
|
| 230 |
},
|
| 231 |
+
# Legacy Typecast ID kept for backward compatibility (routes to GeumHee)
|
| 232 |
+
"typecast-jaeyi": {
|
| 233 |
+
"provider": "typecast",
|
| 234 |
+
"voice_id": "tc_5c789c337ad86500073a02cd",
|
| 235 |
+
"model": "ssfm-v21",
|
| 236 |
+
},
|
| 237 |
# Gemini TTS (Google Cloud - 다국어 지원)
|
| 238 |
"gemini-tts-aoede": {
|
| 239 |
"provider": "gemini",
|
|
|
|
| 466 |
|
| 467 |
|
| 468 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 469 |
+
"""Gemini TTS API 호출 (서비스 계정 JSON 필요)"""
|
| 470 |
+
if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
|
| 471 |
+
raise ValueError(
|
| 472 |
+
"GOOGLE_APPLICATION_CREDENTIALS_JSON 환경 변수가 설정되지 않았습니다."
|
| 473 |
+
)
|
| 474 |
|
| 475 |
try:
|
| 476 |
+
from google.api_core.client_options import ClientOptions
|
| 477 |
+
from google.cloud import texttospeech_v1beta1 as texttospeech
|
| 478 |
|
| 479 |
+
client = texttospeech.TextToSpeechClient(
|
| 480 |
+
client_options=ClientOptions(api_endpoint="texttospeech.googleapis.com")
|
| 481 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
+
voice_params = texttospeech.VoiceSelectionParams(
|
| 484 |
+
name=voice,
|
| 485 |
+
language_code="ko-kr",
|
| 486 |
+
model_name=model,
|
| 487 |
+
)
|
| 488 |
|
| 489 |
+
response = client.synthesize_speech(
|
| 490 |
+
input=texttospeech.SynthesisInput(
|
| 491 |
+
text=text,
|
| 492 |
+
prompt="친절하고 자연스러운 톤으로 말해주세요",
|
| 493 |
+
),
|
| 494 |
+
voice=voice_params,
|
| 495 |
+
audio_config=texttospeech.AudioConfig(
|
| 496 |
+
audio_encoding=texttospeech.AudioEncoding.LINEAR16,
|
| 497 |
+
sample_rate_hertz=24000,
|
| 498 |
+
),
|
| 499 |
+
)
|
| 500 |
|
|
|
|
| 501 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 502 |
+
f.write(response.audio_content)
|
| 503 |
return f.name
|
| 504 |
|
| 505 |
+
except ImportError:
|
| 506 |
+
raise ValueError(
|
| 507 |
+
"google-cloud-texttospeech 패키지가 설치되지 않았습니다. requirements.txt를 확인하세요."
|
| 508 |
+
)
|
| 509 |
except Exception as e:
|
| 510 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 511 |
|