Ko-TTS-Arena Contributors committed on
Commit
45eb847
·
1 Parent(s): 6662253

fix: Require service account for Gemini TTS and disable legacy Typecast model

Browse files
Files changed (3) hide show
  1. models.py +7 -1
  2. requirements.txt +2 -1
  3. tts.py +51 -32
models.py CHANGED
@@ -566,7 +566,7 @@ def insert_initial_models():
566
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
567
  has_humelo = bool(os.getenv("HUMELO_API_KEY"))
568
  has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
569
- has_gemini_tts = bool(os.getenv("GEMINI_TTS_API_KEY"))
570
 
571
  tts_models = [
572
  # 채널톡 TTS (한국어 특화) - 항상 활성화
@@ -676,6 +676,12 @@ def insert_initial_models():
676
 
677
  db.session.commit()
678
 
 
 
 
 
 
 
679
 
680
  def get_top_voters(limit=10):
681
  """
 
566
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
567
  has_humelo = bool(os.getenv("HUMELO_API_KEY"))
568
  has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
569
+ has_gemini_tts = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON"))
570
 
571
  tts_models = [
572
  # 채널톡 TTS (한국어 특화) - 항상 활성화
 
676
 
677
  db.session.commit()
678
 
679
+ # Deactivate legacy Typecast model (JaeYi) if it still exists
680
+ legacy_typecast = Model.query.filter_by(id="typecast-jaeyi").first()
681
+ if legacy_typecast and legacy_typecast.is_active:
682
+ legacy_typecast.is_active = False
683
+ db.session.commit()
684
+
685
 
686
  def get_top_voters(limit=10):
687
  """
requirements.txt CHANGED
@@ -14,4 +14,5 @@ huggingface-hub
14
  scipy
15
  numpy
16
  pydub
17
- typecast-python
 
 
14
  scipy
15
  numpy
16
  pydub
17
+ typecast-python
18
+ google-cloud-texttospeech
tts.py CHANGED
@@ -55,10 +55,20 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
55
  # Typecast TTS
56
  TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
57
 
58
- # Gemini TTS (Google Cloud) - REST API with API Key
59
- GEMINI_TTS_API_KEY = os.getenv("GEMINI_TTS_API_KEY")
60
- if GEMINI_TTS_API_KEY:
61
- print(f"[Gemini TTS] API Key loaded")
 
 
 
 
 
 
 
 
 
 
62
 
63
  def resample_wav_to_16khz(input_path: str) -> str:
64
  """
@@ -218,6 +228,12 @@ model_mapping = {
218
  "voice_id": "tc_5c789c337ad86500073a02cd",
219
  "model": "ssfm-v21",
220
  },
 
 
 
 
 
 
221
  # Gemini TTS (Google Cloud - 다국어 지원)
222
  "gemini-tts-aoede": {
223
  "provider": "gemini",
@@ -450,43 +466,46 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
450
 
451
 
452
  def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
453
- """Gemini TTS API 호출 (REST API with API Key - v1beta1)"""
454
- if not GEMINI_TTS_API_KEY:
455
- raise ValueError("GEMINI_TTS_API_KEY 환경 변수가 설정되지 않았습니다.")
 
 
456
 
457
  try:
458
- url = f"https://texttospeech.googleapis.com/v1beta1/text:synthesize?key={GEMINI_TTS_API_KEY}"
 
459
 
460
- payload = {
461
- "input": {
462
- "text": text,
463
- "prompt": "친절하고 자연스러운 톤으로 말해주세요"
464
- },
465
- "voice": {
466
- "languageCode": "ko-kr",
467
- "name": voice,
468
- "modelName": model
469
- },
470
- "audioConfig": {
471
- "audioEncoding": "LINEAR16",
472
- "sampleRateHertz": 24000
473
- }
474
- }
475
 
476
- response = requests.post(url, json=payload, timeout=60)
477
- response.raise_for_status()
 
 
 
478
 
479
- audio_content = response.json().get("audioContent")
480
- if not audio_content:
481
- raise ValueError("Gemini TTS API가 오디오를 반환하지 않았습니다.")
 
 
 
 
 
 
 
 
482
 
483
- audio_bytes = base64.b64decode(audio_content)
484
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
485
- f.write(audio_bytes)
486
  return f.name
487
 
488
- except requests.exceptions.RequestException as e:
489
- raise ValueError(f"Gemini TTS API 요청 오류: {str(e)}")
 
 
490
  except Exception as e:
491
  raise ValueError(f"Gemini TTS API 오류: {str(e)}")
492
 
 
55
  # Typecast TTS
56
  TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
57
 
58
+ # Gemini TTS (Google Cloud) - requires service account JSON
59
+ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
60
+ _GEMINI_CREDENTIALS_PATH = None
61
+ if GOOGLE_APPLICATION_CREDENTIALS_JSON:
62
+ try:
63
+ _GEMINI_CREDENTIALS_PATH = os.path.join(
64
+ tempfile.gettempdir(), "gemini_tts_credentials.json"
65
+ )
66
+ with open(_GEMINI_CREDENTIALS_PATH, "w") as f:
67
+ f.write(GOOGLE_APPLICATION_CREDENTIALS_JSON)
68
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _GEMINI_CREDENTIALS_PATH
69
+ print("[Gemini TTS] Service account credentials loaded")
70
+ except Exception as e:
71
+ print(f"[Gemini TTS] Failed to save credentials: {e}")
72
 
73
  def resample_wav_to_16khz(input_path: str) -> str:
74
  """
 
228
  "voice_id": "tc_5c789c337ad86500073a02cd",
229
  "model": "ssfm-v21",
230
  },
231
+ # Legacy Typecast ID kept for backward compatibility (routes to GeumHee)
232
+ "typecast-jaeyi": {
233
+ "provider": "typecast",
234
+ "voice_id": "tc_5c789c337ad86500073a02cd",
235
+ "model": "ssfm-v21",
236
+ },
237
  # Gemini TTS (Google Cloud - 다국어 지원)
238
  "gemini-tts-aoede": {
239
  "provider": "gemini",
 
466
 
467
 
468
  def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
469
+ """Gemini TTS API 호출 (서비스 계정 JSON 필요)"""
470
+ if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
471
+ raise ValueError(
472
+ "GOOGLE_APPLICATION_CREDENTIALS_JSON 환경 변수가 설정되지 않았습니다."
473
+ )
474
 
475
  try:
476
+ from google.api_core.client_options import ClientOptions
477
+ from google.cloud import texttospeech_v1beta1 as texttospeech
478
 
479
+ client = texttospeech.TextToSpeechClient(
480
+ client_options=ClientOptions(api_endpoint="texttospeech.googleapis.com")
481
+ )
 
 
 
 
 
 
 
 
 
 
 
 
482
 
483
+ voice_params = texttospeech.VoiceSelectionParams(
484
+ name=voice,
485
+ language_code="ko-kr",
486
+ model_name=model,
487
+ )
488
 
489
+ response = client.synthesize_speech(
490
+ input=texttospeech.SynthesisInput(
491
+ text=text,
492
+ prompt="친절하고 자연스러운 톤으로 말해주세요",
493
+ ),
494
+ voice=voice_params,
495
+ audio_config=texttospeech.AudioConfig(
496
+ audio_encoding=texttospeech.AudioEncoding.LINEAR16,
497
+ sample_rate_hertz=24000,
498
+ ),
499
+ )
500
 
 
501
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
502
+ f.write(response.audio_content)
503
  return f.name
504
 
505
+ except ImportError:
506
+ raise ValueError(
507
+ "google-cloud-texttospeech 패키지가 설치되지 않았습니다. requirements.txt를 확인하세요."
508
+ )
509
  except Exception as e:
510
  raise ValueError(f"Gemini TTS API 오류: {str(e)}")
511