Spaces:

channelcorp
/

Ko-TTS-Arena

Sleeping

App Files Files Community

Ko-TTS-Arena Contributors committed on 11 days ago

Commit

45eb847

1 Parent(s): 6662253

fix: Require service account for Gemini TTS and disable legacy Typecast model

Browse files

Files changed (3) hide show

models.py +7 -1
requirements.txt +2 -1
tts.py +51 -32

models.py CHANGED Viewed

@@ -566,7 +566,7 @@ def insert_initial_models():
     has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
     has_humelo = bool(os.getenv("HUMELO_API_KEY"))
     has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
-    has_gemini_tts = bool(os.getenv("GEMINI_TTS_API_KEY"))
     tts_models = [
         # 채널톡 TTS (한국어 특화) - 항상 활성화
@@ -676,6 +676,12 @@ def insert_initial_models():
     db.session.commit()
 def get_top_voters(limit=10):
     """

     has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
     has_humelo = bool(os.getenv("HUMELO_API_KEY"))
     has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
+    has_gemini_tts = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON"))
     tts_models = [
         # 채널톡 TTS (한국어 특화) - 항상 활성화
     db.session.commit()
+    # Deactivate legacy Typecast model (JaeYi) if it still exists
+    legacy_typecast = Model.query.filter_by(id="typecast-jaeyi").first()
+    if legacy_typecast and legacy_typecast.is_active:
+        legacy_typecast.is_active = False
+        db.session.commit()
 def get_top_voters(limit=10):
     """

requirements.txt CHANGED Viewed

@@ -14,4 +14,5 @@ huggingface-hub
 scipy
 numpy
 pydub
-typecast-python

 scipy
 numpy
 pydub
+typecast-python
+google-cloud-texttospeech

tts.py CHANGED Viewed

@@ -55,10 +55,20 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
 # Typecast TTS
 TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
-# Gemini TTS (Google Cloud) - REST API with API Key
-GEMINI_TTS_API_KEY = os.getenv("GEMINI_TTS_API_KEY")
-if GEMINI_TTS_API_KEY:
-    print(f"[Gemini TTS] API Key loaded")
 def resample_wav_to_16khz(input_path: str) -> str:
     """
@@ -218,6 +228,12 @@ model_mapping = {
         "voice_id": "tc_5c789c337ad86500073a02cd",
         "model": "ssfm-v21",
     },
     # Gemini TTS (Google Cloud - 다국어 지원)
     "gemini-tts-aoede": {
         "provider": "gemini",
@@ -450,43 +466,46 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
 def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
-    """Gemini TTS API 호출 (REST API with API Key - v1beta1)"""
-    if not GEMINI_TTS_API_KEY:
-        raise ValueError("GEMINI_TTS_API_KEY 환경 변수가 설정되지 않았습니다.")
     try:
-        url = f"https://texttospeech.googleapis.com/v1beta1/text:synthesize?key={GEMINI_TTS_API_KEY}"
-        payload = {
-            "input": {
-                "text": text,
-                "prompt": "친절하고 자연스러운 톤으로 말해주세요"
-            },
-            "voice": {
-                "languageCode": "ko-kr",
-                "name": voice,
-                "modelName": model
-            },
-            "audioConfig": {
-                "audioEncoding": "LINEAR16",
-                "sampleRateHertz": 24000
-            }
-        }
-        response = requests.post(url, json=payload, timeout=60)
-        response.raise_for_status()
-        audio_content = response.json().get("audioContent")
-        if not audio_content:
-            raise ValueError("Gemini TTS API가 오디오를 반환하지 않았습니다.")
-        audio_bytes = base64.b64decode(audio_content)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            f.write(audio_bytes)
             return f.name
-    except requests.exceptions.RequestException as e:
-        raise ValueError(f"Gemini TTS API 요청 오류: {str(e)}")
     except Exception as e:
         raise ValueError(f"Gemini TTS API 오류: {str(e)}")

 # Typecast TTS
 TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
+# Gemini TTS (Google Cloud) - requires service account JSON
+GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
+_GEMINI_CREDENTIALS_PATH = None
+if GOOGLE_APPLICATION_CREDENTIALS_JSON:
+    try:
+        _GEMINI_CREDENTIALS_PATH = os.path.join(
+            tempfile.gettempdir(), "gemini_tts_credentials.json"
+        )
+        with open(_GEMINI_CREDENTIALS_PATH, "w") as f:
+            f.write(GOOGLE_APPLICATION_CREDENTIALS_JSON)
+        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _GEMINI_CREDENTIALS_PATH
+        print("[Gemini TTS] Service account credentials loaded")
+    except Exception as e:
+        print(f"[Gemini TTS] Failed to save credentials: {e}")
 def resample_wav_to_16khz(input_path: str) -> str:
     """
         "voice_id": "tc_5c789c337ad86500073a02cd",
         "model": "ssfm-v21",
     },
+    # Legacy Typecast ID kept for backward compatibility (routes to GeumHee)
+    "typecast-jaeyi": {
+        "provider": "typecast",
+        "voice_id": "tc_5c789c337ad86500073a02cd",
+        "model": "ssfm-v21",
+    },
     # Gemini TTS (Google Cloud - 다국어 지원)
     "gemini-tts-aoede": {
         "provider": "gemini",
 def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
+    """Gemini TTS API 호출 (서비스 계정 JSON 필요)"""
+    if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
+        raise ValueError(
+            "GOOGLE_APPLICATION_CREDENTIALS_JSON 환경 변수가 설정되지 않았습니다."
+        )
     try:
+        from google.api_core.client_options import ClientOptions
+        from google.cloud import texttospeech_v1beta1 as texttospeech
+        client = texttospeech.TextToSpeechClient(
+            client_options=ClientOptions(api_endpoint="texttospeech.googleapis.com")
+        )
+        voice_params = texttospeech.VoiceSelectionParams(
+            name=voice,
+            language_code="ko-kr",
+            model_name=model,
+        )
+        response = client.synthesize_speech(
+            input=texttospeech.SynthesisInput(
+                text=text,
+                prompt="친절하고 자연스러운 톤으로 말해주세요",
+            ),
+            voice=voice_params,
+            audio_config=texttospeech.AudioConfig(
+                audio_encoding=texttospeech.AudioEncoding.LINEAR16,
+                sample_rate_hertz=24000,
+            ),
+        )
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            f.write(response.audio_content)
             return f.name
+    except ImportError:
+        raise ValueError(
+            "google-cloud-texttospeech 패키지가 설치되지 않았습니다. requirements.txt를 확인하세요."
+        )
     except Exception as e:
         raise ValueError(f"Gemini TTS API 오류: {str(e)}")