| from services.gemini_client import get_gemini_client |
| from google.genai import types |
| import base64 |
| import logging |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
def _extract_audio_bytes(response) -> bytes:
    """Return the first inline audio payload found in a generate_content response.

    Walks candidates -> content -> parts and returns the first non-empty
    ``inline_data.data``. A ``str`` payload is treated as base64 text and
    decoded; a bytes-like payload is returned as-is, because decoding
    already-raw audio a second time would corrupt it.

    Raises:
        ValueError: If no part of the response carries inline audio data.
    """
    for candidate in getattr(response, "candidates", None) or []:
        content = getattr(candidate, "content", None)
        for part in getattr(content, "parts", None) or []:
            payload = getattr(getattr(part, "inline_data", None), "data", None)
            if not payload:
                continue
            if isinstance(payload, str):
                return base64.b64decode(payload)
            return bytes(payload)
    raise ValueError("response contained no audio data")


async def generate_tts(text: str) -> bytes:
    """
    Convert text to speech using Gemini API.

    Sends ``text`` to the ``gemini-2.5-flash-preview-tts`` model with the
    prebuilt "Kore" voice and returns the audio payload of the response.

    Args:
        text: Text to convert to speech. Must contain at least one
            non-whitespace character.

    Returns:
        The audio bytes carried in the model response.
        NOTE(review): Gemini TTS is documented to emit raw 16-bit PCM
        (24 kHz, mono) rather than a WAV container - confirm whether
        callers need a WAV header added before playback.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only.
        Exception: If TTS generation fails (the original error is chained
            as ``__cause__``).
    """
    # Function-scope import: asyncio is stdlib and only needed here.
    import asyncio

    # Reject unusable input up front instead of spending an API round-trip.
    if not text or not text.strip():
        raise ValueError("Text for speech generation must not be empty")

    try:
        client = get_gemini_client()

        logger.info(f"Generating speech for: '{text}'")

        tts_config = types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name="Kore"
                    )
                )
            ),
        )

        # generate_content is a blocking call; run it in a worker thread so
        # the event loop stays responsive while the request is in flight.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=tts_config,
        )

        audio_bytes = _extract_audio_bytes(response)

        logger.info(f"✓ TTS successful: {len(audio_bytes)} bytes generated")

        return audio_bytes

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"✗ TTS failed: {str(e)}")
        raise Exception(f"Text-to-speech generation failed: {str(e)}") from e