| from services.gemini_client import get_gemini_client |
| from google.genai import types |
| import base64 |
|
|
async def generate_tts(text: str) -> bytes:
    """Synthesize speech for *text* with the Gemini TTS preview model.

    Sends ``text`` to ``gemini-2.5-flash-preview-tts`` requesting the
    ``AUDIO`` modality with the prebuilt ``Kore`` voice, and returns the
    audio payload of the first candidate.

    Args:
        text: The text to be spoken.

    Returns:
        The audio payload as bytes, exactly as delivered by the model.
        NOTE(review): the Gemini docs describe this as raw PCM rather than
        a WAV/MP3 container -- confirm before serving it to a player.

    Raises:
        RuntimeError: If the response has no candidates or carries no
            inline audio data (for example, when the request was blocked).
    """
    client = get_gemini_client()

    # Await the client's async surface: the synchronous
    # ``client.models.generate_content`` would block the event loop for the
    # entire network round trip inside this coroutine.
    response = await client.aio.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
                )
            ),
        ),
    )

    candidates = response.candidates
    if not candidates:
        raise RuntimeError("Gemini TTS returned no candidates")

    content = candidates[0].content
    parts = (content.parts if content is not None else None) or []

    # Take the first part that actually carries inline audio instead of
    # assuming it is always at index 0.
    payload = next(
        (
            part.inline_data.data
            for part in parts
            if part.inline_data is not None and part.inline_data.data
        ),
        None,
    )
    if payload is None:
        raise RuntimeError("Gemini TTS response contained no audio data")

    # The google-genai SDK hands back ``inline_data.data`` as already-decoded
    # bytes; running base64 over it again would corrupt or reject the audio.
    # Only decode when the payload is still base64 text.
    if isinstance(payload, str):
        return base64.b64decode(payload)
    return bytes(payload)
|
|