import asyncio
import wave

from google.genai import types

from services.gemini_client import get_gemini_client
def save_wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write PCM frames to *filename* as an uncompressed WAV file.

    Args:
        filename: Destination handed straight to ``wave.open`` in ``"wb"``
            mode (a path or a writable binary file object).
        pcm: Bytes-like audio frames written verbatim after the header.
        channels: Number of audio channels (defaults to 1).
        rate: Frame rate in Hz (defaults to 24000).
        sample_width: Bytes per sample (defaults to 2).
    """
    # (nchannels, sampwidth, framerate, nframes, comptype, compname):
    # nframes is left at 0 and the compression fields are the module's
    # only supported values, so this matches setting the three fields
    # individually.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)
async def generate_tts(text: str) -> bytes:
    """Synthesize speech for *text* with the Gemini TTS model.

    The request asks the ``gemini-2.5-flash-preview-tts`` model for an
    ``AUDIO`` response using the prebuilt ``"Kore"`` voice.

    Args:
        text: The text to be spoken; sent as the request contents.

    Returns:
        The inline audio payload of the first part of the first candidate
        that carries one. NOTE(review): presumably raw PCM matching the
        defaults of ``save_wave_file`` (mono, 24 kHz, 16-bit) - confirm
        against the API documentation.

    Raises:
        RuntimeError: If the response contains no candidate, content, or
            part with inline audio data.
    """
    client = get_gemini_client()
    config = types.GenerateContentConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name="Kore"
                )
            )
        ),
    )

    # ``generate_content`` is called synchronously; running it in a worker
    # thread keeps this coroutine from stalling the event loop while the
    # request is in flight.
    response = await asyncio.to_thread(
        client.models.generate_content,
        model="gemini-2.5-flash-preview-tts",
        contents=text,
        config=config,
    )

    # Walk the response defensively: any level may be missing or empty
    # (e.g. a blocked or failed generation), which previously surfaced as
    # an opaque TypeError/IndexError/AttributeError.
    candidates = getattr(response, "candidates", None) or []
    if candidates:
        content = getattr(candidates[0], "content", None)
        for part in getattr(content, "parts", None) or []:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.data:
                return inline.data

    raise RuntimeError("Gemini TTS response contained no audio data")