import asyncio
import wave

from google.genai import types

from services.gemini_client import get_gemini_client


def save_wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write audio frames to ``filename`` as a WAV file.

    Args:
        filename: Destination passed to ``wave.open`` in ``"wb"`` mode.
        pcm: Bytes-like audio frames written verbatim via ``writeframes``
            (presumably raw PCM, as the parameter name suggests).
        channels: Number of audio channels recorded in the header
            (default 1).
        rate: Frame rate in Hz recorded in the header (default 24000).
        sample_width: Sample width in bytes recorded in the header
            (default 2).
    """
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)


def _extract_audio_bytes(response) -> bytes:
    """Return the first non-empty inline-data payload found in ``response``.

    Walks ``response.candidates[*].content.parts[*].inline_data.data`` and
    tolerates any of those attributes being missing or ``None``.

    Raises:
        ValueError: If no part of the response carries inline data.
    """
    for candidate in getattr(response, "candidates", None) or []:
        content = getattr(candidate, "content", None)
        for part in getattr(content, "parts", None) or []:
            inline_data = getattr(part, "inline_data", None)
            data = getattr(inline_data, "data", None)
            if data:
                return data
    raise ValueError("Gemini TTS response contained no audio data")


async def generate_tts(text: str) -> bytes:
    """Synthesize speech for ``text`` with the Gemini TTS preview model.

    Requests the ``AUDIO`` response modality from
    ``gemini-2.5-flash-preview-tts`` using the prebuilt ``Kore`` voice and
    returns the inline audio payload of the response.

    Args:
        text: The text sent as the request contents.

    Returns:
        The audio bytes from the response's inline data. Presumably raw PCM
        compatible with ``save_wave_file``'s defaults -- confirm against the
        Gemini TTS documentation.

    Raises:
        ValueError: If the response contains no inline audio data.
    """
    client = get_gemini_client()
    config = types.GenerateContentConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    voice_name="Kore"
                )
            )
        ),
    )
    # ``generate_content`` is called synchronously here (its result is used
    # without awaiting), so run it in a worker thread to keep the event loop
    # free while the request is in flight.
    # NOTE(review): assumes the client is safe to call from a worker thread.
    response = await asyncio.to_thread(
        client.models.generate_content,
        model="gemini-2.5-flash-preview-tts",
        contents=text,
        config=config,
    )
    return _extract_audio_bytes(response)