FastAPI-Backend-Models / services /tts_service.py
malek-messaoudii
add files
4a13628
raw
history blame
1.62 kB
import asyncio
import base64
import logging

from google.genai import types

from services.gemini_client import get_gemini_client
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _decode_audio_payload(payload) -> bytes:
    """Return the audio payload as raw bytes, base64-decoding only if needed.

    Args:
        payload: ``inline_data.data`` from a response part; either ``str`` or
            a bytes-like object.

    Returns:
        The decoded audio bytes.
    """
    if isinstance(payload, str):
        return base64.b64decode(payload)

    raw = bytes(payload)
    # NOTE(review): the google-genai SDK types Blob.data as bytes and its TTS
    # example writes the value out directly, so it is expected to be decoded
    # already. Decoding such bytes again corrupts the audio or fails with a
    # padding error. Strict validation accepts only input that consists purely
    # of base64 characters, which real audio data practically never does.
    try:
        return base64.b64decode(raw, validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        return raw


def _extract_audio_bytes(response) -> bytes:
    """Pull the audio payload out of a ``generate_content`` response.

    Looks at the first candidate and returns the data of the first content
    part that carries non-empty ``inline_data``.

    Args:
        response: Object returned by ``client.models.generate_content``.

    Returns:
        The audio payload as bytes.

    Raises:
        ValueError: If the response has no candidates, no content parts, or
            no part with inline data.
    """
    candidates = getattr(response, "candidates", None)
    if not candidates:
        raise ValueError("response contains no candidates")

    content = getattr(candidates[0], "content", None)
    parts = getattr(content, "parts", None)
    if not parts:
        raise ValueError("response candidate contains no content parts")

    for part in parts:
        payload = getattr(getattr(part, "inline_data", None), "data", None)
        if payload:
            return _decode_audio_payload(payload)

    raise ValueError("response contains no inline audio data")


async def generate_tts(text: str) -> bytes:
    """
    Convert text to speech using the Gemini API.

    Args:
        text: Text to convert to speech. Must contain non-whitespace
            characters.

    Returns:
        Audio bytes taken from the response's inline data.
        NOTE(review): this docstring previously promised WAV. The Gemini TTS
        documentation describes the payload as raw PCM without a WAV header;
        confirm what callers expect before relying on a container format.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only.
        Exception: If TTS generation fails; the original error is chained as
            ``__cause__``.
    """
    # Reject blank input up front instead of spending an API call on it.
    if not text or not text.strip():
        raise ValueError("Text for speech generation must not be empty")

    try:
        client = get_gemini_client()
        logger.info(f"Generating speech for: '{text}'")

        speech_config = types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(
                    # Prebuilt voice name; see the Gemini TTS docs for others.
                    voice_name="Kore"
                )
            )
        )

        # generate_content is a blocking call; run it in a worker thread so
        # this coroutine does not stall the event loop while waiting.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=speech_config,
            ),
        )

        audio_bytes = _extract_audio_bytes(response)
        logger.info(f"✓ TTS successful: {len(audio_bytes)} bytes generated")
        return audio_bytes
    except Exception as e:
        logger.error(f"✗ TTS failed: {str(e)}")
        # Chain the cause so the original traceback is preserved for callers.
        raise Exception(f"Text-to-speech generation failed: {str(e)}") from e