Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

malek-messaoudii

add files

4a13628 5 months ago

1.62 kB

	import asyncio
	import base64
	import logging

	from google.genai import types

	from services.gemini_client import get_gemini_client

	# Module-level logger named after this module; generate_tts logs through it.
	logger = logging.getLogger(__name__)


	def _extract_inline_audio(response):
	    """Return the first non-empty inline audio payload found in *response*.

	    Args:
	        response: Object returned by ``client.models.generate_content``.

	    Returns:
	        The ``inline_data.data`` value of the first content part that has one.

	    Raises:
	        ValueError: If the response has no candidates or no inline audio data.
	    """
	    candidates = getattr(response, "candidates", None)
	    if not candidates:
	        raise ValueError("response contained no candidates")

	    content = getattr(candidates[0], "content", None)
	    # ``content`` or ``parts`` may be missing (e.g. a blocked response), so
	    # fall back to an empty list instead of failing on attribute access.
	    parts = getattr(content, "parts", None) or []
	    for part in parts:
	        blob = getattr(part, "inline_data", None)
	        payload = getattr(blob, "data", None)
	        if payload:
	            return payload

	    raise ValueError("response contained no inline audio data")


	def _decode_audio_payload(payload) -> bytes:
	    """Normalize an inline audio payload to raw audio bytes.

	    Args:
	        payload: Either a base64 string or a bytes-like object.

	    Returns:
	        The decoded audio bytes.
	    """
	    if isinstance(payload, str):
	        # Text payloads can only be base64; decode exactly as before.
	        return base64.b64decode(payload)

	    raw = bytes(payload)
	    # NOTE(review): Google's Python speech-generation example treats
	    # ``inline_data.data`` as already-decoded bytes. Decoding such bytes a
	    # second time would corrupt them, so only decode when the payload is
	    # strictly valid base64 and otherwise pass it through untouched.
	    try:
	        return base64.b64decode(raw, validate=True)
	    except ValueError:  # binascii.Error is a ValueError subclass
	        return raw


	async def generate_tts(text: str) -> bytes:
	    """
	    Convert text to speech using the Gemini API.

	    The synchronous SDK call is executed in a worker thread so the event
	    loop is not blocked while the request is in flight.

	    Args:
	        text: Text to convert to speech. Must not be empty or whitespace-only.

	    Returns:
	        Audio bytes extracted from the first inline-data part of the response.
	        NOTE(review): this was previously documented as WAV; Google's
	        speech-generation guide describes the output as raw PCM that callers
	        wrap in a WAV container themselves -- confirm what callers expect.

	    Raises:
	        ValueError: If ``text`` is empty or whitespace-only.
	        Exception: If TTS generation fails; the original error is chained
	            as ``__cause__``.
	    """
	    if not text or not text.strip():
	        # Fail fast instead of spending a remote call on nothing to speak.
	        raise ValueError(
	            "Text-to-speech generation failed: text must not be empty"
	        )

	    try:
	        client = get_gemini_client()

	        logger.info("Generating speech for: '%s'", text)

	        config = types.GenerateContentConfig(
	            response_modalities=["AUDIO"],
	            speech_config=types.SpeechConfig(
	                voice_config=types.VoiceConfig(
	                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                        # See the Gemini speech-generation docs for the
	                        # list of available prebuilt voices.
	                        voice_name="Kore"
	                    )
	                )
	            ),
	        )

	        # generate_content is a blocking call; run it off the event loop.
	        response = await asyncio.to_thread(
	            client.models.generate_content,
	            model="gemini-2.5-flash-preview-tts",
	            contents=text,
	            config=config,
	        )

	        audio_bytes = _decode_audio_payload(_extract_inline_audio(response))

	        logger.info("✓ TTS successful: %d bytes generated", len(audio_bytes))

	        return audio_bytes

	    except Exception as e:
	        # logger.exception records the traceback; ``from e`` keeps the cause.
	        logger.exception("✗ TTS failed: %s", e)
	        raise Exception(f"Text-to-speech generation failed: {e}") from e