Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

FastAPI-Backend-Models / services /tts_service.py

malek-messaoudii

Refactor audio processing and chatbot services; enhance STT and TTS functionalities with base64 support and session management

56dc677 about 1 month ago

raw

history blame

4.29 kB

	import base64
	import io
	import tempfile
	import os
	from gtts import gTTS
	import pyttsx3

	class TTSService:
	    """Text-to-speech service that tries several engines in a fixed order.

	    ``self.models`` maps an engine key to its state: ``"gtts"`` is always
	    ``True``; ``"pyttsx3"`` holds the object returned by ``pyttsx3.init()``
	    or ``None``; ``"coqui"`` holds a Coqui ``TTS`` model or ``None``.
	    """

	    def __init__(self):
	        """Create the engine registry and eagerly initialise every engine."""
	        self.models = {}
	        self._initialize_models()

	    def _initialize_models(self):
	        """Fill ``self.models``; an optional engine that fails is stored as ``None``."""
	        # gTTS is our primary method (always available)
	        self.models["gtts"] = True

	        # Try to initialize pyttsx3 as fallback
	        try:
	            self.models["pyttsx3"] = pyttsx3.init()
	            print("✓ pyttsx3 TTS initialized")
	        except Exception:
	            # Deliberately not a bare ``except:`` so that KeyboardInterrupt
	            # and SystemExit still propagate during start-up.
	            print("⚠️ pyttsx3 not available")
	            self.models["pyttsx3"] = None

	        # Coqui TTS is optional
	        self.models["coqui"] = self._initialize_coqui_tts()

	    def _initialize_coqui_tts(self):
	        """Return a Coqui ``TTS`` model, or ``None`` if it cannot be loaded.

	        The import is local so the module still loads when the optional
	        ``TTS`` package is not installed.
	        """
	        try:
	            from TTS.api import TTS
	            tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
	            print("✓ Coqui TTS initialized")
	            return tts_model
	        except ImportError:
	            print("⚠️ Coqui TTS not available. Install with: pip install TTS")
	            return None
	        except Exception as e:
	            print(f"⚠️ Coqui TTS initialization failed: {e}")
	            return None

	    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
	        """Convert ``text`` to audio and return it base64-encoded.

	        Engines are tried in order: gTTS (always), then pyttsx3 and Coqui TTS
	        when the corresponding ``self.models`` entry is truthy. ``language``
	        is only passed to gTTS. Each engine failure is printed and the next
	        engine is tried.

	        Raises:
	            RuntimeError: when no engine produced audio. The last engine
	                error, if any, is attached as ``__cause__``.

	        NOTE(review): none of the engine helpers awaits anything, so the
	        synthesis runs inline in the calling coroutine.
	        """
	        # (label used in the log line, whether to try it, coroutine factory)
	        attempts = (
	            ("gTTS", True, lambda: self._gtts_to_base64(text, language)),
	            ("pyttsx3", bool(self.models.get("pyttsx3")), lambda: self._pyttsx3_to_base64(text)),
	            ("Coqui TTS", bool(self.models.get("coqui")), lambda: self._coqui_to_base64(text)),
	        )

	        last_error = None
	        for label, enabled, synthesize in attempts:
	            if not enabled:
	                continue
	            try:
	                return await synthesize()
	            except Exception as e:
	                print(f"{label} error: {e}")
	                last_error = e

	        # RuntimeError is an Exception subclass, so existing
	        # ``except Exception`` handlers in callers keep working.
	        raise RuntimeError("All TTS services failed") from last_error

	    async def _gtts_to_base64(self, text: str, language: str) -> str:
	        """Synthesize ``text`` with gTTS into memory and return it as base64."""
	        tts = gTTS(text=text, lang=language, slow=False)
	        audio_buffer = io.BytesIO()
	        tts.write_to_fp(audio_buffer)
	        # getvalue() returns the whole buffer regardless of the stream position.
	        return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')

	    async def _pyttsx3_to_base64(self, text: str) -> str:
	        """Synthesize ``text`` with the stored pyttsx3 engine via a temp ``.wav`` file."""
	        engine = self.models["pyttsx3"]

	        def render(path):
	            engine.save_to_file(text, path)
	            engine.runAndWait()

	        return self._render_via_temp_wav(render)

	    async def _coqui_to_base64(self, text: str) -> str:
	        """Synthesize ``text`` with the stored Coqui model via a temp ``.wav`` file."""
	        model = self.models["coqui"]
	        return self._render_via_temp_wav(
	            lambda path: model.tts_to_file(text=text, file_path=path)
	        )

	    def _render_via_temp_wav(self, render) -> str:
	        """Run ``render(path)`` against a temp ``.wav`` path and return the file as base64.

	        The temporary file is removed whether or not rendering succeeds.

	        Raises:
	            RuntimeError: when ``render`` leaves an empty file, so the caller's
	                fallback chain can move on instead of returning empty audio.
	        """
	        # delete=False: only the path is needed; the engine writes to it by
	        # name after this handle has been closed.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	            temp_path = temp_file.name

	        try:
	            render(temp_path)
	            with open(temp_path, 'rb') as audio_file:
	                audio_bytes = audio_file.read()
	        finally:
	            # Best-effort cleanup; a failed unlink must not mask a synthesis
	            # error or discard audio that was already produced.
	            try:
	                os.unlink(temp_path)
	            except OSError:
	                pass

	        if not audio_bytes:
	            raise RuntimeError("TTS engine produced no audio data")
	        return base64.b64encode(audio_bytes).decode('utf-8')

	# gTTS-only variant of the service (minimal dependencies)
	class SimpleTTSService:
	    """Text-to-speech service that relies on gTTS alone and keeps no state."""

	    def __init__(self):
	        """Nothing to set up."""

	    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
	        """Synthesize ``text`` with gTTS and return the audio as a base64 string.

	        Any exception is printed and the literal string
	        ``"TTS_ERROR_PLACEHOLDER"`` is returned in place of audio, so callers
	        must check for that value before decoding the result.
	        """
	        try:
	            synthesizer = gTTS(text=text, lang=language, slow=False)
	            audio_stream = io.BytesIO()
	            synthesizer.write_to_fp(audio_stream)
	            # Rewind, then read everything that was written.
	            audio_stream.seek(0)
	            encoded = base64.b64encode(audio_stream.read())
	            return encoded.decode('utf-8')
	        except Exception as err:
	            print(f"gTTS error: {err}")
	            # Return a placeholder audio or error message
	            return "TTS_ERROR_PLACEHOLDER"