Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

File size: 4,289 Bytes

import base64
import io
import tempfile
import os
from gtts import gTTS
import pyttsx3

class TTSService:
    """Text-to-speech service that tries several engines in order.

    ``text_to_speech_base64`` attempts gTTS first, then pyttsx3, then
    Coqui TTS, returning the first successful result as a base64 string.

    ``self.models`` maps engine name to its handle:

    * ``"gtts"``    -- always ``True`` (gTTS objects are created per request)
    * ``"pyttsx3"`` -- the engine returned by ``pyttsx3.init()`` or ``None``
    * ``"coqui"``   -- a ``TTS`` model instance or ``None``

    NOTE(review): the two fallbacks write ``.wav`` files, whereas gTTS is
    documented to emit MP3 data -- callers presumably need to cope with
    either container; confirm against the consumer of the base64 string.

    NOTE: none of the private coroutines awaits anything itself, so the
    synthesis work runs synchronously on the caller's event loop.
    """

    def __init__(self):
        self.models = {}
        self._initialize_models()

    def _initialize_models(self):
        """Populate ``self.models`` with whichever engines can be set up."""
        # gTTS is our primary method (always available)
        self.models["gtts"] = True

        # Try to initialize pyttsx3 as fallback. ``except Exception`` (not a
        # bare ``except``) so Ctrl-C / SystemExit during start-up are not
        # silently swallowed.
        try:
            self.models["pyttsx3"] = pyttsx3.init()
            print("✓ pyttsx3 TTS initialized")
        except Exception:
            print("⚠️ pyttsx3 not available")
            self.models["pyttsx3"] = None

        # Coqui TTS is optional
        self.models["coqui"] = self._initialize_coqui_tts()

    def _initialize_coqui_tts(self):
        """Return a loaded Coqui ``TTS`` model, or ``None`` if unavailable.

        The import is done lazily so that the package stays an optional
        dependency; both a missing package and a failed model load are
        reported on stdout and result in ``None``.
        """
        try:
            from TTS.api import TTS
            tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
            print("✓ Coqui TTS initialized")
            return tts_model
        except ImportError:
            print("⚠️ Coqui TTS not available. Install with: pip install TTS")
            return None
        except Exception as e:
            print(f"⚠️ Coqui TTS initialization failed: {e}")
            return None

    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
        """Convert ``text`` to audio and return it base64-encoded.

        Args:
            text: The text to synthesise.
            language: Language code passed to gTTS. The pyttsx3 and Coqui
                fallbacks do not receive it.

        Returns:
            The audio bytes of the first engine that succeeds, base64-encoded
            and decoded to a ``str``.

        Raises:
            Exception: ``"All TTS services failed"`` when every available
                engine failed or was unavailable. The most recent engine
                error, if any, is attached as ``__cause__``.
        """
        last_error = None

        # Try gTTS first (most reliable and free)
        try:
            return await self._gtts_to_base64(text, language)
        except Exception as e:
            print(f"gTTS error: {e}")
            last_error = e

        # Fallback to pyttsx3
        try:
            if self.models.get("pyttsx3"):
                return await self._pyttsx3_to_base64(text)
        except Exception as e:
            print(f"pyttsx3 error: {e}")
            last_error = e

        # Final fallback to Coqui TTS
        try:
            if self.models.get("coqui"):
                return await self._coqui_to_base64(text)
        except Exception as e:
            print(f"Coqui TTS error: {e}")
            last_error = e

        # Same exception type and message as before; chaining keeps the root
        # cause visible in tracebacks.
        raise Exception("All TTS services failed") from last_error

    async def _gtts_to_base64(self, text: str, language: str) -> str:
        """Synthesise ``text`` with gTTS into memory and base64-encode it."""
        tts = gTTS(text=text, lang=language, slow=False)
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)
        return base64.b64encode(audio_buffer.getvalue()).decode('utf-8')

    @staticmethod
    def _render_temp_wav_to_base64(render) -> str:
        """Run ``render(path)`` on a fresh temp ``.wav`` and base64 its bytes.

        The temporary file is removed whether or not ``render`` (or the
        read-back) succeeds, so failed conversions no longer leak files.
        """
        # Create and immediately close the file so the engine can reopen the
        # path itself; ``delete=False`` keeps it on disk after closing.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        try:
            render(temp_path)
            with open(temp_path, 'rb') as audio_file:
                return base64.b64encode(audio_file.read()).decode('utf-8')
        finally:
            # Cleanup is best-effort: a failure to delete must not discard
            # audio that was produced, nor mask the original error.
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"⚠️ Could not remove temp file {temp_path}: {cleanup_error}")

    async def _pyttsx3_to_base64(self, text: str) -> str:
        """Synthesise ``text`` with the pyttsx3 engine via a temp file."""
        engine = self.models["pyttsx3"]

        def render(path):
            # save_to_file only queues the job; runAndWait executes it.
            engine.save_to_file(text, path)
            engine.runAndWait()

        return self._render_temp_wav_to_base64(render)

    async def _coqui_to_base64(self, text: str) -> str:
        """Synthesise ``text`` with the Coqui model via a temp file."""
        model = self.models["coqui"]
        return self._render_temp_wav_to_base64(
            lambda path: model.tts_to_file(text=text, file_path=path)
        )

# Simple TTS service that only uses gTTS (minimal dependencies)
class SimpleTTSService:
    """Minimal text-to-speech service backed solely by gTTS."""

    def __init__(self):
        """No state to set up; a gTTS object is built for each request."""

    async def text_to_speech_base64(self, text: str, language: str = "en") -> str:
        """Synthesise ``text`` with gTTS and return the audio as base64.

        Args:
            text: The text to speak.
            language: Language code handed to gTTS.

        Returns:
            The base64-encoded audio as a ``str``. On any error the literal
            string ``"TTS_ERROR_PLACEHOLDER"`` is returned instead of raising,
            after the error has been printed.
        """
        try:
            speech = gTTS(text=text, lang=language, slow=False)
            sink = io.BytesIO()
            speech.write_to_fp(sink)
            # getvalue() returns the whole buffer regardless of position.
            raw_audio = sink.getvalue()
            encoded = base64.b64encode(raw_audio)
            return encoded.decode('utf-8')
        except Exception as e:
            print(f"gTTS error: {e}")
            # Callers receive a sentinel rather than an exception.
            return "TTS_ERROR_PLACEHOLDER"