# Mexar/backend/services/tts_service.py
# Uploaded by devrajsinh2012
# Commit b0b150b: Initial commit of MEXAR Ultimate - Phase 2 cleanup complete
"""
MEXAR Core Engine - Text-to-Speech Service
Provides text-to-speech capabilities with multiple provider support.
"""
import os
import logging
import hashlib
import requests
from pathlib import Path
from typing import Optional, Dict, Any, List
from dotenv import load_dotenv
load_dotenv()
logger = logging.getLogger(__name__)
class TTSService:
    """
    Text-to-Speech service supporting multiple providers:
    - ElevenLabs (high quality, free tier: 10k chars/month)
    - Web Speech API (browser-based, unlimited, handled client-side)

    Audio produced through ElevenLabs is cached on disk as
    ``<cache_dir>/<md5 key>.mp3``. Public methods report failures through
    the returned dict/list instead of raising.
    """

    # Model used when the caller does not choose one. Cache keys for this
    # model keep the original "provider:voice:text" layout so audio cached
    # before model-aware keys existed is still found.
    _DEFAULT_ELEVENLABS_MODEL = "eleven_monolingual_v1"

    def __init__(self, cache_dir: str = "data/tts_cache"):
        """
        Initialize TTS service.

        Args:
            cache_dir: Directory to cache generated audio files. It is
                created (including parents) if it does not exist.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # ElevenLabs configuration
        self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
        self.elevenlabs_base_url = "https://api.elevenlabs.io/v1"

        # Default voices
        self.default_voices = {
            "elevenlabs": "21m00Tcm4TlvDq8ikWAM",  # Rachel - neutral
            "web_speech": "default",  # Browser default
        }

    def generate_speech(
        self,
        text: str,
        provider: str = "elevenlabs",
        voice_id: Optional[str] = None,
        model_id: str = _DEFAULT_ELEVENLABS_MODEL,
    ) -> Dict[str, Any]:
        """
        Generate speech from text using specified provider.

        Args:
            text: Text to convert to speech
            provider: "elevenlabs" or "web_speech"
            voice_id: Voice ID (provider-specific)
            model_id: Model ID for ElevenLabs

        Returns:
            Dict with a "success" flag. On success it carries the audio file
            path/URL (ElevenLabs) or client-side playback instructions
            (Web Speech); on failure it carries an "error" message and, for
            ElevenLabs failures, a "fallback" provider name.
        """
        if not text or not text.strip():
            return {
                "success": False,
                "error": "Empty text provided",
            }

        # Check cache first. The key includes the model so that audio
        # generated with one model is never served for another.
        cache_key = self._get_cache_key(text, provider, voice_id, model_id)
        cached_file = self.cache_dir / f"{cache_key}.mp3"
        if cached_file.exists():
            logger.info(f"Using cached TTS audio: {cache_key}")
            return {
                "success": True,
                "provider": provider,
                "audio_path": str(cached_file),
                "audio_url": f"/api/chat/tts/audio/{cache_key}.mp3",
                "cached": True,
                "text_length": len(text),
            }

        # Generate new audio
        if provider == "elevenlabs":
            return self._generate_elevenlabs(text, voice_id, model_id, cached_file)

        if provider == "web_speech":
            # Web Speech API is client-side only: nothing is synthesized
            # here, the text is handed back for the browser to speak.
            return {
                "success": True,
                "provider": "web_speech",
                "client_side": True,
                "text": text,
                "voice_id": voice_id or self.default_voices["web_speech"],
                "message": "Use browser Web Speech API for playback",
            }

        return {
            "success": False,
            "error": f"Unknown provider: {provider}",
        }

    def _generate_elevenlabs(
        self,
        text: str,
        voice_id: Optional[str],
        model_id: str,
        output_path: Path,
    ) -> Dict[str, Any]:
        """
        Generate speech using ElevenLabs API and store it at output_path.

        Args:
            text: Text to synthesize
            voice_id: ElevenLabs voice ID; the default voice is used if falsy
            model_id: ElevenLabs model ID
            output_path: Final location of the generated MP3 file

        Returns:
            Result dict; every failure includes "fallback": "web_speech".
        """
        if not self.elevenlabs_api_key:
            return {
                "success": False,
                "error": "ElevenLabs API key not configured",
                "fallback": "web_speech",
            }

        voice = voice_id or self.default_voices["elevenlabs"]
        try:
            url = f"{self.elevenlabs_base_url}/text-to-speech/{voice}"
            headers = {
                "Accept": "audio/mpeg",
                "Content-Type": "application/json",
                "xi-api-key": self.elevenlabs_api_key,
            }
            data = {
                "text": text,
                "model_id": model_id,
                "voice_settings": {
                    "stability": 0.5,
                    "similarity_boost": 0.75,
                },
            }
            response = requests.post(url, json=data, headers=headers, timeout=30)

            if response.status_code == 200:
                audio = response.content
                if not audio:
                    # Never cache an empty file: it would be replayed as a
                    # "cached" hit on every later request for this text.
                    return {
                        "success": False,
                        "error": "ElevenLabs returned empty audio",
                        "fallback": "web_speech",
                    }

                # Save audio file
                self._write_audio_atomically(output_path, audio)
                logger.info(f"Generated ElevenLabs TTS: {len(text)} chars")
                return {
                    "success": True,
                    "provider": "elevenlabs",
                    "audio_path": str(output_path),
                    "audio_url": f"/api/chat/tts/audio/{output_path.name}",
                    "cached": False,
                    "text_length": len(text),
                    "voice_id": voice,
                }

            if response.status_code == 401:
                error = "Invalid ElevenLabs API key"
            elif response.status_code == 429:
                error = "ElevenLabs quota exceeded"
            else:
                error = f"ElevenLabs API error: {response.status_code}"
            return {
                "success": False,
                "error": error,
                "fallback": "web_speech",
            }
        except Exception as e:
            # Service boundary: callers expect a result dict, not an
            # exception, so network and disk errors both end up here.
            logger.error(f"ElevenLabs TTS failed: {e}")
            return {
                "success": False,
                "error": str(e),
                "fallback": "web_speech",
            }

    @staticmethod
    def _write_audio_atomically(output_path: Path, audio: bytes) -> None:
        """
        Write audio to output_path without exposing a partial file.

        The bytes go to a uniquely named sibling file that is then renamed
        over output_path, so an interrupted or concurrent write cannot leave
        a truncated MP3 under the cache name.

        Args:
            output_path: Final location of the audio file
            audio: Raw audio bytes to store

        Raises:
            OSError: If the file cannot be written or renamed.
        """
        import uuid  # function-scope import: only needed for temp naming

        output_path.parent.mkdir(parents=True, exist_ok=True)
        # The ".tmp" suffix keeps in-progress files out of "*.mp3" globs.
        tmp_path = output_path.with_name(
            f"{output_path.name}.{uuid.uuid4().hex}.tmp"
        )
        try:
            with open(tmp_path, "wb") as f:
                f.write(audio)
            os.replace(tmp_path, output_path)
        except BaseException:
            # Best-effort cleanup of the temp file, then re-raise.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise

    def get_available_voices(self, provider: str = "elevenlabs") -> List[Dict[str, str]]:
        """
        Get list of available voices for a provider.

        Args:
            provider: "elevenlabs" or "web_speech"

        Returns:
            List of voice dictionaries with id, name, and metadata. Empty
            when the provider is unknown, ElevenLabs is not configured, or
            the ElevenLabs request fails.
        """
        if provider == "elevenlabs":
            if not self.elevenlabs_api_key:
                return []
            try:
                url = f"{self.elevenlabs_base_url}/voices"
                headers = {"xi-api-key": self.elevenlabs_api_key}
                response = requests.get(url, headers=headers, timeout=10)
                if response.status_code == 200:
                    data = response.json()
                    return [
                        {
                            "id": voice["voice_id"],
                            "name": voice["name"],
                            "category": voice.get("category", "general"),
                            "preview_url": voice.get("preview_url"),
                        }
                        for voice in data.get("voices", [])
                    ]
                logger.warning(
                    f"ElevenLabs voices request returned HTTP {response.status_code}"
                )
            except Exception as e:
                logger.error(f"Failed to fetch ElevenLabs voices: {e}")
            return []

        if provider == "web_speech":
            # Web Speech API voices are browser-specific
            return [
                {"id": "default", "name": "Browser Default", "category": "system"}
            ]

        return []

    def check_quota(self) -> Dict[str, Any]:
        """
        Check remaining quota for ElevenLabs.

        Returns:
            Dict with quota information. "configured" is False when no API
            key is set; an "error" entry is present when the quota could not
            be fetched.
        """
        if not self.elevenlabs_api_key:
            return {
                "provider": "elevenlabs",
                "configured": False,
            }

        try:
            url = f"{self.elevenlabs_base_url}/user"
            headers = {"xi-api-key": self.elevenlabs_api_key}
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                data = response.json()
                # "or {}" also covers an explicit null subscription.
                subscription = data.get("subscription") or {}
                used = subscription.get("character_count", 0)
                limit = subscription.get("character_limit", 10000)
                return {
                    "provider": "elevenlabs",
                    "configured": True,
                    "character_count": used,
                    "character_limit": limit,
                    "remaining": limit - used,
                    "tier": subscription.get("tier", "free"),
                }
            logger.warning(
                f"ElevenLabs quota request returned HTTP {response.status_code}"
            )
        except Exception as e:
            logger.error(f"Failed to check ElevenLabs quota: {e}")

        return {
            "provider": "elevenlabs",
            "configured": True,
            "error": "Failed to fetch quota",
        }

    def _get_cache_key(
        self,
        text: str,
        provider: str,
        voice_id: Optional[str],
        model_id: Optional[str] = None,
    ) -> str:
        """
        Generate cache key for audio file.

        Args:
            text: Text being synthesized
            provider: Provider name
            voice_id: Voice ID, or None for the provider default
            model_id: Optional model ID. When omitted or equal to the
                default model, the key matches the original
                "provider:voice:text" format.

        Returns:
            Hex MD5 digest used as the cache file stem (a file name, not a
            security measure).
        """
        content = f"{provider}:{voice_id or 'default'}:{text}"
        if model_id is not None and model_id != self._DEFAULT_ELEVENLABS_MODEL:
            # NUL separator: cannot be confused with ordinary text content.
            content = f"{content}\x00{model_id}"
        return hashlib.md5(content.encode()).hexdigest()

    def clear_cache(self) -> int:
        """
        Clear all cached audio files.

        Returns:
            Number of files deleted
        """
        count = 0
        for file in self.cache_dir.glob("*.mp3"):
            try:
                file.unlink()
                count += 1
            except Exception as e:
                # Best effort: one undeletable file must not stop the sweep.
                logger.warning(f"Failed to delete cache file {file}: {e}")
        logger.info(f"Cleared {count} cached TTS files")
        return count
# Process-wide TTSService; stays None until get_tts_service() is first called.
_tts_service_instance: Optional[TTSService] = None


def get_tts_service() -> TTSService:
    """Return the shared TTSService, constructing it on the first call."""
    global _tts_service_instance
    if _tts_service_instance is not None:
        return _tts_service_instance
    _tts_service_instance = TTSService()
    return _tts_service_instance