# spik-backend/core/models/tts_model.py
# Nam Fam
# add files
# 1f47729
import logging
import requests
from typing import Dict, Any, Optional
from fastapi import HTTPException, status
# Absolute import of the application-level configuration
from app.config import Config
logger = logging.getLogger(__name__)
class TTSModel:
    """Handles text-to-speech synthesis using Deepgram's API."""

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Initialize the TTS model with API key and base URL.

        Args:
            api_key: Deepgram API key (default: ``Config.DEEPGRAM_API_KEY``).
            base_url: URL of the speak endpoint
                (default: ``https://api.deepgram.com/v1/speak``).
        """
        self.api_key = api_key or Config.DEEPGRAM_API_KEY
        self.base_url = base_url or "https://api.deepgram.com/v1/speak"
        self.timeout = 30  # seconds; handed to requests as the request timeout

        # Maps the voice label accepted by synthesize() to the model id that
        # is sent to the API as the ``model`` query parameter.
        self.voice_models = {
            "Deepgram Aura2": "aura-2-thalia-en",
            "Deepgram Aura1": "aura-1-thalia-en",
            "Deepgram Legacy": "aura-legacy-thalia-en",
        }

    async def synthesize(self, text: str, voice: str = "Deepgram Aura2") -> bytes:
        """Synthesize speech from text using Deepgram's TTS API.

        The HTTP call is made with the blocking ``requests`` library, so it is
        executed in the event loop's default thread pool instead of directly
        inside the coroutine; other tasks keep running while it is in flight.

        Args:
            text: The text to convert to speech. Must contain at least one
                non-whitespace character.
            voice: The voice to use (must be a key of ``self.voice_models``).

        Returns:
            bytes: The raw audio payload of the API response. No encoding is
            requested explicitly, so the format is whatever the service
            returns by default (documented as MP3 in the original code).

        Raises:
            ValueError: If the API key is not configured, the voice is not
                supported, or the text is empty. These are raised before any
                network traffic happens.
            HTTPException: 429 when the upstream API rate-limits the request,
                502 for any other request failure, 500 for unexpected errors
                (including an empty audio payload).
        """
        import asyncio  # local import: keeps this block self-contained

        if not self.api_key:
            raise ValueError("Deepgram API key is not configured")
        if voice not in self.voice_models:
            raise ValueError(f"Unsupported voice: {voice}")
        if not text or not text.strip():
            # Reject locally rather than spending a request on input that
            # cannot produce any audio.
            raise ValueError("Text to synthesize must not be empty")

        logger.debug("Sending TTS request to Deepgram with text length: %d", len(text))

        try:
            loop = asyncio.get_running_loop()
            # requests is synchronous; running it in a worker thread prevents
            # it from blocking the event loop for up to ``self.timeout`` seconds.
            audio_data = await loop.run_in_executor(
                None, self._request_audio, text, voice
            )
            if not audio_data:
                raise ValueError("Received empty audio data from Deepgram")
            logger.info("Successfully synthesized %d bytes of audio", len(audio_data))
            return audio_data
        except requests.exceptions.RequestException as e:
            raise self._to_http_exception(e) from e
        except Exception as e:
            logger.error("Unexpected error in synthesize: %s", e, exc_info=True)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error generating speech: {str(e)}",
            ) from e

    def _request_audio(self, text: str, voice: str) -> bytes:
        """Perform the blocking POST and return the response body."""
        headers = {
            "Authorization": f"Token {self.api_key}",
            "Content-Type": "application/json",
        }
        # ``params`` lets requests build the query string, which stays valid
        # even when a custom base_url already carries query parameters.
        # The context manager releases the connection on every exit path.
        with requests.post(
            self.base_url,
            params={"model": self.voice_models[voice]},
            headers=headers,
            json={"text": text},
            timeout=self.timeout,
        ) as response:
            response.raise_for_status()
            return response.content

    @staticmethod
    def _to_http_exception(exc: "requests.exceptions.RequestException") -> "HTTPException":
        """Log a failed upstream request and map it to the HTTPException to raise."""
        upstream = getattr(exc, "response", None)

        error_msg = f"Deepgram TTS API request failed: {str(exc)}"
        if upstream is not None:
            error_msg += f" | Status: {upstream.status_code} | Response: {upstream.text}"
        logger.error(error_msg)

        # Rate limiting is surfaced as its own status so that clients can back off.
        if upstream is not None and upstream.status_code == 429:
            retry_after = upstream.headers.get("Retry-After", "60")
            return HTTPException(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                detail={
                    "error": "Rate limit exceeded",
                    "retry_after": retry_after,
                    "message": f"Please wait {retry_after} seconds before making another request",
                },
            )

        return HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"TTS service error: {str(exc)}",
        )