Spaces:
Sleeping
Sleeping
| import logging | |
| import requests | |
| from typing import Dict, Any, Optional | |
| from fastapi import HTTPException, status | |
| # Absolute import of the application config (app package must be importable) | |
| from app.config import Config | |
| logger = logging.getLogger(__name__) | |
class TTSModel:
    """Handles text-to-speech synthesis using Deepgram's API.

    Instance attributes (all set in ``__init__``):
        api_key: Token sent in the ``Authorization`` header.
        base_url: Endpoint the synthesis request is POSTed to.
        timeout: Timeout in seconds passed to ``requests`` for each call.
        voice_models: Maps user-facing voice names to Deepgram model ids.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Initialize the TTS model with API key and base URL.

        Args:
            api_key: Deepgram API key. Falls back to
                ``Config.DEEPGRAM_API_KEY`` when omitted or empty.
            base_url: Base URL for the Deepgram speak endpoint. Falls back
                to ``https://api.deepgram.com/v1/speak`` when omitted or empty.
        """
        self.api_key = api_key or Config.DEEPGRAM_API_KEY
        self.base_url = base_url or "https://api.deepgram.com/v1/speak"
        self.timeout = 30  # seconds

        # Voice to model mapping
        self.voice_models = {
            "Deepgram Aura2": "aura-2-thalia-en",
            "Deepgram Aura1": "aura-1-thalia-en",
            "Deepgram Legacy": "aura-legacy-thalia-en",
        }

    def _post_speak(self, text: str, model: str) -> bytes:
        """Perform the blocking Deepgram request and return the raw body.

        Kept synchronous on purpose: ``synthesize`` runs it in a worker
        thread so the event loop is never blocked by ``requests``.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-2xx response (via ``raise_for_status``).
        """
        response = requests.post(
            self.base_url,
            # Let requests build/merge the query string instead of
            # concatenating "?model=..." by hand, which breaks when
            # base_url already carries a query.
            params={"model": model},
            headers={
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json",
            },
            json={"text": text},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.content

    async def synthesize(self, text: str, voice: str = "Deepgram Aura2") -> bytes:
        """Synthesize speech from text using Deepgram's TTS API.

        Args:
            text: The text to convert to speech.
            voice: The voice to use; must be a key of ``self.voice_models``.

        Returns:
            The response body bytes returned by Deepgram. The request does
            not set an encoding, so the format is whatever Deepgram uses by
            default for the selected model.

        Raises:
            ValueError: If no API key is configured or ``voice`` is not
                supported (raised before any network call).
            HTTPException: 429 when Deepgram rate-limits the request, 502 for
                any other request/HTTP failure, 500 for unexpected errors
                (including an empty audio body).
        """
        # Local stdlib imports keep this block self-contained.
        import asyncio
        import functools

        if not self.api_key:
            raise ValueError("Deepgram API key is not configured")
        if voice not in self.voice_models:
            raise ValueError(f"Unsupported voice: {voice}")

        logger.debug("Sending TTS request to Deepgram with text length: %d", len(text))

        try:
            # requests is blocking; off-load it to the default executor so
            # other coroutines keep running while we wait on Deepgram.
            loop = asyncio.get_running_loop()
            audio_data = await loop.run_in_executor(
                None,
                functools.partial(self._post_speak, text, self.voice_models[voice]),
            )

            if not audio_data:
                raise ValueError("Received empty audio data from Deepgram")

            logger.info("Successfully synthesized %d bytes of audio", len(audio_data))
            return audio_data

        except requests.exceptions.RequestException as e:
            # Only HTTP-level failures carry a response; connection errors
            # and timeouts leave it as None.
            upstream = getattr(e, "response", None)

            error_msg = f"Deepgram TTS API request failed: {e}"
            if upstream is not None:
                error_msg += f" | Status: {upstream.status_code} | Response: {upstream.text}"
            logger.error(error_msg)

            # Handle rate limiting specifically
            if upstream is not None and upstream.status_code == 429:
                retry_after = upstream.headers.get("Retry-After", "60")
                raise HTTPException(
                    status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                    detail={
                        "error": "Rate limit exceeded",
                        "retry_after": retry_after,
                        "message": f"Please wait {retry_after} seconds before making another request",
                    },
                ) from e

            raise HTTPException(
                status_code=status.HTTP_502_BAD_GATEWAY,
                detail=f"TTS service error: {e}",
            ) from e

        except Exception as e:
            logger.error("Unexpected error in synthesize: %s", e, exc_info=True)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error generating speech: {e}",
            ) from e