Spaces:

namfam
/

spik-backend

Sleeping

spik-backend / core /models /tts_model.py

Nam Fam

add files

1f47729 11 months ago

4.18 kB

	import asyncio
	import logging
	from typing import Dict, Any, Optional

	import requests
	from fastapi import HTTPException, status

	# Absolute import of the application configuration
	from app.config import Config

	logger = logging.getLogger(__name__)

	class TTSModel:
	    """Handles text-to-speech synthesis using Deepgram's API.

	    Instance attributes set by ``__init__``:
	        api_key: Token sent in the ``Authorization`` header.
	        base_url: Endpoint that receives the POST request.
	        timeout: Timeout, in seconds, passed to ``requests``.
	        voice_models: Maps the voice names accepted by ``synthesize`` to the
	            value sent as the ``model`` query parameter.
	    """

	    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
	        """Initialize the TTS model with API key and base URL.

	        Args:
	            api_key: Deepgram API key. Falls back to
	                ``Config.DEEPGRAM_API_KEY`` when omitted or empty.
	            base_url: Base URL for the Deepgram API. Falls back to
	                ``https://api.deepgram.com/v1/speak`` when omitted or empty.
	        """
	        self.api_key = api_key or Config.DEEPGRAM_API_KEY
	        self.base_url = base_url or "https://api.deepgram.com/v1/speak"
	        self.timeout = 30  # seconds

	        # Voice to model mapping
	        self.voice_models = {
	            "Deepgram Aura2": "aura-2-thalia-en",
	            "Deepgram Aura1": "aura-1-thalia-en",
	            "Deepgram Legacy": "aura-legacy-thalia-en",
	        }

	    def _post_tts_request(self, text: str, model: str) -> bytes:
	        """Send the blocking POST request and return the raw response body.

	        Kept synchronous on purpose: ``synthesize`` runs it in a worker
	        thread so the event loop is never blocked by ``requests``.

	        Args:
	            text: The text to convert to speech.
	            model: Value for the ``model`` query parameter.

	        Returns:
	            The response body as bytes (may be empty).

	        Raises:
	            requests.exceptions.RequestException: On connection errors,
	                timeouts, or a 4xx/5xx status (via ``raise_for_status``).
	        """
	        response = requests.post(
	            self.base_url,
	            # Let requests build/encode the query string; this also works
	            # when base_url already carries query parameters.
	            params={"model": model},
	            headers={
	                "Authorization": f"Token {self.api_key}",
	                "Content-Type": "application/json",
	            },
	            json={"text": text},
	            timeout=self.timeout,
	            # No stream=True: the whole body is needed anyway, and a
	            # non-streamed response releases its connection immediately,
	            # including on the error paths.
	        )
	        response.raise_for_status()
	        return response.content

	    async def synthesize(self, text: str, voice: str = "Deepgram Aura2") -> bytes:
	        """Synthesize speech from text using Deepgram's TTS API.

	        Args:
	            text: The text to convert to speech.
	            voice: The voice to use; must be a key of ``self.voice_models``.

	        Returns:
	            The non-empty audio bytes returned by the API. No encoding
	            parameter is sent, so the format is whatever the endpoint
	            returns by default (presumably MP3 -- confirm against the
	            Deepgram documentation).

	        Raises:
	            ValueError: If no API key is configured or ``voice`` is not
	                supported. These checks run before the request and are not
	                converted to ``HTTPException``.
	            HTTPException: 429 when the API answers with status 429, 502 for
	                any other ``requests`` failure, 500 for any other error
	                (including an empty audio body).
	        """
	        if not self.api_key:
	            raise ValueError("Deepgram API key is not configured")

	        if voice not in self.voice_models:
	            raise ValueError(f"Unsupported voice: {voice}")

	        logger.debug("Sending TTS request to Deepgram with text length: %d", len(text))

	        try:
	            # requests is blocking; run it in the default executor so other
	            # coroutines keep running while we wait for the API.
	            loop = asyncio.get_running_loop()
	            audio_data = await loop.run_in_executor(
	                None, self._post_tts_request, text, self.voice_models[voice]
	            )

	            if not audio_data:
	                raise ValueError("Received empty audio data from Deepgram")

	            logger.info("Successfully synthesized %d bytes of audio", len(audio_data))
	            return audio_data

	        except requests.exceptions.RequestException as e:
	            # Compare against None explicitly: a requests.Response is falsy
	            # for 4xx/5xx statuses, which are exactly the ones seen here.
	            upstream = getattr(e, "response", None)

	            error_msg = f"Deepgram TTS API request failed: {str(e)}"
	            if upstream is not None:
	                error_msg += f" | Status: {upstream.status_code} | Response: {upstream.text}"
	            logger.error(error_msg)

	            # Handle rate limiting specifically
	            if upstream is not None and upstream.status_code == 429:
	                retry_after = upstream.headers.get("Retry-After", "60")
	                raise HTTPException(
	                    status_code=status.HTTP_429_TOO_MANY_REQUESTS,
	                    detail={
	                        "error": "Rate limit exceeded",
	                        "retry_after": retry_after,
	                        "message": f"Please wait {retry_after} seconds before making another request",
	                    },
	                ) from e

	            raise HTTPException(
	                status_code=status.HTTP_502_BAD_GATEWAY,
	                detail=f"TTS service error: {str(e)}",
	            ) from e

	        except Exception as e:
	            logger.error("Unexpected error in synthesize: %s", e, exc_info=True)
	            raise HTTPException(
	                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	                detail=f"Error generating speech: {str(e)}",
	            ) from e