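# NOTE(review): the three tokens below appear to be page/commit residue captured
# with the file (author, commit message, short hash); kept as a comment so the
# module parses: orztv / update / 76374ce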
# tts_handler.py
import edge_tts
import asyncio
import tempfile
import subprocess
import os
# Locale used to filter voices when the caller does not name one; taken from the
# DEFAULT_LANGUAGE environment variable, falling back to 'en-US'.
DEFAULT_LANGUAGE = os.environ.get('DEFAULT_LANGUAGE', 'en-US')

# Lookup table: OpenAI voice name -> edge-tts voice short name.
# Names not present here are passed to edge-tts unchanged by the caller.
voice_mapping = {
    'alloy': 'en-US-AvaNeural',
    'echo': 'en-US-AndrewNeural',
    'fable': 'en-GB-SoniaNeural',
    'onyx': 'en-US-EricNeural',
    'nova': 'en-US-SteffanNeural',
    'shimmer': 'en-US-EmmaNeural',
}
def _remove_quietly(path):
    """Best-effort removal of a temporary file; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error (or result) that triggered it.
        pass


async def _generate_audio(text, voice, response_format, speed):
    """Synthesize *text* with edge-tts and return the path of the audio file.

    Args:
        text: Text to speak.
        voice: Either a key of ``voice_mapping`` (OpenAI voice name) or an
            edge-tts voice name, which is used as-is.
        response_format: Target container/format name. ``"mp3"`` at speed 1.0
            is returned straight from edge-tts; anything else goes through
            ffmpeg, with this value passed to ``-f`` and used as file suffix.
        speed: Playback-speed factor applied by an ffmpeg audio filter.

    Returns:
        Path of a temporary file created with ``delete=False``; the caller is
        responsible for deleting it.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # Use mapping if in OpenAI names, otherwise use as-is
    edge_tts_voice = voice_mapping.get(voice, voice)

    # Reserve a path and close the handle right away: only the name is needed,
    # and an open handle would leak a descriptor (and block other writers on
    # platforms that lock open files).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_path = mp3_file.name

    keep_mp3 = False
    try:
        # Generate the TTS output in mp3 format first
        communicator = edge_tts.Communicate(text, edge_tts_voice)
        await communicator.save(mp3_path)

        # mp3 at normal speed needs no conversion: hand the file to the caller
        if response_format == "mp3" and speed == 1.0:
            keep_mp3 = True
            return mp3_path

        # Convert to the requested format if not mp3 or if speed adjustment is needed
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=f".{response_format}"
        ) as converted_file:
            converted_path = converted_file.name

        # ffmpeg playback speed adjustment
        # NOTE(review): the pcm branch hard-codes a 44100 Hz rate, and "pcm" is
        # passed verbatim to `-f` below — confirm both against the ffmpeg build
        # in use and the actual sample rate of the edge-tts output.
        speed_filter = f"atempo={speed}" if response_format != "pcm" else f"asetrate=44100*{speed},aresample=44100"
        ffmpeg_command = [
            "ffmpeg", "-i", mp3_path,
            "-filter:a", speed_filter,  # Apply speed adjustment
            "-f", response_format, "-y",
            converted_path,
        ]
        try:
            subprocess.run(ffmpeg_command, check=True)
        except subprocess.CalledProcessError as e:
            _remove_quietly(converted_path)
            raise RuntimeError(f"Error in audio conversion: {e}") from e
        except OSError:
            # e.g. ffmpeg binary not found: propagate unchanged, but do not
            # leave the empty output file behind.
            _remove_quietly(converted_path)
            raise
        return converted_path
    finally:
        # The intermediate mp3 is only useful when it is the returned result.
        if not keep_mp3:
            _remove_quietly(mp3_path)
def generate_speech(text, voice, response_format, speed=1.0):
    """Blocking entry point: run ``_generate_audio`` to completion.

    Starts a fresh event loop via ``asyncio.run`` and returns whatever the
    coroutine returns (the path of the generated audio file).
    """
    audio_job = _generate_audio(text, voice, response_format, speed)
    return asyncio.run(audio_job)
def get_models():
    """Return the advertised TTS models as a new list of ``{"id", "name"}`` dicts."""
    catalogue = (
        ("tts-1", "Text-to-speech v1"),
        ("tts-1-hd", "Text-to-speech v1 HD"),
    )
    return [{"id": model_id, "name": label} for model_id, label in catalogue]
async def _get_voices(language=None):
    """List edge-tts voices, optionally restricted to one locale.

    Args:
        language: Locale to match exactly against each voice's ``Locale``
            field, or ``'all'`` for no filtering. A falsy value (``None`` or
            ``""``) falls back to ``DEFAULT_LANGUAGE``.

    Returns:
        A list of dicts with keys ``name``, ``gender`` and ``language``, built
        from each voice's ``ShortName``, ``Gender`` and ``Locale`` fields.
    """
    all_voices = await edge_tts.list_voices()
    # Use default if no language specified
    language = language or DEFAULT_LANGUAGE
    # Decided once up front; the previous per-voice `language is None` test was
    # dead code because the fallback above has already replaced None.
    include_all = language == 'all'
    return [
        {"name": v['ShortName'], "gender": v['Gender'], "language": v['Locale']}
        for v in all_voices
        if include_all or v['Locale'] == language
    ]
def get_voices(language=None):
    """Blocking entry point: run ``_get_voices`` to completion and return its list.

    ``language`` is forwarded unchanged; the call starts a fresh event loop
    via ``asyncio.run``.
    """
    voice_query = _get_voices(language)
    return asyncio.run(voice_query)