Spaces:

aankitdas
/

tts-eval-framework

Sleeping

File size: 1,309 Bytes

a3419b6

# src/edge_tts_client.py
# TTS client for edge-tts (Microsoft neural TTS).
# Free cloud-based system, no API key required.
# Same engine powering modern Windows Narrator and Azure TTS.

import time
import asyncio
import edge_tts


async def _synthesize_async(text: str, output_path: str, voice: str, rate: str) -> None:
    """Run a single edge-tts request and save its audio to ``output_path``.

    Private coroutine: builds an ``edge_tts.Communicate`` for ``text`` using
    the given ``voice`` and ``rate``, then awaits its ``save`` call.
    """
    await edge_tts.Communicate(text, voice, rate=rate).save(output_path)


async def synthesize(text: str, output_path: str, voice: str = "en-US-JennyNeural", rate: str = "+0%") -> dict:
    """
    Synthesize text to an .mp3 file using edge-tts (Microsoft neural TTS).

    This is a coroutine: callers must ``await`` it (or drive it with
    ``asyncio.run``).

    Args:
        text: the string to synthesize
        output_path: where to save the .mp3 file
            (NOTE(review): the path is passed through unchanged, so the audio
            format presumably does not follow the extension — confirm against
            edge-tts before naming outputs ``.wav``)
        voice: edge-tts voice ID (default en-US-JennyNeural — warm American female)
        rate: speaking rate adjustment e.g. "+0%", "+10%", "-10%"

    Returns:
        dict with keys: output_path, latency_seconds, engine, voice.
        ``latency_seconds`` is the elapsed time of the synthesis call,
        rounded to 3 decimal places.
    """
    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-request.
    started = time.perf_counter()
    await _synthesize_async(text, output_path, voice, rate)
    latency = time.perf_counter() - started

    return {
        "output_path": output_path,
        "latency_seconds": round(latency, 3),
        "engine": "edge_tts",
        "voice": voice,
    }