Spaces:
Running
Running
File size: 2,694 Bytes
a8fdab7 1b20d8b a8fdab7 1b20d8b a8fdab7 1b20d8b a8fdab7 1b20d8b a8fdab7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | import os
from config import ROOT_DIR, get_tts_voice, is_running_in_spaces
# Lookup table from the app's friendly voice names to the edge-tts voice
# identifiers that are handed to edge-tts when synthesizing speech.
EDGE_TTS_VOICES = {
    "Jasper": "en-US-GuyNeural",
    "Bella": "en-US-JennyNeural",
    "Luna": "en-GB-SoniaNeural",
    "Bruno": "en-US-ChristopherNeural",
    "Rosie": "en-AU-NatashaNeural",
    "Hugo": "en-GB-RyanNeural",
    "Kiki": "en-US-AriaNeural",
    "Leo": "en-US-DavisNeural",
}
def _use_edge_tts() -> bool:
    """Decide whether speech should be synthesized with edge-tts.

    Returns:
        True when ``is_running_in_spaces()`` reports a Spaces deployment, or
        when ``KittenTTS`` cannot be imported from ``kittentts``; False when
        that import succeeds outside Spaces.
    """
    # On Spaces the KittenTTS import is never attempted.
    if is_running_in_spaces():
        return True

    try:
        from kittentts import KittenTTS  # noqa: F401
    except ImportError:
        kitten_importable = False
    else:
        kitten_importable = True

    return not kitten_importable
class TTS:
    """Text-to-speech front end that selects its backend at construction time.

    When ``_use_edge_tts()`` returns False the KittenTTS model is loaded and
    used for synthesis; otherwise synthesis goes through edge-tts.
    """

    def __init__(self) -> None:
        """Read the configured voice and load the KittenTTS model if it is used."""
        self._voice = get_tts_voice()
        self._use_edge = _use_edge_tts()
        if not self._use_edge:
            import soundfile  # noqa: F401 — ensure available
            from kittentts import KittenTTS as KittenModel

            self._model = KittenModel("KittenML/kitten-tts-mini-0.8")
            # Sample rate passed to soundfile when the generated audio is written.
            self._sample_rate = 24000
        else:
            self._model = None

    def synthesize(
        self,
        text: str,
        output_file: str = os.path.join(ROOT_DIR, ".mp", "audio.wav"),
    ) -> str:
        """Synthesize ``text`` into ``output_file`` with the selected backend.

        Args:
            text: The text to speak.
            output_file: Destination path for the audio file.

        Returns:
            ``output_file``.
        """
        if self._use_edge:
            return self._synthesize_edge(text, output_file)
        return self._synthesize_kitten(text, output_file)

    def _synthesize_kitten(self, text: str, output_file: str) -> str:
        """Generate audio with the KittenTTS model and write it via soundfile.

        Returns:
            ``output_file``.
        """
        import soundfile as sf

        audio = self._model.generate(text, voice=self._voice)
        sf.write(output_file, audio, self._sample_rate)
        return output_file

    def _synthesize_edge(self, text: str, output_file: str) -> str:
        """Generate audio with edge-tts and store it at ``output_file``.

        edge-tts is asked to save an MP3. If ``output_file`` itself has an
        ``.mp3`` extension the MP3 is written there directly. Otherwise the
        MP3 is written next to it and converted to WAV with pydub; when pydub
        cannot be imported, the MP3 data is moved to ``output_file`` unchanged.

        Args:
            text: The text to speak.
            output_file: Destination path for the audio file.

        Returns:
            ``output_file``.

        Raises:
            RuntimeError: If called from a thread that already has a running
                asyncio event loop, because ``asyncio.run`` is used.
        """
        import asyncio

        import edge_tts

        # Unknown friendly names fall back to the "en-US-GuyNeural" voice.
        voice_id = EDGE_TTS_VOICES.get(self._voice, "en-US-GuyNeural")

        # splitext only considers the final path component, so a dot in a
        # directory name (e.g. ".mp") can no longer truncate the path.
        stem, extension = os.path.splitext(output_file)
        wants_mp3 = extension.lower() == ".mp3"
        # When the caller asked for an MP3, write straight to the target so
        # the intermediate file and the result never collide.
        mp3_path = output_file if wants_mp3 else stem + ".mp3"

        async def _generate() -> None:
            communicate = edge_tts.Communicate(text, voice_id)
            await communicate.save(mp3_path)

        asyncio.run(_generate())

        if wants_mp3:
            return output_file

        # Convert mp3 to wav for compatibility with the rest of the pipeline.
        # Only the import is guarded: a failure during conversion is a real
        # error and must not be mistaken for "pydub is not installed".
        try:
            from pydub import AudioSegment
        except ImportError:
            # pydub is unavailable: keep the MP3 data under the expected
            # output path. os.replace overwrites any existing file.
            os.replace(mp3_path, output_file)
            return output_file

        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(output_file, format="wav")
        os.remove(mp3_path)
        return output_file
|