Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,829 Bytes
96cf6e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import os
import json
import platform
import glob
from phonemizer import phonemize
from phonemizer.backend.espeak.espeak import EspeakWrapper
from utils.normalize_text import VietnameseTTSNormalizer
# Configuration
# Location of the phoneme dictionary. The PHONEME_DICT_PATH environment
# variable takes precedence; otherwise the file is expected to sit in the
# same directory as this module.
PHONEME_DICT_PATH = os.environ.get(
    "PHONEME_DICT_PATH",
    os.path.join(os.path.dirname(__file__), "phoneme_dict.json"),
)
def load_phoneme_dict(path=PHONEME_DICT_PATH):
    """Load the phoneme dictionary from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``PHONEME_DICT_PATH``.

    Returns:
        The object decoded from the file by ``json.load``.

    Raises:
        FileNotFoundError: If no file exists at ``path``. The underlying
            error is attached as ``__cause__``.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError as err:
        # Chain explicitly so the traceback presents this as an intentional
        # re-raise with a friendlier message, not a failure inside the handler.
        raise FileNotFoundError(
            f"Phoneme dictionary not found at {path}. "
            "Please create it or set PHONEME_DICT_PATH environment variable."
        ) from err
def setup_espeak_library():
    """Configure the eSpeak library path for the current operating system.

    Dispatches on ``platform.system()`` to the matching per-platform helper.
    Any exception raised by that helper propagates unchanged.

    Raises:
        OSError: If the reported system is not Windows, Linux, or Darwin.
    """
    system = platform.system()
    # Map each supported platform name to the helper that configures it.
    handlers = {
        "Windows": _setup_windows_espeak,
        "Linux": _setup_linux_espeak,
        "Darwin": _setup_macos_espeak,
    }
    handler = handlers.get(system)
    if handler is None:
        raise OSError(
            f"Unsupported OS: {system}. "
            "Only Windows, Linux, and macOS are supported."
        )
    handler()
def _setup_windows_espeak():
    """Setup eSpeak for Windows.

    Uses the first existing file among, in order:

    1. the path in the ``PHONEMIZER_ESPEAK_LIBRARY`` environment variable
       (same override the macOS setup honors), and
    2. the default eSpeak NG installation location.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    default_path = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
    candidates = [
        # Lets users with a non-default install location point at their DLL
        # instead of failing at import time.
        os.environ.get('PHONEMIZER_ESPEAK_LIBRARY'),
        default_path,
    ]
    for path in candidates:
        if path and os.path.exists(path):
            EspeakWrapper.set_library(path)
            return
    raise FileNotFoundError(
        f"eSpeak library not found at {default_path}. "
        "Please install eSpeak NG from: https://github.com/espeak-ng/espeak-ng/releases\n"
        "Or set PHONEMIZER_ESPEAK_LIBRARY to the full path of libespeak-ng.dll"
    )
def _setup_linux_espeak():
    """Setup eSpeak for Linux.

    Uses the path in the ``PHONEMIZER_ESPEAK_LIBRARY`` environment variable
    when it points at an existing file (same override the macOS setup
    honors). Otherwise the glob patterns below are tried in order and the
    first pattern with any match wins.

    Raises:
        RuntimeError: If no library file can be located.
    """
    override = os.environ.get('PHONEMIZER_ESPEAK_LIBRARY')
    if override and os.path.exists(override):
        EspeakWrapper.set_library(override)
        return

    search_patterns = [
        "/usr/lib/x86_64-linux-gnu/libespeak-ng.so*",
        "/usr/lib/x86_64-linux-gnu/libespeak.so*",
        "/usr/lib/libespeak-ng.so*",
        "/usr/lib64/libespeak-ng.so*",
        "/usr/local/lib/libespeak-ng.so*",
        # Multiarch directories of other architectures (e.g. aarch64).
        # Listed last so the locations above keep their priority.
        "/usr/lib/*-linux-gnu*/libespeak-ng.so*",
    ]
    for pattern in search_patterns:
        matches = glob.glob(pattern)
        if matches:
            # Pick the shortest matching path, e.g. a bare "libespeak-ng.so"
            # ahead of versioned names such as "libespeak-ng.so.1".
            EspeakWrapper.set_library(min(matches, key=len))
            return
    raise RuntimeError(
        "eSpeak NG library not found. Install with:\n"
        " Ubuntu/Debian: sudo apt-get install espeak-ng\n"
        " Fedora: sudo dnf install espeak-ng\n"
        " Arch: sudo pacman -S espeak-ng\n"
        "Or set: export PHONEMIZER_ESPEAK_LIBRARY=/path/to/libespeak-ng.so\n"
        "See: https://github.com/pnnbao97/VieNeu-TTS/issues/5"
    )
def _setup_macos_espeak():
    """Setup eSpeak for macOS.

    Checks the ``PHONEMIZER_ESPEAK_LIBRARY`` environment variable first,
    then a fixed list of install locations, and registers the first path
    that exists.

    Raises:
        FileNotFoundError: If none of the candidate paths exists.
    """
    candidates = (
        os.environ.get('PHONEMIZER_ESPEAK_LIBRARY'),
        "/opt/homebrew/lib/libespeak-ng.dylib",  # Apple Silicon
        "/usr/local/lib/libespeak-ng.dylib",  # Intel
        "/opt/local/lib/libespeak-ng.dylib",  # MacPorts
    )
    # An unset or empty environment variable is skipped by the truthiness test.
    found = next(
        (candidate for candidate in candidates
         if candidate and os.path.exists(candidate)),
        None,
    )
    if found is None:
        raise FileNotFoundError(
            "eSpeak library not found. Install with:\n"
            " brew install espeak-ng\n"
            "Or set: export PHONEMIZER_ESPEAK_LIBRARY=/path/to/libespeak-ng.dylib"
        )
    EspeakWrapper.set_library(found)
# Initialize
# Import-time setup: configure eSpeak, load the phoneme dictionary, and
# build the text normalizer. These module-level names are used by the
# phonemization functions in this module.
try:
    setup_espeak_library()
    phoneme_dict = load_phoneme_dict()
    normalizer = VietnameseTTSNormalizer()
except Exception as init_error:
    # Print the failure for visibility, then re-raise so the import fails.
    print(f"Initialization error: {init_error}")
    raise
def phonemize_text(text: str) -> str:
    """Normalize ``text`` and convert it to phonemes in a single call.

    Args:
        text: Input text; it is passed through the module-level
            ``normalizer`` before phonemization.

    Returns:
        The result of ``phonemize`` for the normalized text.
    """
    espeak_options = {
        "language": "vi",
        "backend": "espeak",
        "preserve_punctuation": True,
        "with_stress": True,
        "language_switch": "remove-flags",
    }
    normalized = normalizer.normalize(text)
    return phonemize(normalized, **espeak_options)
def phonemize_with_dict(text: str, phoneme_dict=phoneme_dict) -> str:
    """Phonemize text word by word, preferring dictionary entries.

    The text is normalized and split on whitespace. Words found in
    ``phoneme_dict`` use the stored value; other words are sent to
    ``phonemize`` and the result is stored back into ``phoneme_dict``.

    Args:
        text: Input text.
        phoneme_dict: Mapping from word to phoneme string. It is mutated:
            newly phonemized words are added to it. The default is the
            module-level dictionary, so those additions persist across calls.

    Returns:
        The per-word phoneme strings joined with single spaces. A word that
        could not be phonemized is emitted unchanged after a warning is
        printed.
    """
    espeak_options = {
        'language': 'vi',
        'backend': 'espeak',
        'preserve_punctuation': True,
        'with_stress': True,
        'language_switch': 'remove-flags',
    }
    pieces = []
    for word in normalizer.normalize(text).split():
        # Known word: reuse the stored phonemes and move on.
        if word in phoneme_dict:
            pieces.append(phoneme_dict[word])
            continue
        try:
            phones = phonemize(word, **espeak_options)
            # Words beginning with "r"/"R" get their first output character
            # replaced with 'ɹ'.
            if word.lower().startswith('r'):
                phones = 'ɹ' + phones[1:]
            phoneme_dict[word] = phones
        except Exception as e:
            # Best effort: keep going and fall back to the raw word.
            print(f"Warning: Could not phonemize '{word}': {e}")
            phones = word
        pieces.append(phones)
    return ' '.join(pieces)