| import os
|
| import json
|
| import platform
|
| import glob
|
| from phonemizer import phonemize
|
| from phonemizer.backend.espeak.espeak import EspeakWrapper
|
| from utils.normalize_text import VietnameseTTSNormalizer
|
|
|
|
|
| PHONEME_DICT_PATH = os.getenv(
|
| 'PHONEME_DICT_PATH',
|
| os.path.join(os.path.dirname(__file__), "phoneme_dict.json")
|
| )
|
|
|
| def load_phoneme_dict(path=PHONEME_DICT_PATH):
|
| """Load phoneme dictionary from JSON file."""
|
| try:
|
| with open(path, "r", encoding="utf-8") as f:
|
| return json.load(f)
|
| except FileNotFoundError:
|
| raise FileNotFoundError(
|
| f"Phoneme dictionary not found at {path}. "
|
| "Please create it or set PHONEME_DICT_PATH environment variable."
|
| )
|
|
|
| def setup_espeak_library():
|
| """Configure eSpeak library path based on operating system."""
|
| system = platform.system()
|
|
|
| if system == "Windows":
|
| _setup_windows_espeak()
|
| elif system == "Linux":
|
| _setup_linux_espeak()
|
| elif system == "Darwin":
|
| _setup_macos_espeak()
|
| else:
|
| raise OSError(
|
| f"Unsupported OS: {system}. "
|
| "Only Windows, Linux, and macOS are supported."
|
| )
|
|
|
| def _setup_windows_espeak():
|
| """Setup eSpeak for Windows."""
|
| default_path = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
|
| if os.path.exists(default_path):
|
| EspeakWrapper.set_library(default_path)
|
| else:
|
| raise FileNotFoundError(
|
| f"eSpeak library not found at {default_path}. "
|
| "Please install eSpeak NG from: https://github.com/espeak-ng/espeak-ng/releases"
|
| )
|
|
|
| def _setup_linux_espeak():
|
| """Setup eSpeak for Linux."""
|
| search_patterns = [
|
| "/usr/lib/x86_64-linux-gnu/libespeak-ng.so*",
|
| "/usr/lib/x86_64-linux-gnu/libespeak.so*",
|
| "/usr/lib/libespeak-ng.so*",
|
| "/usr/lib64/libespeak-ng.so*",
|
| "/usr/local/lib/libespeak-ng.so*",
|
| ]
|
|
|
| for pattern in search_patterns:
|
| matches = glob.glob(pattern)
|
| if matches:
|
| EspeakWrapper.set_library(sorted(matches, key=len)[0])
|
| return
|
|
|
| raise RuntimeError(
|
| "eSpeak NG library not found. Install with:\n"
|
| " Ubuntu/Debian: sudo apt-get install espeak-ng\n"
|
| " Fedora: sudo dnf install espeak-ng\n"
|
| " Arch: sudo pacman -S espeak-ng\n"
|
| "See: https://github.com/pnnbao97/VieNeu-TTS/issues/5"
|
| )
|
|
|
| def _setup_macos_espeak():
|
| """Setup eSpeak for macOS."""
|
| espeak_lib = os.environ.get('PHONEMIZER_ESPEAK_LIBRARY')
|
|
|
| paths_to_check = [
|
| espeak_lib,
|
| "/opt/homebrew/lib/libespeak-ng.dylib",
|
| "/usr/local/lib/libespeak-ng.dylib",
|
| "/opt/local/lib/libespeak-ng.dylib",
|
| ]
|
|
|
| for path in paths_to_check:
|
| if path and os.path.exists(path):
|
| EspeakWrapper.set_library(path)
|
| return
|
|
|
| raise FileNotFoundError(
|
| "eSpeak library not found. Install with:\n"
|
| " brew install espeak-ng\n"
|
| "Or set: export PHONEMIZER_ESPEAK_LIBRARY=/path/to/libespeak-ng.dylib"
|
| )
|
|
|
|
|
| try:
|
| setup_espeak_library()
|
| phoneme_dict = load_phoneme_dict()
|
| normalizer = VietnameseTTSNormalizer()
|
| except Exception as e:
|
| print(f"Initialization error: {e}")
|
| raise
|
|
|
| def phonemize_text(text: str) -> str:
|
| """Convert text to phonemes using phonemizer."""
|
| text = normalizer.normalize(text)
|
| return phonemize(
|
| text,
|
| language="vi",
|
| backend="espeak",
|
| preserve_punctuation=True,
|
| with_stress=True,
|
| language_switch="remove-flags"
|
| )
|
|
|
| def phonemize_with_dict(text: str, phoneme_dict=phoneme_dict) -> str:
|
| """Phonemize text with dictionary lookup."""
|
| text = normalizer.normalize(text)
|
| words = text.split()
|
| result = []
|
|
|
| for word in words:
|
| if word in phoneme_dict:
|
| phone_word = phoneme_dict[word]
|
| else:
|
| try:
|
| phone_word = phonemize(
|
| word,
|
| language='vi',
|
| backend='espeak',
|
| preserve_punctuation=True,
|
| with_stress=True,
|
| language_switch='remove-flags'
|
| )
|
|
|
| if word.lower().startswith('r'):
|
| phone_word = 'ɹ' + phone_word[1:]
|
|
|
| phoneme_dict[word] = phone_word
|
| except Exception as e:
|
| print(f"Warning: Could not phonemize '{word}': {e}")
|
| phone_word = word
|
|
|
| result.append(phone_word)
|
|
|
| return ' '.join(result) |