nhantrungsp committed on
Commit
96cf6e9
·
verified ·
1 Parent(s): 474b8b5

Upload 4 files

Browse files
utils/__init__.py ADDED
File without changes
utils/normalize_text.py ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
class VietnameseTTSNormalizer:
    """
    A text normalizer for Vietnamese Text-to-Speech systems.
    Converts numbers, dates, units, and special characters into readable Vietnamese text.

    All processing is regex-driven and operates on lowercased input.  The
    stages applied in ``normalize`` are order-dependent: each stage rewrites
    its specific pattern (temperature, currency, units, times, dates, phone
    numbers) before the generic digit-to-words pass at the end picks up
    whatever digits remain.
    """

    def __init__(self):
        # Measurement-unit abbreviations -> spelled-out Vietnamese readings.
        # Both Unicode (², ³, µ/μ) and ASCII (m2, m3) spellings are listed
        # because real-world text uses either form.
        self.units = {
            'km': 'ki lô mét', 'dm': 'đê xi mét', 'cm': 'xen ti mét',
            'mm': 'mi li mét', 'nm': 'na nô mét', 'µm': 'mic rô mét',
            'μm': 'mic rô mét', 'm': 'mét',

            'kg': 'ki lô gam', 'g': 'gam', 'mg': 'mi li gam',

            'km²': 'ki lô mét vuông', 'km2': 'ki lô mét vuông',
            'm²': 'mét vuông', 'm2': 'mét vuông',
            'cm²': 'xen ti mét vuông', 'cm2': 'xen ti mét vuông',
            'mm²': 'mi li mét vuông', 'mm2': 'mi li mét vuông',
            'ha': 'héc ta',

            'km³': 'ki lô mét khối', 'km3': 'ki lô mét khối',
            'm³': 'mét khối', 'm3': 'mét khối',
            'cm³': 'xen ti mét khối', 'cm3': 'xen ti mét khối',
            'mm³': 'mi li mét khối', 'mm3': 'mi li mét khối',
            'l': 'lít', 'dl': 'đê xi lít', 'ml': 'mi li lít', 'hl': 'héc tô lít',

            'v': 'vôn', 'kv': 'ki lô vôn', 'mv': 'mi li vôn',
            'a': 'am pe', 'ma': 'mi li am pe', 'ka': 'ki lô am pe',
            'w': 'oát', 'kw': 'ki lô oát', 'mw': 'mê ga oát', 'gw': 'gi ga oát',
            'kwh': 'ki lô oát giờ', 'mwh': 'mê ga oát giờ', 'wh': 'oát giờ',
            'ω': 'ôm', 'ohm': 'ôm', 'kω': 'ki lô ôm', 'mω': 'mê ga ôm',

            'hz': 'héc', 'khz': 'ki lô héc', 'mhz': 'mê ga héc', 'ghz': 'gi ga héc',

            'pa': 'pát cal', 'kpa': 'ki lô pát cal', 'mpa': 'mê ga pát cal',
            'bar': 'ba', 'mbar': 'mi li ba', 'atm': 'át mốt phia', 'psi': 'pi ét xai',

            'j': 'giun', 'kj': 'ki lô giun',
            'cal': 'ca lo', 'kcal': 'ki lô ca lo',
        }

        # Digit words 0-9, indexed by digit value.  Used for digit-by-digit
        # readings (decimals, phone numbers) and by the number-to-words code.
        self.digits = ['không', 'một', 'hai', 'ba', 'bốn',
                       'năm', 'sáu', 'bảy', 'tám', 'chín']

    def normalize(self, text: str) -> str:
        """Main normalization pipeline.

        Runs the stage methods in a fixed, significant order: more specific
        patterns (temperature, currency, percentages, units, times, dates,
        phone numbers) are rewritten first so the generic number-to-words
        pass only sees digits that belong to no richer construct.

        NOTE(review): currency runs before units, so a bare "5m" is read as
        "5 triệu" (millions) rather than metres — confirm this is intended;
        attached unit forms like "120km/h" are unaffected.
        """
        text = text.lower()
        text = self._normalize_temperature(text)
        text = self._normalize_currency(text)
        text = self._normalize_percentage(text)
        text = self._normalize_units(text)
        text = self._normalize_time(text)
        text = self._normalize_date(text)
        text = self._normalize_phone(text)
        text = self._normalize_numbers(text)
        text = self._number_to_words(text)
        text = self._normalize_special_chars(text)
        text = self._normalize_whitespace(text)
        return text

    def _normalize_temperature(self, text: str) -> str:
        """Convert temperature notation to words.

        Negative forms are handled first so the leading '-' becomes "âm";
        any '°' left over (no C/F suffix) is read as plain " độ ".
        """
        text = re.sub(r'-(\d+(?:[.,]\d+)?)\s*°\s*c\b', r'âm \1 độ xê', text, flags=re.IGNORECASE)
        text = re.sub(r'-(\d+(?:[.,]\d+)?)\s*°\s*f\b', r'âm \1 độ ép', text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*°\s*c\b', r'\1 độ xê', text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*°\s*f\b', r'\1 độ ép', text, flags=re.IGNORECASE)
        text = re.sub(r'°', ' độ ', text)
        return text

    def _normalize_currency(self, text: str) -> str:
        """Convert currency notation to words.

        Handles decimal shorthand like "2.5M" (-> "2 phẩy năm triệu"),
        bare k/m/b suffixes, đ/VND, and $ on either side of the amount.
        """
        def decimal_currency(match):
            # "2.5m" -> "2 phẩy năm triệu": decimal digits are read one by
            # one, then the k/m/b suffix is expanded.
            whole = match.group(1)
            decimal = match.group(2)
            unit = match.group(3)
            decimal_words = ' '.join([self.digits[int(d)] for d in decimal])
            unit_map = {'k': 'nghìn', 'm': 'triệu', 'b': 'tỷ'}
            unit_word = unit_map.get(unit.lower(), unit)
            return f"{whole} phẩy {decimal_words} {unit_word}"

        text = re.sub(r'(\d+)[.,](\d+)\s*([kmb])\b', decimal_currency, text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+)\s*k\b', r'\1 nghìn', text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+)\s*m\b', r'\1 triệu', text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+)\s*b\b', r'\1 tỷ', text, flags=re.IGNORECASE)
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*đ\b', r'\1 đồng', text)
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*vnd\b', r'\1 đồng', text, flags=re.IGNORECASE)
        text = re.sub(r'\$\s*(\d+(?:[.,]\d+)?)', r'\1 đô la', text)
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*\$', r'\1 đô la', text)
        return text

    def _normalize_percentage(self, text: str) -> str:
        """Convert percentage to words ("%" -> " phần trăm")."""
        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*%', r'\1 phần trăm', text)
        return text

    def _normalize_units(self, text: str) -> str:
        """Convert measurement units to words.

        Compound "a/b" units (km/h, W/m2, người/km2) are expanded first,
        reading '/' as "trên".  Simple number+unit pairs are then expanded
        longest-abbreviation-first so e.g. 'km' is matched before 'm'.
        """
        def expand_compound_with_number(match):
            number = match.group(1)
            unit1 = match.group(2).lower()
            unit2 = match.group(3).lower()
            # Unknown abbreviations fall through unchanged.
            full_unit1 = self.units.get(unit1, unit1)
            full_unit2 = self.units.get(unit2, unit2)
            return f"{number} {full_unit1} trên {full_unit2}"

        def expand_compound_without_number(match):
            unit1 = match.group(1).lower()
            unit2 = match.group(2).lower()
            full_unit1 = self.units.get(unit1, unit1)
            full_unit2 = self.units.get(unit2, unit2)
            return f"{full_unit1} trên {full_unit2}"

        text = re.sub(r'(\d+(?:[.,]\d+)?)\s*([a-zA-Zμµ²³°]+)/([a-zA-Zμµ²³°0-9]+)\b',
                      expand_compound_with_number, text)
        text = re.sub(r'\b([a-zA-Zμµ²³°]+)/([a-zA-Zμµ²³°0-9]+)\b',
                      expand_compound_without_number, text)

        # Longest abbreviation first, so prefixes ('m') never shadow longer
        # keys ('mm', 'mhz', ...).
        sorted_units = sorted(self.units.items(), key=lambda x: len(x[0]), reverse=True)
        for unit, full_name in sorted_units:
            pattern = r'(\d+(?:[.,]\d+)?)\s*' + re.escape(unit) + r'\b'
            text = re.sub(pattern, rf'\1 {full_name}', text, flags=re.IGNORECASE)

        # Second pass: superscript units (m², km³, ...) are expanded even
        # without a preceding number; plain-letter units are left alone to
        # avoid clobbering ordinary words.
        for unit, full_name in sorted_units:
            if any(c in unit for c in '²³°'):
                pattern = r'\b' + re.escape(unit) + r'\b'
                text = re.sub(pattern, full_name, text, flags=re.IGNORECASE)

        return text

    def _normalize_time(self, text: str) -> str:
        """Convert time notation to words with validation.

        Supports HH:MM:SS, HH:MM, HHhMM and HHh.  Out-of-range components
        (hour > 23, minute/second > 59) leave the original text untouched.
        """

        def validate_and_convert_time(match):
            """Validate time components before converting."""
            groups = match.groups()

            # HH:MM:SS format
            if len(groups) == 3:
                hour, minute, second = groups
                hour_int, minute_int, second_int = int(hour), int(minute), int(second)

                # Validate ranges
                if not (0 <= hour_int <= 23):
                    return match.group(0)  # Return original if invalid
                if not (0 <= minute_int <= 59):
                    return match.group(0)
                if not (0 <= second_int <= 59):
                    return match.group(0)

                return f"{hour} giờ {minute} phút {second} giây"

            # HH:MM or HHhMM format
            elif len(groups) == 2:
                hour, minute = groups
                hour_int, minute_int = int(hour), int(minute)

                # Validate ranges
                if not (0 <= hour_int <= 23):
                    return match.group(0)
                if not (0 <= minute_int <= 59):
                    return match.group(0)

                return f"{hour} giờ {minute} phút"

            # HHh format
            else:
                hour = groups[0]
                hour_int = int(hour)

                if not (0 <= hour_int <= 23):
                    return match.group(0)

                return f"{hour} giờ"

        # Apply patterns with validation, longest format first so HH:MM:SS
        # is not partially consumed by the HH:MM pattern.
        text = re.sub(r'(\d{1,2}):(\d{2}):(\d{2})', validate_and_convert_time, text)
        text = re.sub(r'(\d{1,2}):(\d{2})', validate_and_convert_time, text)
        text = re.sub(r'(\d{1,2})h(\d{2})', validate_and_convert_time, text)
        text = re.sub(r'(\d{1,2})h\b', validate_and_convert_time, text)

        return text

    def _normalize_date(self, text: str) -> str:
        """Convert date notation to words with validation.

        Handles "ngày dd/mm/yyyy", dd/mm/yyyy, dd-mm-yyyy, ISO yyyy-mm-dd,
        and two-digit years (pivot at 50: <50 -> 20xx, otherwise 19xx).
        Invalid day/month values leave the original text untouched.
        """

        def is_valid_date(day, month, year):
            """Check if date components are valid."""
            day, month, year = int(day), int(month), int(year)

            # Basic range checks; the year is accepted as-is (only day and
            # month are range-checked, and month lengths are not verified).
            if not (1 <= day <= 31):
                return False
            if not (1 <= month <= 12):
                return False

            return True

        def date_to_text(match):
            day, month, year = match.groups()
            if is_valid_date(day, month, year):
                return f"ngày {day} tháng {month} năm {year}"
            return match.group(0)  # Return original if invalid

        def date_iso_to_text(match):
            # ISO order: year first.
            year, month, day = match.groups()
            if is_valid_date(day, month, year):
                return f"ngày {day} tháng {month} năm {year}"
            return match.group(0)

        def date_short_year(match):
            day, month, year = match.groups()
            # Two-digit year pivot: 00-49 -> 2000s, 50-99 -> 1900s.
            full_year = f"20{year}" if int(year) < 50 else f"19{year}"
            if is_valid_date(day, month, full_year):
                return f"ngày {day} tháng {month} năm {full_year}"
            return match.group(0)

        # Apply patterns with validation.  The "ngày dd/mm/yyyy" forms would
        # otherwise produce "ngày ngày ..."; the lambdas collapse that.
        text = re.sub(r'\bngày\s+(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})\b',
                      lambda m: date_to_text(m).replace('ngày ngày', 'ngày'), text)
        text = re.sub(r'\bngày\s+(\d{1,2})[/\-](\d{1,2})[/\-](\d{2})\b',
                      lambda m: date_short_year(m).replace('ngày ngày', 'ngày'), text)
        text = re.sub(r'\b(\d{4})-(\d{1,2})-(\d{1,2})\b', date_iso_to_text, text)
        text = re.sub(r'\b(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})\b', date_to_text, text)
        text = re.sub(r'\b(\d{1,2})[/\-](\d{1,2})[/\-](\d{2})\b', date_short_year, text)

        return text

    def _normalize_phone(self, text: str) -> str:
        """Convert phone numbers to digit-by-digit reading.

        +84/84 country prefixes are rewritten to the domestic leading '0';
        only candidates with 10-11 digits after cleanup are converted.
        """
        def phone_to_text(match):
            phone = match.group(0)
            # Strip separators (spaces, dashes, dots, '+').
            phone = re.sub(r'[^\d]', '', phone)

            if phone.startswith('84') and len(phone) >= 10:
                phone = '0' + phone[2:]

            if 10 <= len(phone) <= 11:
                words = [self.digits[int(d)] for d in phone]
                # Trailing space keeps the reading separated from what
                # followed the number in the original text.
                return ' '.join(words) + ' '

            return match.group(0)

        text = re.sub(r'(\+84|84)[\s\-\.]?\d[\d\s\-\.]{7,}', phone_to_text, text)
        text = re.sub(r'\b0\d[\d\s\-\.]{8,}', phone_to_text, text)
        return text

    def _normalize_numbers(self, text: str) -> str:
        """Strip thousands separators and spell out decimal fractions.

        NOTE(review): the percent handling here duplicates
        ``_normalize_percentage`` (which runs earlier in ``normalize``); it
        only matters when this method is called standalone.
        """
        text = re.sub(r'(\d+(?:[,.]\d+)?)%', lambda m: f'{m.group(1)} phần trăm', text)
        # 1. Remove thousands separators first (e.g. "2.500.000" -> "2500000")
        text = re.sub(r'(\d{1,3})(?:\.(\d{3}))+', lambda m: m.group(0).replace('.', ''), text)

        # 2. Spell out decimal numbers digit by digit
        def decimal_to_words(match):
            whole = match.group(1)
            decimal = match.group(2)
            decimal_words = ' '.join([self.digits[int(d)] for d in decimal])
            # Comma reads as "phẩy", dot as "chấm".
            separator = 'phẩy' if ',' in match.group(0) else 'chấm'
            return f"{whole} {separator} {decimal_words}"

        # 2a. Comma as decimal separator
        text = re.sub(r'(\d+),(\d+)', decimal_to_words, text)
        # 2b. Dot as decimal separator (only 1-2 fractional digits, so
        #     residual thousands groups are not misread)
        text = re.sub(r'(\d+)\.(\d{1,2})\b', decimal_to_words, text)

        return text

    def _read_two_digits(self, n: int) -> str:
        """Read two-digit numbers (0-99) in Vietnamese.

        Applies the irregular forms: 15 -> "mười lăm", final 1 after a tens
        word -> "mốt", final 5 -> "lăm".
        """
        if n < 10:
            return self.digits[n]
        elif n == 10:
            return "mười"
        elif n < 20:
            if n == 15:
                return "mười lăm"
            return f"mười {self.digits[n % 10]}"
        else:
            tens = n // 10
            ones = n % 10
            if ones == 0:
                return f"{self.digits[tens]} mươi"
            elif ones == 1:
                return f"{self.digits[tens]} mươi mốt"
            elif ones == 5:
                return f"{self.digits[tens]} mươi lăm"
            else:
                return f"{self.digits[tens]} mươi {self.digits[ones]}"

    def _read_three_digits(self, n: int) -> str:
        """Read three-digit numbers (0-999) in Vietnamese.

        Uses "lẻ" to bridge a zero tens place (105 -> "một trăm lẻ năm").
        """
        if n < 100:
            return self._read_two_digits(n)

        hundreds = n // 100
        remainder = n % 100
        result = f"{self.digits[hundreds]} trăm"

        if remainder == 0:
            return result
        elif remainder < 10:
            result += f" lẻ {self.digits[remainder]}"
        else:
            result += f" {self._read_two_digits(remainder)}"

        return result

    def _convert_number_to_words(self, num: int) -> str:
        """Convert an integer to Vietnamese words.

        Recurses group-by-group through tỷ (1e9), triệu (1e6) and nghìn
        (1e3).  A remainder < 100 after "nghìn" is padded with "không trăm"
        so e.g. 1050 reads "một nghìn không trăm năm mươi".

        NOTE(review): for a remainder < 10 this yields "không trăm năm"
        rather than the conventional "không trăm lẻ năm" — confirm whether
        the missing "lẻ" is intended.
        """
        if num == 0:
            return "không"

        if num < 0:
            return f"âm {self._convert_number_to_words(-num)}"

        if num >= 1000000000:
            billion = num // 1000000000
            remainder = num % 1000000000
            result = f"{self._read_three_digits(billion)} tỷ"
            if remainder > 0:
                result += f" {self._convert_number_to_words(remainder)}"
            return result

        elif num >= 1000000:
            million = num // 1000000
            remainder = num % 1000000
            result = f"{self._read_three_digits(million)} triệu"
            if remainder > 0:
                result += f" {self._convert_number_to_words(remainder)}"
            return result

        elif num >= 1000:
            thousand = num // 1000
            remainder = num % 1000
            result = f"{self._read_three_digits(thousand)} nghìn"
            if remainder > 0:
                if remainder < 100:
                    result += f" không trăm {self._read_two_digits(remainder)}"
                else:
                    result += f" {self._read_three_digits(remainder)}"
            return result

        else:
            return self._read_three_digits(num)

    def _number_to_words(self, text: str) -> str:
        """Convert all remaining standalone integers to words.

        Runs last among the numeric stages, so only digits untouched by the
        earlier, more specific patterns are affected.
        """
        def convert_number(match):
            num = int(match.group(0))
            return self._convert_number_to_words(num)

        text = re.sub(r'\b\d+\b', convert_number, text)
        return text

    def _normalize_special_chars(self, text: str) -> str:
        """Handle special characters.

        Spells out &/+/=/#, drops brackets, removes dashes and dots only
        when space-surrounded (so hyphenated words and decimals survive),
        then whitelists word characters, Vietnamese diacritics and basic
        punctuation — everything else becomes a space.
        """
        text = text.replace('&', ' và ')
        text = text.replace('+', ' cộng ')
        text = text.replace('=', ' bằng ')
        text = text.replace('#', ' thăng ')
        text = re.sub(r'[\[\]\(\)\{\}]', ' ', text)
        text = re.sub(r'\s+[-–—]+\s+', ' ', text)
        text = re.sub(r'\.{2,}', ' ', text)
        text = re.sub(r'\s+\.\s+', ' ', text)
        text = re.sub(r'[^\w\sàáảãạăắằẳẵặâấầẩẫậèéẻẽẹêếềểễệìíỉĩịòóỏõọôốồổỗộơớờởỡợùúủũụưứừửữựỳýỷỹỵđ.,!?;:@%]', ' ', text)
        return text

    def _normalize_whitespace(self, text: str) -> str:
        """Collapse runs of whitespace and trim the ends."""
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        return text
374
+
375
+
376
if __name__ == "__main__":
    # Demo driver: run the normalizer over a set of representative inputs
    # (prices, phones, units, temperatures, dates, times) and print each
    # input/output pair.
    tts_normalizer = VietnameseTTSNormalizer()

    samples = [
        "Giá 2.500.000đ (giảm 50%), mua trước 14h30 ngày 15/12/2025",
        "Liên hệ: 0912-345-678 hoặc email@example.com",
        "Tốc độ 120km/h, trọng lượng 75kg",
        "Nhiệt độ 36,5°C, độ ẩm 80%",
        "Số pi = 3,14159",
        "Giá trị tăng 2.5M, đạt 10B",
        "Nhiệt độ -15°C vào mùa đông",
        "Điện áp 220V, công suất 2.5kW, tần số 50Hz",
        "Tôi đi lấy l nước về nhà",
        "Cần 5l nước cho công thức này",
        "Vận tốc ánh sáng 299792km/s",
        "Mật độ dân số 450 người/km2",
        "Công suất 100 W/m2",
        "Hôm nay 2025-01-15",
        "Gọi +84 912 345 678",
        "Nhiệt độ 25°C lúc 14:30:45",
        "Ngày 15/12/25",
        "Giá 3.140.159",
    ]

    banner = "=" * 80
    print(banner)
    print("VIETNAMESE TTS NORMALIZATION TEST")
    print(banner)

    for sample in samples:
        print(f"\n📝 Input: {sample}")
        print(f"🎵 Output: {tts_normalizer.normalize(sample)}")
        print("-" * 80)
utils/phoneme_dict.json ADDED
The diff for this file is too large to render. See raw diff
 
utils/phonemize_text.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import platform
4
+ import glob
5
+ from phonemizer import phonemize
6
+ from phonemizer.backend.espeak.espeak import EspeakWrapper
7
+ from utils.normalize_text import VietnameseTTSNormalizer
8
+
9
+ # Configuration
10
+ PHONEME_DICT_PATH = os.getenv(
11
+ 'PHONEME_DICT_PATH',
12
+ os.path.join(os.path.dirname(__file__), "phoneme_dict.json")
13
+ )
14
+
15
def load_phoneme_dict(path=PHONEME_DICT_PATH):
    """Load the word->phoneme dictionary from the JSON file at *path*.

    Raises FileNotFoundError with setup guidance when the file is missing.
    """
    try:
        handle = open(path, "r", encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Phoneme dictionary not found at {path}. "
            "Please create it or set PHONEME_DICT_PATH environment variable."
        )
    with handle:
        return json.loads(handle.read())
25
+
26
def setup_espeak_library():
    """Configure the eSpeak shared-library path for the current OS.

    Dispatches to the platform-specific setup helper; raises OSError for
    anything other than Windows, Linux, or macOS.
    """
    platform_setup = {
        "Windows": _setup_windows_espeak,
        "Linux": _setup_linux_espeak,
        "Darwin": _setup_macos_espeak,
    }
    system = platform.system()
    setup = platform_setup.get(system)
    if setup is None:
        raise OSError(
            f"Unsupported OS: {system}. "
            "Only Windows, Linux, and macOS are supported."
        )
    setup()
41
+
42
+ def _setup_windows_espeak():
43
+ """Setup eSpeak for Windows."""
44
+ default_path = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
45
+ if os.path.exists(default_path):
46
+ EspeakWrapper.set_library(default_path)
47
+ else:
48
+ raise FileNotFoundError(
49
+ f"eSpeak library not found at {default_path}. "
50
+ "Please install eSpeak NG from: https://github.com/espeak-ng/espeak-ng/releases"
51
+ )
52
+
53
def _setup_linux_espeak():
    """Locate and register the eSpeak (NG) shared library on Linux.

    Probes common install locations; picks the shortest matching filename
    (the bare .so over versioned .so.X names). Raises RuntimeError with
    distro-specific install hints when nothing is found.
    """
    candidate_globs = (
        "/usr/lib/x86_64-linux-gnu/libespeak-ng.so*",
        "/usr/lib/x86_64-linux-gnu/libespeak.so*",
        "/usr/lib/libespeak-ng.so*",
        "/usr/lib64/libespeak-ng.so*",
        "/usr/local/lib/libespeak-ng.so*",
    )

    for candidate in candidate_globs:
        hits = glob.glob(candidate)
        if hits:
            # min(key=len) == sorted(key=len)[0]: shortest name, first wins ties.
            EspeakWrapper.set_library(min(hits, key=len))
            return

    raise RuntimeError(
        "eSpeak NG library not found. Install with:\n"
        " Ubuntu/Debian: sudo apt-get install espeak-ng\n"
        " Fedora: sudo dnf install espeak-ng\n"
        " Arch: sudo pacman -S espeak-ng\n"
        "See: https://github.com/pnnbao97/VieNeu-TTS/issues/5"
    )
76
+
77
def _setup_macos_espeak():
    """Register the eSpeak NG dylib on macOS.

    Honors the PHONEMIZER_ESPEAK_LIBRARY override first, then checks the
    standard Homebrew (Apple Silicon and Intel) and MacPorts locations.
    """
    candidates = [
        os.environ.get('PHONEMIZER_ESPEAK_LIBRARY'),  # explicit user override
        "/opt/homebrew/lib/libespeak-ng.dylib",  # Apple Silicon
        "/usr/local/lib/libespeak-ng.dylib",  # Intel
        "/opt/local/lib/libespeak-ng.dylib",  # MacPorts
    ]

    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            EspeakWrapper.set_library(candidate)
            return

    raise FileNotFoundError(
        "eSpeak library not found. Install with:\n"
        " brew install espeak-ng\n"
        "Or set: export PHONEMIZER_ESPEAK_LIBRARY=/path/to/libespeak-ng.dylib"
    )
98
+
99
# Module-level initialization, executed at import time: configure the
# espeak backend, load the shared phoneme dictionary, and build the text
# normalizer that phonemize_text / phonemize_with_dict use below.
# Any failure is printed for visibility and then re-raised, so importing
# this module fails fast when the environment is not set up.
try:
    setup_espeak_library()
    phoneme_dict = load_phoneme_dict()
    normalizer = VietnameseTTSNormalizer()
except Exception as e:
    print(f"Initialization error: {e}")
    raise
107
+
108
def phonemize_text(text: str) -> str:
    """Normalize *text* and convert it to phonemes with the espeak backend.

    The whole (normalized) string is phonemized in one call, with
    punctuation preserved and stress marks included.
    """
    normalized = normalizer.normalize(text)
    return phonemize(
        normalized,
        language="vi",
        backend="espeak",
        preserve_punctuation=True,
        with_stress=True,
        language_switch="remove-flags",
    )
119
+
120
def phonemize_with_dict(text: str, phoneme_dict=phoneme_dict) -> str:
    """Phonemize *text*, preferring dictionary lookups over the espeak backend.

    Words missing from *phoneme_dict* are phonemized on the fly and written
    back into it.  The default argument is the shared module-level
    dictionary, so those new entries are cached across calls.  Words that
    fail to phonemize are kept verbatim after printing a warning.
    """
    pieces = []

    for token in normalizer.normalize(text).split():
        if token in phoneme_dict:
            pieces.append(phoneme_dict[token])
            continue

        try:
            phones = phonemize(
                token,
                language='vi',
                backend='espeak',
                preserve_punctuation=True,
                with_stress=True,
                language_switch='remove-flags'
            )

            # Replace the first phoneme of r-initial words with 'ɹ' —
            # presumably correcting espeak's rendering of Vietnamese
            # initial 'r'; TODO confirm against the backend's output.
            if token.lower().startswith('r'):
                phones = 'ɹ' + phones[1:]

            phoneme_dict[token] = phones
        except Exception as e:
            print(f"Warning: Could not phonemize '{token}': {e}")
            phones = token

        pieces.append(phones)

    return ' '.join(pieces)