abreza committed on
Commit
3784ae7
·
1 Parent(s): 766414c

fix: persian number and phone number and fix parallel-wavegan installation

Browse files
Files changed (6) hide show
  1. app.py +10 -6
  2. persian_numbers.py +295 -0
  3. requirements.txt +3 -4
  4. setup.py +16 -2
  5. synthesis.py +2 -11
  6. text_utils.py +0 -84
app.py CHANGED
@@ -1,33 +1,37 @@
1
  import os
2
  import warnings
3
  from config import models_path, results_path, sample_path
4
- from setup import setup_environment
5
  from synthesis import load_models
6
  from interface import create_interface
7
 
8
  warnings.filterwarnings("ignore")
9
 
 
10
  def main():
11
  os.makedirs(models_path, exist_ok=True)
12
  os.makedirs(results_path, exist_ok=True)
13
-
 
 
14
  if (not os.path.exists(os.path.join(models_path, 'encoder.pt')) or
15
  not os.path.exists(os.path.join(models_path, 'synthesizer.pt')) or
16
  not os.path.exists(os.path.join(models_path, 'vocoder_HiFiGAN.pkl')) or
17
- not os.path.exists(sample_path)):
18
  setup_success = setup_environment()
19
  if not setup_success:
20
  print("Setup failed. Exiting.")
21
  exit(1)
22
  print("Setup completed successfully.")
23
-
24
  load_success = load_models()
25
  if not load_success:
26
  print("Failed to load models. Exiting.")
27
  exit(1)
28
-
29
  demo = create_interface()
30
  demo.launch()
31
 
 
32
  if __name__ == "__main__":
33
- main()
 
1
  import os
2
  import warnings
3
  from config import models_path, results_path, sample_path
4
+ from setup import setup_environment, install_dependencies
5
  from synthesis import load_models
6
  from interface import create_interface
7
 
8
  warnings.filterwarnings("ignore")
9
 
10
+
def main():
    """Prepare the runtime, make sure model assets exist, and launch the UI.

    Steps, in order:
      1. Create the models and results directories if they are missing.
      2. Run install_dependencies().
      3. If any of the three model files or the sample path is missing,
         run setup_environment(); terminate with status 1 when it reports
         failure.
      4. Run load_models(); terminate with status 1 when it reports failure.
      5. Build the interface with create_interface() and call launch() on it.

    Raises:
        SystemExit: with code 1 when setup or model loading fails.
    """
    os.makedirs(models_path, exist_ok=True)
    os.makedirs(results_path, exist_ok=True)

    install_dependencies()

    required_paths = (
        os.path.join(models_path, 'encoder.pt'),
        os.path.join(models_path, 'synthesizer.pt'),
        os.path.join(models_path, 'vocoder_HiFiGAN.pkl'),
        sample_path,
    )
    if not all(os.path.exists(path) for path in required_paths):
        if not setup_environment():
            print("Setup failed. Exiting.")
            # `exit()` is injected by the `site` module for interactive use
            # and may be absent (e.g. `python -S`); raise SystemExit directly.
            raise SystemExit(1)
        print("Setup completed successfully.")

    if not load_models():
        print("Failed to load models. Exiting.")
        raise SystemExit(1)

    demo = create_interface()
    demo.launch()
34
 
35
+
36
  if __name__ == "__main__":
37
+ main()
persian_numbers.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Persian word for each single ASCII digit, keyed by the digit character.
DIGITS_MAP = {
    '0': 'صِفر',
    '1': 'یک',
    '2': 'دو',
    '3': 'سه',
    '4': 'چهار',
    '5': 'پنج',
    '6': 'شِش',
    '7': 'هفت',
    '8': 'هشت',
    '9': 'نُه',
}

# Persian words for 10-19 and for the round tens 20-90, keyed by value.
TENS = {
    10: 'دَه',
    11: 'یازده',
    12: 'دوازده',
    13: 'سیزده',
    14: 'چهارده',
    15: 'پانزده',
    16: 'شانزده',
    17: 'هفده',
    18: 'هجده',
    19: 'نوزده',
    20: 'بیست',
    30: 'سی',
    40: 'چهل',
    50: 'پنجاه',
    60: 'شصت',
    70: 'هفتاد',
    80: 'هشتاد',
    90: 'نود',
}

# Persian words for the round hundreds 100-900, keyed by value.
HUNDREDS = {
    100: 'صَد',
    200: 'دویست',
    300: 'سیصد',
    400: 'چهارصد',
    500: 'پانصد',
    600: 'ششصد',
    700: 'هفتصد',
    800: 'هشتصد',
    900: 'نهصد',
}
20
+
21
+
22
+ def _convert_three_digit(num: int) -> str:
23
+ if num == 0:
24
+ return ''
25
+
26
+ if num < 10:
27
+ return DIGITS_MAP[str(num)]
28
+ elif num < 20:
29
+ return TENS[num]
30
+ elif num < 100:
31
+ tens_part = (num // 10) * 10
32
+ ones_part = num % 10
33
+ if ones_part == 0:
34
+ return TENS[tens_part]
35
+ return f"{TENS[tens_part]} و {DIGITS_MAP[str(ones_part)]}"
36
+ else:
37
+ hundreds_part = (num // 100) * 100
38
+ rem = num % 100
39
+ if rem == 0:
40
+ return HUNDREDS[hundreds_part]
41
+ return f"{HUNDREDS[hundreds_part]} و {_convert_three_digit(rem)}"
42
+
43
+
44
def num_to_text(num: int) -> str:
    """Spell an integer in Persian words.

    Zero and negative numbers are handled explicitly; other values are split
    into billion / million / thousand / remainder groups that are joined
    with the Persian conjunction.

    Args:
        num: the integer to spell (any magnitude, either sign).

    Returns:
        The Persian wording of ``num``.
    """
    if num == 0:
        return 'صِفر'

    if num < 0:
        return f"مَنفی {num_to_text(-num)}"

    if num < 1000:
        return _convert_three_digit(num)

    scales = (
        (1_000_000_000, 'میلیارد'),
        (1_000_000, 'میلیون'),
        (1000, 'هزار'),
    )

    parts = []
    for scale, label in scales:
        group, num = divmod(num, scale)
        if group == 0:
            continue
        # Only the billions group can exceed 999 (for num >= 10**12).
        # _convert_three_digit cannot handle that, so spell the oversized
        # group recursively instead of failing with KeyError.
        if group < 1000:
            words = _convert_three_digit(group)
        else:
            words = num_to_text(group)
        parts.append(f"{words} {label}")

    if num > 0:
        parts.append(_convert_three_digit(num))

    return ' و '.join(parts)
75
+
76
+
77
+ def _read_phone_chunk(chunk: str) -> str:
78
+ if not chunk:
79
+ return ""
80
+
81
+ if all(c == '0' for c in chunk):
82
+ count = len(chunk)
83
+ if count == 2:
84
+ return "دو صِفر"
85
+ elif count == 3:
86
+ return "سِِتا صفر"
87
+ elif count == 4:
88
+ return "چهارتا صفر"
89
+ else:
90
+ return f"{num_to_text(count)} تا صِفر"
91
+
92
+ result_parts = []
93
+ temp_chunk = chunk
94
+
95
+ while temp_chunk.startswith('0'):
96
+ result_parts.append("صِفر")
97
+ temp_chunk = temp_chunk[1:]
98
+
99
+ if temp_chunk:
100
+ val = int(temp_chunk)
101
+ result_parts.append(num_to_text(val))
102
+
103
+ return " ".join(result_parts)
104
+
105
+
106
+ def _smart_split_phone(phone_str: str, has_plus: bool = False) -> list:
107
+ length = len(phone_str)
108
+ chunks = []
109
+
110
+ if has_plus:
111
+ if phone_str.startswith('98') and len(phone_str) > 5:
112
+ chunks.append("+" + phone_str[:2])
113
+ rest = phone_str[2:]
114
+ if rest.startswith('9'):
115
+
116
+ inner_chunks = _smart_split_phone("0" + rest)
117
+ chunks.extend(inner_chunks)
118
+ return chunks
119
+ else:
120
+ chunks.append(rest)
121
+ return chunks
122
+
123
+ elif phone_str.startswith('1') and length == 11:
124
+ chunks.append("+" + phone_str[:1])
125
+ chunks.append(phone_str[1:4])
126
+ chunks.append(phone_str[4:7])
127
+ chunks.append(phone_str[7:])
128
+ return chunks
129
+
130
+ if phone_str.startswith('09') and length == 11:
131
+ chunks.append(phone_str[:4])
132
+ rest = phone_str[4:]
133
+
134
+ part_mid = rest[:3]
135
+ part_end = rest[3:]
136
+
137
+ is_end_round = False
138
+ if part_end == '0000':
139
+ is_end_round = True
140
+ elif part_end.endswith('00'):
141
+ is_end_round = True
142
+ elif part_end[1] == '0' and part_end[2] == '0':
143
+ is_end_round = True
144
+ if part_mid == '000':
145
+ is_end_round = True
146
+
147
+ if is_end_round:
148
+ chunks.append(part_mid)
149
+ chunks.append(part_end)
150
+ else:
151
+ chunks.append(rest[:3])
152
+ chunks.append(rest[3:5])
153
+ chunks.append(rest[5:])
154
+ return chunks
155
+
156
+ if phone_str.startswith('0') and length == 11:
157
+ chunks.append(phone_str[:3])
158
+ rest = phone_str[3:]
159
+
160
+ part1 = rest[:4]
161
+ part2 = rest[4:]
162
+
163
+ if (part1.endswith('00') and part2.endswith('00')) or (part2 == '0000'):
164
+ chunks.append(part1)
165
+ chunks.append(part2)
166
+ return chunks
167
+
168
+ p3_1 = rest[:3]
169
+ p3_2 = rest[3:6]
170
+ if p3_1.endswith('0') and p3_2.endswith('0'):
171
+ chunks.append(p3_1)
172
+ chunks.append(p3_2)
173
+ chunks.append(rest[6:])
174
+ return chunks
175
+
176
+ chunks.append(rest[:2])
177
+ chunks.append(rest[2:4])
178
+ chunks.append(rest[4:6])
179
+ chunks.append(rest[6:])
180
+ return chunks
181
+
182
+ if not phone_str.startswith('0'):
183
+ if length == 8:
184
+ chunks.append(phone_str[:2])
185
+ chunks.append(phone_str[2:4])
186
+ chunks.append(phone_str[4:6])
187
+ chunks.append(phone_str[6:])
188
+ return chunks
189
+ elif length == 4:
190
+ chunks.append(phone_str)
191
+ return chunks
192
+ elif length == 5:
193
+ chunks.append(phone_str)
194
+ return chunks
195
+
196
+ if length == 10 and phone_str.startswith('9'):
197
+ chunks.append(phone_str[:3])
198
+ chunks.append(phone_str[3:6])
199
+ chunks.append(phone_str[6:8])
200
+ chunks.append(phone_str[8:])
201
+ return chunks
202
+
203
+ return [phone_str]
204
+
205
+
206
def phone_to_text(raw_input: str) -> str:
    """Convert a phone number string into spoken Persian.

    Spaces, hyphens and parentheses are dropped, Persian and Arabic-Indic
    digits are mapped to ASCII, and an optional leading '+' is remembered.
    The digits are then chunked by _smart_split_phone and each chunk is
    spelled out.

    Args:
        raw_input: the phone number as written.

    Returns:
        The chunks' spoken forms joined with the Persian comma, or
        ``raw_input`` unchanged when what remains after cleaning is not
        purely decimal digits.
    """
    # U+06F0..U+06F9 are the Persian digits, U+0660..U+0669 the Arabic-Indic
    # ones; both are normalised so that prefix checks such as "09" work.
    persian_digits = ''.join(chr(0x06F0 + i) for i in range(10))
    arabic_digits = ''.join(chr(0x0660 + i) for i in range(10))
    table = str.maketrans(
        persian_digits + arabic_digits,
        '0123456789' * 2,
        ' -()',  # separators to delete
    )
    clean_input = raw_input.translate(table)

    has_plus = clean_input.startswith('+')
    if has_plus:
        clean_input = clean_input[1:]

    # isdecimal() (unlike isdigit()) rejects characters such as superscript
    # digits that int() cannot parse further down the pipeline.
    if not clean_input.isdecimal():
        return raw_input

    text_parts = []
    for chunk in _smart_split_phone(clean_input, has_plus):
        if chunk.startswith('+'):
            # Country-code chunk: say "plus" followed by the code as a number.
            text_parts.append(f"مثبت {num_to_text(int(chunk[1:]))}")
        else:
            text_parts.append(_read_phone_chunk(chunk))

    return "، ".join(text_parts)
234
+
235
+
236
+ def _is_likely_phone(num_str: str) -> bool:
237
+ if num_str.startswith('+'):
238
+ return True
239
+
240
+ if num_str.startswith('09') and len(num_str) == 11:
241
+ return True
242
+
243
+ if num_str.startswith('0') and len(num_str) >= 7:
244
+ return True
245
+
246
+ return False
247
+
248
+
249
def find_and_normalize_numbers(text: str) -> str:
    """Replace every number in ``text`` with its spoken Persian form.

    Arabic-Indic and Persian digits are first mapped to ASCII. Each match
    (optional sign, digits, optional ',' / U+066C / '-' separated groups) is
    then read as a phone number when _is_likely_phone says so, and as a
    plain number otherwise.

    Args:
        text: arbitrary input text.

    Returns:
        The text with numbers spelled out. A match that cannot be converted
        is left exactly as it appeared (after digit normalisation).
    """
    arabic_digits = ''.join(chr(0x0660 + i) for i in range(10))
    persian_digits = ''.join(chr(0x06F0 + i) for i in range(10))
    text = text.translate(
        str.maketrans(arabic_digits + persian_digits, '0123456789' * 2)
    )

    # U+066C is the Arabic thousands separator; it is accepted alongside ','.
    pattern = r'(?:\+|-)?\d+(?:[,\u066C\-]\d+)*'

    def spell_run(run_match):
        return num_to_text(int(run_match.group()))

    def replace_match(match):
        original_str = match.group()
        clean_str = original_str.replace(',', '').replace('\u066C', '')

        try:
            if _is_likely_phone(clean_str):
                return phone_to_text(clean_str)

            if '-' in clean_str[1:]:
                # Hyphen-joined group that is not a phone number (such as
                # "2020-2021"): int() cannot parse it as a whole, so spell
                # each digit run and keep the hyphens.
                return re.sub(r'\d+', spell_run, clean_str)

            return num_to_text(int(clean_str))
        except (ValueError, KeyError):
            # Best effort: one unreadable number must not abort the whole
            # text, so the match is passed through untouched.
            return original_str

    return re.sub(pattern, replace_match, text)
269
+
270
+
271
if __name__ == "__main__":
    # Manual smoke test: print each sample next to its normalised form.
    phone_samples = (
        "شماره من ۰۹۱۲۳۴۵۶۷۸۹ است",
        "تلفن شرکت ۰۲۱۸۸۰۵۶۰۷۰ می باشد",
        "کد تایید: ۸۸۹۹۱۱۰۰",
        "تماس بین المللی: +۹۸۹۱۵۱۰۰۲۰۳۰",
        "شارژ مستقیم ۰۹۳۵۲۰۰۳۰۴۰",
        "کد پستی ۱۱۱۱۱۰۰۰۰۰",
        "و با تلفن ۰۲۱-۸۸۸۰۳۳۵۴ تماس بگیرید",
    )
    number_samples = (
        "قیمت این کالا ۵,۴۰۰ تومان است",
        "جمعیت ایران ۸۵۰۰۰۰۰۰ نفر است",
        "دمای هوا منفی ۵ درجه است: -5",
        "تعداد ۱۰۰۱ شب",
        "عدد صفر 0",
    )

    print("--- بررسی عملکرد کد ادغام شده ---\n")
    separator = "-" * 30
    for sample in phone_samples + number_samples:
        spoken = find_and_normalize_numbers(sample)
        print(f"Original: {sample}")
        print(f"Converted: {spoken}")
        print(separator)
requirements.txt CHANGED
@@ -5,8 +5,6 @@ soundfile
5
  spaces
6
  requests
7
  gdown
8
- parallel_wavegan
9
-
10
  inflect
11
  librosa
12
  matplotlib
@@ -15,7 +13,8 @@ tqdm
15
  Unidecode
16
  visdom
17
  webrtcvad
18
-
19
- unidecode
20
  transformers
21
  nltk
 
 
 
 
5
  spaces
6
  requests
7
  gdown
 
 
8
  inflect
9
  librosa
10
  matplotlib
 
13
  Unidecode
14
  visdom
15
  webrtcvad
 
 
16
  transformers
17
  nltk
18
+ PyYAML
19
+ tensorboardX
20
+ h5py
setup.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import requests
3
  import tarfile
4
  import gdown
@@ -38,9 +39,22 @@ def download_file(url, destination):
38
  return False
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def setup_environment():
42
  print("Setting up the environment for Persian TTS...")
43
-
44
  BASE_DIR_PATH = Path(BASE_DIR)
45
  MODEL_DIR = BASE_DIR_PATH / "saved_models" / "final_models"
46
  RESULTS_DIR = BASE_DIR_PATH / "results"
@@ -49,7 +63,7 @@ def setup_environment():
49
  os.makedirs(RESULTS_DIR, exist_ok=True)
50
 
51
  tacotron_repo = BASE_DIR_PATH / "pmt2"
52
-
53
  encoder_file = tacotron_repo / "saved_models" / "default" / "encoder.pt"
54
  if not os.path.exists(encoder_file):
55
  default_model_dir = tacotron_repo / "saved_models" / "default"
 
1
  import os
2
+ import sys
3
  import requests
4
  import tarfile
5
  import gdown
 
39
  return False
40
 
41
 
42
def install_dependencies():
    """Make sure parallel-wavegan is importable, installing it if needed.

    If ``import parallel_wavegan`` succeeds nothing is installed. Otherwise
    pip is invoked through the current interpreter with
    ``--no-build-isolation``. A failed install only prints a warning; the
    function always returns None.
    """
    print("Checking runtime dependencies...")
    try:
        import parallel_wavegan  # noqa: F401
    except ImportError:
        pass
    else:
        print("✓ parallel-wavegan is already installed.")
        return

    import importlib
    import subprocess

    print("Installing parallel-wavegan with --no-build-isolation...")
    # Pass an argv list instead of a command string: in a shell, the
    # unquoted ">=0.5.4" of the requirement would be parsed as an output
    # redirection to a file named "=0.5.4" and the version pin silently lost.
    # NOTE(review): the original went through run_command(), which is not
    # visible here; confirm nothing else depends on its logging.
    cmd = [
        sys.executable, "-m", "pip", "install",
        "parallel-wavegan>=0.5.4",
        "--no-build-isolation",
    ]
    if subprocess.run(cmd).returncode != 0:
        print("WARNING: Failed to install parallel-wavegan. TTS might fail.")
        return

    # The package was installed while this process was running; refresh the
    # import system's finder caches so a later import can see it.
    importlib.invalidate_caches()
53
+
54
+
55
  def setup_environment():
56
  print("Setting up the environment for Persian TTS...")
57
+
58
  BASE_DIR_PATH = Path(BASE_DIR)
59
  MODEL_DIR = BASE_DIR_PATH / "saved_models" / "final_models"
60
  RESULTS_DIR = BASE_DIR_PATH / "results"
 
63
  os.makedirs(RESULTS_DIR, exist_ok=True)
64
 
65
  tacotron_repo = BASE_DIR_PATH / "pmt2"
66
+
67
  encoder_file = tacotron_repo / "saved_models" / "default" / "encoder.pt"
68
  if not os.path.exists(encoder_file):
69
  default_model_dir = tacotron_repo / "saved_models" / "default"
synthesis.py CHANGED
@@ -7,7 +7,7 @@ import soundfile as sf
7
  import spaces
8
  from config import models_path, results_path, sample_path, BASE_DIR
9
  from sentence_splitter import PersianSentenceSplitter
10
- from text_utils import convert_number_to_text
11
 
12
  encoder = None
13
  synthesizer = None
@@ -56,16 +56,7 @@ def normalize_text_for_synthesis(text: str) -> str:
56
  text = re.sub(r'\s+', ' ', text)
57
  text = text.strip()
58
 
59
- number_pattern = r'[۰-۹0-9٠-٩]+(?:[,،٬][۰-۹0-9٠-٩]+)*'
60
-
61
- def replace_number(match):
62
- num_str = match.group(0)
63
- try:
64
- return convert_number_to_text(num_str)
65
- except:
66
- return num_str
67
-
68
- text = re.sub(number_pattern, replace_number, text)
69
 
70
  return text
71
 
 
7
  import spaces
8
  from config import models_path, results_path, sample_path, BASE_DIR
9
  from sentence_splitter import PersianSentenceSplitter
10
+ from persian_numbers import find_and_normalize_numbers
11
 
12
  encoder = None
13
  synthesizer = None
 
56
  text = re.sub(r'\s+', ' ', text)
57
  text = text.strip()
58
 
59
+ text = find_and_normalize_numbers(text)
 
 
 
 
 
 
 
 
 
60
 
61
  return text
62
 
text_utils.py DELETED
@@ -1,84 +0,0 @@
1
- PERSIAN_DIGITS = {
2
- '۰': 'صفر', '۱': 'یک', '۲': 'دو', '۳': 'سه', '۴': 'چهار',
3
- '۵': 'پنج', '۶': 'شش', '۷': 'هفت', '۸': 'هشت', '۹': 'نه',
4
- '0': 'صفر', '1': 'یک', '2': 'دو', '3': 'سه', '4': 'چهار',
5
- '5': 'پنج', '6': 'شش', '7': 'هفت', '8': 'هشت', '9': 'نه'
6
- }
7
-
8
- PERSIAN_NUMBERS = {
9
- 10: 'ده', 11: 'یازده', 12: 'دوازده', 13: 'سیزده', 14: 'چهارده',
10
- 15: 'پانزده', 16: 'شانزده', 17: 'هفده', 18: 'هجده', 19: 'نوزده',
11
- 20: 'بیست', 30: 'سی', 40: 'چهل', 50: 'پنجاه',
12
- 60: 'شصت', 70: 'هفتاد', 80: 'هشتاد', 90: 'نود',
13
- 100: 'صد', 200: 'دویست', 300: 'سیصد', 400: 'چهارصد', 500: 'پانصد',
14
- 600: 'ششصد', 700: 'هفتصد', 800: 'هشتصد', 900: 'نهصد'
15
- }
16
-
17
-
18
- def convert_three_digit(num: int) -> str:
19
- if num == 0:
20
- return ''
21
-
22
- if num < 10:
23
- return PERSIAN_DIGITS[str(num)]
24
- elif num < 20:
25
- return PERSIAN_NUMBERS[num]
26
- elif num < 100:
27
- tens = (num // 10) * 10
28
- ones = num % 10
29
- if ones == 0:
30
- return PERSIAN_NUMBERS[tens]
31
- return PERSIAN_NUMBERS[tens] + ' و ' + PERSIAN_DIGITS[str(ones)]
32
- else:
33
- hundreds = (num // 100) * 100
34
- remainder = num % 100
35
- if remainder == 0:
36
- return PERSIAN_NUMBERS[hundreds]
37
- return PERSIAN_NUMBERS[hundreds] + ' و ' + convert_three_digit(remainder)
38
-
39
-
40
- def convert_number_to_text(num_str: str, phone_mode: bool = False) -> str:
41
- try:
42
- num_str = num_str.replace(',', '').replace('٬', '').replace(' ', '')
43
-
44
- persian_to_english = str.maketrans('۰۱۲۳۴۵۶۷۸۹', '0123456789')
45
- num_str = num_str.translate(persian_to_english)
46
-
47
- if phone_mode:
48
- return ' '.join(PERSIAN_DIGITS[d] for d in num_str if d.isdigit())
49
-
50
- num = int(num_str)
51
-
52
- if num == 0:
53
- return 'صفر'
54
-
55
- if num < 0:
56
- return 'منفی ' + convert_number_to_text(str(abs(num)))
57
-
58
- if num < 1000:
59
- return convert_three_digit(num)
60
-
61
- parts = []
62
-
63
- if num >= 1_000_000_000:
64
- billions = num // 1_000_000_000
65
- parts.append(convert_three_digit(billions) + ' میلیارد')
66
- num %= 1_000_000_000
67
-
68
- if num >= 1_000_000:
69
- millions = num // 1_000_000
70
- parts.append(convert_three_digit(millions) + ' میلیون')
71
- num %= 1_000_000
72
-
73
- if num >= 1000:
74
- thousands = num // 1000
75
- parts.append(convert_three_digit(thousands) + ' هزار')
76
- num %= 1000
77
-
78
- if num > 0:
79
- parts.append(convert_three_digit(num))
80
-
81
- return ' و '.join(parts)
82
-
83
- except:
84
- return num_str