Spaces:

abreza
/

mana-tts

Running on Zero

App Files Files Community

abreza committed on Nov 26, 2025

Commit

3784ae7

1 Parent(s): 766414c

fix: persian number and phone number and fix parallel-wavegan installation

Browse files

Files changed (6) hide show

app.py +10 -6
persian_numbers.py +295 -0
requirements.txt +3 -4
setup.py +16 -2
synthesis.py +2 -11
text_utils.py +0 -84

app.py CHANGED Viewed

@@ -1,33 +1,37 @@
 import os
 import warnings
 from config import models_path, results_path, sample_path
-from setup import setup_environment
 from synthesis import load_models
 from interface import create_interface
 warnings.filterwarnings("ignore")
 def main():
     os.makedirs(models_path, exist_ok=True)
     os.makedirs(results_path, exist_ok=True)
     if (not os.path.exists(os.path.join(models_path, 'encoder.pt')) or
         not os.path.exists(os.path.join(models_path, 'synthesizer.pt')) or
         not os.path.exists(os.path.join(models_path, 'vocoder_HiFiGAN.pkl')) or
-        not os.path.exists(sample_path)):
         setup_success = setup_environment()
         if not setup_success:
             print("Setup failed. Exiting.")
             exit(1)
         print("Setup completed successfully.")
     load_success = load_models()
     if not load_success:
         print("Failed to load models. Exiting.")
         exit(1)
     demo = create_interface()
     demo.launch()
 if __name__ == "__main__":
-    main()

 import os
 import warnings
 from config import models_path, results_path, sample_path
+from setup import setup_environment, install_dependencies
 from synthesis import load_models
 from interface import create_interface
 warnings.filterwarnings("ignore")
 def main():
+    """Prepare the runtime, load the models and launch the interface.
+
+    Exits the process with status 1 when environment setup or model
+    loading reports failure.
+    """
+    # Make sure the model and result directories exist before anything
+    # looks inside them.
     os.makedirs(models_path, exist_ok=True)
     os.makedirs(results_path, exist_ok=True)
+    # Runs on every start, before the model files are checked.
+    install_dependencies()
+    # Run the setup step when any of the three model files or the sample
+    # path is missing.
     if (not os.path.exists(os.path.join(models_path, 'encoder.pt')) or
         not os.path.exists(os.path.join(models_path, 'synthesizer.pt')) or
         not os.path.exists(os.path.join(models_path, 'vocoder_HiFiGAN.pkl')) or
+            not os.path.exists(sample_path)):
         setup_success = setup_environment()
         if not setup_success:
             print("Setup failed. Exiting.")
+            # NOTE(review): exit() is the helper installed by the site
+            # module; sys.exit() is the usual choice in scripts.
             exit(1)
         print("Setup completed successfully.")
     load_success = load_models()
     if not load_success:
         print("Failed to load models. Exiting.")
         exit(1)
+    # Build the interface object and start serving it.
     demo = create_interface()
     demo.launch()
 if __name__ == "__main__":
+    main()

persian_numbers.py ADDED Viewed

	@@ -0,0 +1,295 @@

+import re
+# Persian word for each single ASCII digit, keyed by the digit character.
+# Several words carry short-vowel marks (e.g. 'صِفر', 'شِش', 'نُه') --
+# presumably to steer pronunciation in the TTS front end; confirm before
+# normalising them.
+DIGITS_MAP = {
+    '0': 'صِفر', '1': 'یک', '2': 'دو', '3': 'سه', '4': 'چهار',
+    '5': 'پنج', '6': 'شِش', '7': 'هفت', '8': 'هشت', '9': 'نُه'
+}
+# Persian words for 10-19 and for the multiples of ten from 20 to 90,
+# keyed by integer value.
+TENS = {
+    10: 'دَه', 11: 'یازده', 12: 'دوازده', 13: 'سیزده', 14: 'چهارده',
+    15: 'پانزده', 16: 'شانزده', 17: 'هفده', 18: 'هجده', 19: 'نوزده',
+    20: 'بیست', 30: 'سی', 40: 'چهل', 50: 'پنجاه',
+    60: 'شصت', 70: 'هفتاد', 80: 'هشتاد', 90: 'نود'
+}
+# Persian words for the multiples of one hundred from 100 to 900, keyed by
+# integer value.
+HUNDREDS = {
+    100: 'صَد', 200: 'دویست', 300: 'سیصد', 400: 'چهارصد', 500: 'پانصد',
+    600: 'ششصد', 700: 'هفتصد', 800: 'هشتصد', 900: 'نهصد'
+}
+def _convert_three_digit(num: int) -> str:
+    if num == 0:
+        return ''
+    if num < 10:
+        return DIGITS_MAP[str(num)]
+    elif num < 20:
+        return TENS[num]
+    elif num < 100:
+        tens_part = (num // 10) * 10
+        ones_part = num % 10
+        if ones_part == 0:
+            return TENS[tens_part]
+        return f"{TENS[tens_part]} و {DIGITS_MAP[str(ones_part)]}"
+    else:
+        hundreds_part = (num // 100) * 100
+        rem = num % 100
+        if rem == 0:
+            return HUNDREDS[hundreds_part]
+        return f"{HUNDREDS[hundreds_part]} و {_convert_three_digit(rem)}"
+def num_to_text(num: int) -> str:
+    if num == 0:
+        return 'صِفر'
+    if num < 0:
+        return f"مَنفی {num_to_text(abs(num))}"
+    if num < 1000:
+        return _convert_three_digit(num)
+    parts = []
+    if num >= 1_000_000_000:
+        billions = num // 1_000_000_000
+        parts.append(f"{_convert_three_digit(billions)} میلیارد")
+        num %= 1_000_000_000
+    if num >= 1_000_000:
+        millions = num // 1_000_000
+        parts.append(f"{_convert_three_digit(millions)} میلیون")
+        num %= 1_000_000
+    if num >= 1000:
+        thousands = num // 1000
+        parts.append(f"{_convert_three_digit(thousands)} هزار")
+        num %= 1000
+    if num > 0:
+        parts.append(_convert_three_digit(num))
+    return ' و '.join(parts)
+def _read_phone_chunk(chunk: str) -> str:
+    if not chunk:
+        return ""
+    if all(c == '0' for c in chunk):
+        count = len(chunk)
+        if count == 2:
+            return "دو صِفر"
+        elif count == 3:
+            return "سِِتا صفر"
+        elif count == 4:
+            return "چهارتا صفر"
+        else:
+            return f"{num_to_text(count)} تا صِفر"
+    result_parts = []
+    temp_chunk = chunk
+    while temp_chunk.startswith('0'):
+        result_parts.append("صِفر")
+        temp_chunk = temp_chunk[1:]
+    if temp_chunk:
+        val = int(temp_chunk)
+        result_parts.append(num_to_text(val))
+    return " ".join(result_parts)
+def _smart_split_phone(phone_str: str, has_plus: bool = False) -> list:
+    length = len(phone_str)
+    chunks = []
+    if has_plus:
+        if phone_str.startswith('98') and len(phone_str) > 5:
+            chunks.append("+" + phone_str[:2])
+            rest = phone_str[2:]
+            if rest.startswith('9'):
+                inner_chunks = _smart_split_phone("0" + rest)
+                chunks.extend(inner_chunks)
+                return chunks
+            else:
+                chunks.append(rest)
+                return chunks
+        elif phone_str.startswith('1') and length == 11:
+            chunks.append("+" + phone_str[:1])
+            chunks.append(phone_str[1:4])
+            chunks.append(phone_str[4:7])
+            chunks.append(phone_str[7:])
+            return chunks
+    if phone_str.startswith('09') and length == 11:
+        chunks.append(phone_str[:4])
+        rest = phone_str[4:]
+        part_mid = rest[:3]
+        part_end = rest[3:]
+        is_end_round = False
+        if part_end == '0000':
+            is_end_round = True
+        elif part_end.endswith('00'):
+            is_end_round = True
+        elif part_end[1] == '0' and part_end[2] == '0':
+            is_end_round = True
+        if part_mid == '000':
+            is_end_round = True
+        if is_end_round:
+            chunks.append(part_mid)
+            chunks.append(part_end)
+        else:
+            chunks.append(rest[:3])
+            chunks.append(rest[3:5])
+            chunks.append(rest[5:])
+        return chunks
+    if phone_str.startswith('0') and length == 11:
+        chunks.append(phone_str[:3])
+        rest = phone_str[3:]
+        part1 = rest[:4]
+        part2 = rest[4:]
+        if (part1.endswith('00') and part2.endswith('00')) or (part2 == '0000'):
+            chunks.append(part1)
+            chunks.append(part2)
+            return chunks
+        p3_1 = rest[:3]
+        p3_2 = rest[3:6]
+        if p3_1.endswith('0') and p3_2.endswith('0'):
+            chunks.append(p3_1)
+            chunks.append(p3_2)
+            chunks.append(rest[6:])
+            return chunks
+        chunks.append(rest[:2])
+        chunks.append(rest[2:4])
+        chunks.append(rest[4:6])
+        chunks.append(rest[6:])
+        return chunks
+    if not phone_str.startswith('0'):
+        if length == 8:
+            chunks.append(phone_str[:2])
+            chunks.append(phone_str[2:4])
+            chunks.append(phone_str[4:6])
+            chunks.append(phone_str[6:])
+            return chunks
+        elif length == 4:
+            chunks.append(phone_str)
+            return chunks
+        elif length == 5:
+            chunks.append(phone_str)
+            return chunks
+    if length == 10 and phone_str.startswith('9'):
+        chunks.append(phone_str[:3])
+        chunks.append(phone_str[3:6])
+        chunks.append(phone_str[6:8])
+        chunks.append(phone_str[8:])
+        return chunks
+    return [phone_str]
+def phone_to_text(raw_input: str) -> str:
+    clean_input = raw_input.replace(' ', '').replace(
+        '-', '').replace('(', '').replace(')', '')
+    persian_digits = '۰۱۲۳۴۵۶۷۸۹'
+    english_digits = '0123456789'
+    trans_table = str.maketrans(persian_digits, english_digits)
+    clean_input = clean_input.translate(trans_table)
+    has_plus = False
+    if clean_input.startswith('+'):
+        has_plus = True
+        clean_input = clean_input[1:]
+    if not clean_input.isdigit():
+        return raw_input
+    chunks = _smart_split_phone(clean_input, has_plus)
+    text_parts = []
+    for ch in chunks:
+        if ch.startswith('+'):
+            val = int(ch[1:])
+            text_parts.append(f"مثبت {num_to_text(val)}")
+        else:
+            text_parts.append(_read_phone_chunk(ch))
+    return "، ".join(text_parts)
+def _is_likely_phone(num_str: str) -> bool:
+    if num_str.startswith('+'):
+        return True
+    if num_str.startswith('09') and len(num_str) == 11:
+        return True
+    if num_str.startswith('0') and len(num_str) >= 7:
+        return True
+    return False
+def find_and_normalize_numbers(text: str) -> str:
+    text = text.translate(str.maketrans('٠١٢٣٤٥٦٧٨٩', '0123456789'))\
+                .translate(str.maketrans('۰۱۲۳۴۵۶۷۸۹', '0123456789'))
+    pattern = r'(?:\+|-)?\d+(?:[,\-]\d+)*'
+    def replace_match(match):
+        original_str = match.group()
+        clean_str = original_str.replace(',', '')
+        if _is_likely_phone(clean_str):
+            return phone_to_text(clean_str)
+        else:
+            try:
+                val = int(clean_str)
+                return num_to_text(val)
+            except ValueError:
+                return original_str
+    return re.sub(pattern, replace_match, text)
+# Manual smoke test: running this module directly prints each sample
+# sentence next to its normalised form.
+if __name__ == "__main__":
+    # Samples covering 11-digit numbers with and without a hyphen, a '+'
+    # number, comma-grouped and plain integers, a negative number, and
+    # zero.
+    examples = [
+        "شماره من ۰۹۱۲۳۴۵۶۷۸۹ است",
+        "تلفن شرکت ۰۲۱۸۸۰۵۶۰۷۰ می باشد",
+        "کد تایید: ۸۸۹۹۱۱۰۰",
+        "تماس بین المللی: +۹۸۹۱۵۱۰۰۲۰۳۰",
+        "شارژ مستقیم ۰۹۳۵۲۰۰۳۰۴۰",
+        "کد پستی ۱۱۱۱۱۰۰۰۰۰",
+        "و با تلفن ۰۲۱-۸۸۸۰۳۳۵۴ تماس بگیرید",
+        "قیمت این کالا ۵,۴۰۰ تومان است",
+        "جمعیت ایران ۸۵۰۰۰۰۰۰ نفر است",
+        "دمای هوا منفی ۵ درجه است: -5",
+        "تعداد ۱۰۰۱ شب",
+        "عدد صفر 0"
+    ]
+    print("--- بررسی عملکرد کد ادغام شده ---\n")
+    for ex in examples:
+        converted = find_and_normalize_numbers(ex)
+        print(f"Original: {ex}")
+        print(f"Converted: {converted}")
+        # Separator line between samples.
+        print("-" * 30)

requirements.txt CHANGED Viewed

@@ -5,8 +5,6 @@ soundfile
 spaces
 requests
 gdown
-parallel_wavegan
 inflect
 librosa
 matplotlib
@@ -15,7 +13,8 @@ tqdm
 Unidecode
 visdom
 webrtcvad
-unidecode
 transformers
 nltk

 spaces
 requests
 gdown
 inflect
 librosa
 matplotlib
 Unidecode
 visdom
 webrtcvad
 transformers
 nltk
+PyYAML
+tensorboardX
+h5py

setup.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import requests
 import tarfile
 import gdown
@@ -38,9 +39,22 @@ def download_file(url, destination):
         return False
 def setup_environment():
     print("Setting up the environment for Persian TTS...")
     BASE_DIR_PATH = Path(BASE_DIR)
     MODEL_DIR = BASE_DIR_PATH / "saved_models" / "final_models"
     RESULTS_DIR = BASE_DIR_PATH / "results"
@@ -49,7 +63,7 @@ def setup_environment():
     os.makedirs(RESULTS_DIR, exist_ok=True)
     tacotron_repo = BASE_DIR_PATH / "pmt2"
     encoder_file = tacotron_repo / "saved_models" / "default" / "encoder.pt"
     if not os.path.exists(encoder_file):
         default_model_dir = tacotron_repo / "saved_models" / "default"

 import os
+import sys
 import requests
 import tarfile
 import gdown
         return False
+def install_dependencies():
+    print("Checking runtime dependencies...")
+    try:
+        import parallel_wavegan
+        print("✓ parallel-wavegan is already installed.")
+    except ImportError:
+        print("Installing parallel-wavegan with --no-build-isolation...")
+        cmd = f"{sys.executable} -m pip install parallel-wavegan>=0.5.4 --no-build-isolation"
+        success = run_command(cmd)
+        if not success:
+            print("WARNING: Failed to install parallel-wavegan. TTS might fail.")
 def setup_environment():
     print("Setting up the environment for Persian TTS...")
     BASE_DIR_PATH = Path(BASE_DIR)
     MODEL_DIR = BASE_DIR_PATH / "saved_models" / "final_models"
     RESULTS_DIR = BASE_DIR_PATH / "results"
     os.makedirs(RESULTS_DIR, exist_ok=True)
     tacotron_repo = BASE_DIR_PATH / "pmt2"
     encoder_file = tacotron_repo / "saved_models" / "default" / "encoder.pt"
     if not os.path.exists(encoder_file):
         default_model_dir = tacotron_repo / "saved_models" / "default"

synthesis.py CHANGED Viewed

@@ -7,7 +7,7 @@ import soundfile as sf
 import spaces
 from config import models_path, results_path, sample_path, BASE_DIR
 from sentence_splitter import PersianSentenceSplitter
-from text_utils import convert_number_to_text
 encoder = None
 synthesizer = None
@@ -56,16 +56,7 @@ def normalize_text_for_synthesis(text: str) -> str:
     text = re.sub(r'\s+', ' ', text)
     text = text.strip()
-    number_pattern = r'[۰-۹0-9٠-٩]+(?:[,،٬][۰-۹0-9٠-٩]+)*'
-    def replace_number(match):
-        num_str = match.group(0)
-        try:
-            return convert_number_to_text(num_str)
-        except:
-            return num_str
-    text = re.sub(number_pattern, replace_number, text)
     return text

 import spaces
 from config import models_path, results_path, sample_path, BASE_DIR
 from sentence_splitter import PersianSentenceSplitter
+from persian_numbers import find_and_normalize_numbers
 encoder = None
 synthesizer = None
     text = re.sub(r'\s+', ' ', text)
     text = text.strip()
+    text = find_and_normalize_numbers(text)
     return text

text_utils.py DELETED Viewed

@@ -1,84 +0,0 @@
-PERSIAN_DIGITS = {
-    '۰': 'صفر', '۱': 'یک', '۲': 'دو', '۳': 'سه', '۴': 'چهار',
-    '۵': 'پنج', '۶': 'شش', '۷': 'هفت', '۸': 'هشت', '۹': 'نه',
-    '0': 'صفر', '1': 'یک', '2': 'دو', '3': 'سه', '4': 'چهار',
-    '5': 'پنج', '6': 'شش', '7': 'هفت', '8': 'هشت', '9': 'نه'
-}
-PERSIAN_NUMBERS = {
-    10: 'ده', 11: 'یازده', 12: 'دوازده', 13: 'سیزده', 14: 'چهارده',
-    15: 'پانزده', 16: 'شانزده', 17: 'هفده', 18: 'هجده', 19: 'نوزده',
-    20: 'بیست', 30: 'سی', 40: 'چهل', 50: 'پنجاه',
-    60: 'شصت', 70: 'هفتاد', 80: 'هشتاد', 90: 'نود',
-    100: 'صد', 200: 'دویست', 300: 'سیصد', 400: 'چهارصد', 500: 'پانصد',
-    600: 'ششصد', 700: 'هفتصد', 800: 'هشتصد', 900: 'نهصد'
-}
-def convert_three_digit(num: int) -> str:
-    if num == 0:
-        return ''
-    if num < 10:
-        return PERSIAN_DIGITS[str(num)]
-    elif num < 20:
-        return PERSIAN_NUMBERS[num]
-    elif num < 100:
-        tens = (num // 10) * 10
-        ones = num % 10
-        if ones == 0:
-            return PERSIAN_NUMBERS[tens]
-        return PERSIAN_NUMBERS[tens] + ' و ' + PERSIAN_DIGITS[str(ones)]
-    else:
-        hundreds = (num // 100) * 100
-        remainder = num % 100
-        if remainder == 0:
-            return PERSIAN_NUMBERS[hundreds]
-        return PERSIAN_NUMBERS[hundreds] + ' و ' + convert_three_digit(remainder)
-def convert_number_to_text(num_str: str, phone_mode: bool = False) -> str:
-    try:
-        num_str = num_str.replace(',', '').replace('٬', '').replace(' ', '')
-        persian_to_english = str.maketrans('۰۱۲۳۴۵۶۷۸۹', '0123456789')
-        num_str = num_str.translate(persian_to_english)
-        if phone_mode:
-            return ' '.join(PERSIAN_DIGITS[d] for d in num_str if d.isdigit())
-        num = int(num_str)
-        if num == 0:
-            return 'صفر'
-        if num < 0:
-            return 'منفی ' + convert_number_to_text(str(abs(num)))
-        if num < 1000:
-            return convert_three_digit(num)
-        parts = []
-        if num >= 1_000_000_000:
-            billions = num // 1_000_000_000
-            parts.append(convert_three_digit(billions) + ' میلیارد')
-            num %= 1_000_000_000
-        if num >= 1_000_000:
-            millions = num // 1_000_000
-            parts.append(convert_three_digit(millions) + ' میلیون')
-            num %= 1_000_000
-        if num >= 1000:
-            thousands = num // 1000
-            parts.append(convert_three_digit(thousands) + ' هزار')
-            num %= 1000
-        if num > 0:
-            parts.append(convert_three_digit(num))
-        return ' و '.join(parts)
-    except:
-        return num_str