Spaces:

piyazon
/

TTS_Piyazon

Sleeping

App Files Files Community

piyazon committed on Sep 30, 2025

Commit

87acc82

1 Parent(s): 233b82f

change model, fix string

Browse files

Files changed (2) hide show

app.py +303 -3
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -8,6 +8,13 @@ import soundfile as sf
 from pydantic import BaseModel
 import string
 import unicodedata
 import os
 # Access the secret named "MY_API_KEY"
@@ -90,10 +97,301 @@ def fix_string(batch):
     return batch
 # model = VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic")
 # tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic")
-model_ug = VitsModel.from_pretrained("piyazon/TTS-CV-Unique-Ug", token=hf_token)
-tokenizer_ug = AutoTokenizer.from_pretrained("piyazon/TTS-CV-Unique-Ug", token=hf_token)
 # model_ug = VitsModel.from_pretrained("piyazon/qutadgu_bilik")
 # tokenizer_ug = AutoTokenizer.from_pretrained("piyazon/qutadgu_bilik")
@@ -117,7 +415,9 @@ async def generate_tts(input: TextInput):
         if input.lang=="ug":
             model = model_ug
             tokenizer = tokenizer_ug
-            inputs = tokenizer(fix_string(input.text), return_tensors="pt")
         else:
             model = model_ru
             tokenizer = tokenizer_ru

 from pydantic import BaseModel
 import string
 import unicodedata
+from pypinyin import pinyin, Style
+import re
+from umsc import UgMultiScriptConverter
+# Initialize the Uyghur script converters, one instance per conversion direction.
+# NOTE(review): going by the variable names, 'UAS' is the Arabic script and 'ULS' the
+# Latin script — confirm against the umsc documentation.
+ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
+ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
 import os
 # Access the secret named "MY_API_KEY"
     return batch
def number_to_uyghur_arabic_script(number_str):
    """
    Convert a numeric string to its Uyghur pronunciation in Arabic script.

    Supported forms are plain integers ('123'), decimals ('0.001'), fractions
    with single-digit numerator and denominator ('1/4'), percentages ('25%')
    and ordinals marked by a trailing '_' or '-' ('1968_'). Commas and spaces
    are removed before parsing. The decimal part is read as a whole number
    preceded by a fractional term; trailing zeros in it are ignored, so '1.0'
    and '1.00' are pronounced the same way.

    Args:
        number_str (str): Number as a string (e.g., '123', '0.001', '1/4',
            '25%', '1968_', '123456789').

    Returns:
        str: Uyghur pronunciation in Arabic script. The cleaned input is
        returned unconverted when the integer or decimal part is longer than
        9 digits, when an ordinal is larger than 999999999, or when a fraction
        has a numerator or denominator outside 0-9.

    Raises:
        ValueError: If a part that must be numeric cannot be parsed by int()
            (for example an empty integer part, as in '.5').
    """
    # Uyghur number words in Arabic script
    digits = {
        0: 'نۆل', 1: 'بىر', 2: 'ئىككى', 3: 'ئۈچ', 4: 'تۆت', 5: 'بەش',
        6: 'ئالتە', 7: 'يەتتە', 8: 'سەككىز', 9: 'توققۇز'
    }
    ordinals = {
        1: 'بىرىنجى', 2: 'ئىككىنجى', 3: 'ئۈچىنجى', 4: 'تۆتىنجى', 5: 'بەشىنجى',
        6: 'ئالتىنجى', 7: 'يەتتىنجى', 8: 'سەككىزىنجى', 9: 'توققۇزىنجى'
    }
    tens = {
        10: 'ئون', 20: 'يىگىرمە', 30: 'ئوتتۇز', 40: 'قىرىق', 50: 'ئەللىك',
        60: 'ئاتمىش', 70: 'يەتمىش', 80: 'سەكسەن', 90: 'توقسان'
    }
    units = [
        (1000000000, 'مىليارد'),  # billion
        (1000000, 'مىليون'),      # million
        (1000, 'مىڭ'),             # thousand
        (100, 'يۈز')               # hundred
    ]
    fractions = {
        1: 'ئوندا',         # tenths
        2: 'يۈزدە',         # hundredths
        3: 'مىڭدە',         # thousandths
        4: 'ئون مىڭدە',      # ten-thousandths
        5: 'يۈز مىڭدە',     # hundred-thousandths
        6: 'مىليوندا',     # millionths
        7: 'ئون مىليوندا',  # ten-millionths
        8: 'يۈز مىليوندا', # hundred-millionths
        9: 'مىليارددا'     # billionths
    }

    def integer_to_words(num):
        """Spell out a non-negative integer as space-separated Uyghur words."""
        num = int(num)
        if num == 0:
            return digits[0]
        result = []
        # Large units first (billion, million, thousand, hundred).
        for value, unit_name in units:
            if num >= value:
                count, num = divmod(num, value)
                if count == 1:
                    # A single unit is said bare, e.g. 100 -> "يۈز", not "بىر يۈز".
                    result.append(unit_name)
                else:
                    result.append(integer_to_words(count) + ' ' + unit_name)
        # What is left is below 100: tens word, then ones word.
        if num >= 10:
            ten, one = divmod(num, 10)
            result.append(tens[ten * 10])
            if one:
                result.append(digits[one])
        elif num > 0:
            result.append(digits[num])
        return ' '.join(result)

    # Clean the input (remove commas or spaces)
    number_str = number_str.replace(',', '').replace(' ', '')

    # Ordinal: marked by a trailing '_' or '-'
    if number_str.endswith(('_', '-')):
        number_str = number_str[:-1]  # Drop the ordinal marker
        num = int(number_str)
        if num > 999999999:
            return number_str
        if num in ordinals:  # Single digits have dedicated ordinal forms
            return ordinals[num]
        # Spell the number out and turn only its last word into an ordinal.
        words = integer_to_words(num).split()
        last_two = num % 100
        # NOTE(review): the two branches below end with a space while the
        # single-digit and round-hundred forms do not; this is kept as-is
        # because callers may rely on it to separate the ordinal from a word
        # attached after the marker (e.g. '1968-يىلى') — confirm.
        if last_two in tens:
            words[-1] = tens[last_two] + 'ىنجى '  # e.g., 60_ -> ئاتمىشىنجى
        elif num % 10 in ordinals:
            words[-1] = ordinals[num % 10] + ' '
        else:
            words[-1] += 'ىنجى'  # Number ends in 00, e.g. 100_
        return ' '.join(words)

    # Percentage: remember the marker and convert the rest as a number
    is_percentage = number_str.endswith('%')
    if is_percentage:
        number_str = number_str[:-1]

    # Fraction: only single-digit numerator and denominator are spelled out
    if '/' in number_str:
        numerator, denominator = map(int, number_str.split('/'))
        if numerator in digits and denominator in digits:
            return digits[denominator] + 'دە ' + digits[numerator]
        return number_str

    # Split into integer and decimal parts
    parts = number_str.split('.')
    integer_part = parts[0]
    decimal_part = parts[1] if len(parts) > 1 else None

    # Both parts are limited to 9 digits; longer input is returned untouched
    if len(integer_part) > 9:
        return number_str
    if decimal_part and len(decimal_part) > 9:
        return number_str

    pronunciation = integer_to_words(int(integer_part))

    # Decimal part: "<integer> پۈتۈن <fraction term> <decimal as whole number>"
    if decimal_part:
        pronunciation += ' پۈتۈن'
        # Strip trailing zeros BEFORE deciding whether anything is left to
        # pronounce; an all-zero part such as '00' would otherwise reach
        # int('') and raise ValueError.
        significant = decimal_part.rstrip('0')
        if significant:
            fraction_term = fractions.get(len(significant), 'مىليارددا')
            pronunciation += ' ' + fraction_term + ' ' + integer_to_words(int(significant))

    if is_percentage:
        pronunciation += ' پىرسەنت'
    return pronunciation.strip()
def process_uyghur_text_with_numbers(text):
    """
    Replace the numbers inside a piece of Uyghur text with their spoken form.

    Every '%' is first replaced by the word ' پىرسەنت '. The text is then
    scanned for runs of digits and the symbols '/', '.', '%', '_' and '-'.
    A run that forms a fraction, ordinal, decimal or integer is passed to
    number_to_uyghur_arabic_script; anything else is copied through unchanged.

    Dots at the end of a run are treated as sentence punctuation rather than
    as part of the number, so '123.' is converted and keeps its dot. When an
    ordinal is directly followed by a word (e.g. '1-يىلى') a space is added
    after the converted ordinal if the converter did not already supply one.

    Args:
        text (str): Input string with Uyghur text and numbers
            (e.g., '1/4 كىلو 25% تەملىك').

    Returns:
        str: The text with valid numbers converted to Uyghur pronunciation
        and all non-numeric text preserved.
    """
    def is_valid_number(token):
        """Tell whether token has one of the shapes the converter accepts."""
        if '/' in token and token.count('/') == 1:
            # Fraction: e.g., "1/4"
            numerator, denominator = token.split('/')
            return numerator.isdigit() and denominator.isdigit()
        if token.endswith('%'):
            # Percentage: e.g., "25%"
            return token[:-1].isdigit()
        if token.endswith(('_', '-')):
            # Ordinal: e.g., "1_"
            return token[:-1].isdigit()
        if token.count('.') == 1:
            # Decimal: e.g., "3.14"
            whole, frac = token.split('.')
            return whole.isdigit() and frac.isdigit()
        # Integer: e.g., "123"
        return token.isdigit()

    text = text.replace('%', ' پىرسەنت ')
    number_chars = frozenset('0123456789' + '/.%_-')
    length = len(text)
    result = []
    i = 0
    while i < length:
        # Whitespace and ordinary characters are copied through one at a time.
        if text[i] not in number_chars:
            result.append(text[i])
            i += 1
            continue

        # Collect the whole run of number characters in one slice.
        start = i
        while i < length and text[i] in number_chars:
            i += 1
        token = text[start:i]

        # No valid number ends with '.', so trailing dots are punctuation that
        # was swallowed by the greedy scan; set them aside and put them back.
        core = token.rstrip('.')
        trailing_dots = token[len(core):]

        if not is_valid_number(core):
            # Not a number format we understand: keep the original characters.
            result.append(token)
            continue
        try:
            converted = number_to_uyghur_arabic_script(core)
        except ValueError:
            # If conversion fails, keep the original characters.
            result.append(token)
            continue

        # An ordinal marker sits directly between the number and the next
        # word; once the marker is consumed the two must not run together.
        glued_to_next_word = (
            core.endswith(('_', '-'))
            and not trailing_dots
            and i < length
            and not text[i].isspace()
        )
        if glued_to_next_word and not converted.endswith(' '):
            converted += ' '

        result.append(converted + trailing_dots)
    return ''.join(result)
def fix_pauctuations(batch):
    """
    Reduce text to the characters listed in the hard-coded vocabulary.

    The text is lower-cased and NFKC-normalized, four look-alike letters are
    folded onto their vocabulary forms, and every remaining character outside
    the vocabulary becomes whitespace: two spaces for '.', '?' and '؟', one
    space for anything else.

    Args:
        batch (str): Text to clean.

    Returns:
        str: The cleaned text, containing only vocabulary characters.
    """
    cleaned = unicodedata.normalize('NFKC', batch.lower())

    # Fold look-alike letters onto the forms present in the vocabulary.
    # None of the targets is itself a source, so one pass is enough.
    look_alikes = str.maketrans({'ژ': 'ج', 'ک': 'ك', 'ی': 'ى', 'ه': 'ە'})
    cleaned = cleaned.translate(look_alikes)

    allowed = {
        " ", "ئ", "ا", "ب", "ت", "ج", "خ", "د", "ر", "ز", "س",
        "ش", "غ", "ف", "ق", "ك", "ل", "م", "ن", "و", "ى", "ي",
        "پ", "چ", "ڭ", "گ", "ھ", "ۆ", "ۇ", "ۈ", "ۋ", "ې", "ە",
    }
    sentence_enders = ('.', '?', '؟')

    def keep_or_blank(char):
        """Return char if it is allowed, otherwise its whitespace stand-in."""
        if char in allowed:
            return char
        # Sentence-ending marks get a wider gap than other dropped characters.
        return '  ' if char in sentence_enders else ' '

    return ''.join(keep_or_blank(char) for char in cleaned)
def chinese_to_pinyin(mixed_text):
    """
    Romanize the Chinese parts of a mixed-language string.

    Each run of characters in the range U+4E00-U+9FFF is replaced by its
    Pinyin syllables without tone marks, separated by single spaces.
    Everything outside those runs is left exactly as it was.

    Args:
        mixed_text (str): Text that may contain Chinese characters next to
            other languages (e.g., English, Uyghur).

    Returns:
        str: The text with every Chinese run replaced by toneless Pinyin.
    """
    def romanize(found):
        # pinyin() yields one list per character; its first entry is used.
        per_character = pinyin(found.group(0), style=Style.NORMAL)
        return ' '.join(readings[0] for readings in per_character)

    # One or more consecutive CJK Unified Ideographs form a single match.
    return re.sub(r'[\u4e00-\u9fff]+', romanize, mixed_text)
 # model = VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic")
 # tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic")
+# Repository id of the Uyghur checkpoint; the model and its tokenizer are both
+# loaded from this one id so they cannot drift apart.
+uy_model_name = "piyazon/TTS-CV-Radio-RVC-Alikurban-Ug"
+# hf_token is defined outside this excerpt and is passed as the access token.
+model_ug = VitsModel.from_pretrained(uy_model_name, token=hf_token)
+tokenizer_ug = AutoTokenizer.from_pretrained(uy_model_name, token=hf_token)
 # model_ug = VitsModel.from_pretrained("piyazon/qutadgu_bilik")
 # tokenizer_ug = AutoTokenizer.from_pretrained("piyazon/qutadgu_bilik")
         if input.lang=="ug":
             model = model_ug
             tokenizer = tokenizer_ug
+            fixted_text = fix_pauctuations(process_uyghur_text_with_numbers(ug_latn_to_arab(chinese_to_pinyin(text))))
+            print(fixted_text)
+            inputs = tokenizer(fixted_text, return_tensors="pt")
         else:
             model = model_ru
             tokenizer = tokenizer_ru

requirements.txt CHANGED Viewed

@@ -13,4 +13,6 @@ torchcodec
 flask
 flask-cors
 pydantic
-soundfile

 flask
 flask-cors
 pydantic
+soundfile
+umsc
+pypinyin