Spaces:
Running
Running
| import re | |
# Character normalisation table: each key is replaced by its value, in the
# order listed here.  Keys written as "<letter>\u03xx" are a base letter
# followed by a Unicode combining mark (the decomposed spelling); they are
# folded into a single code point.
mapping = {
    # --- base letter + combining mark -> single code point ---
    "n\u0303": "\xf1",    # n + combining tilde      -> U+00F1 (n with tilde)
    "g\u0306": "\u011f",  # g + combining breve      -> U+011F (g with breve)
    "i\u0307": "i",       # i + combining dot above  -> plain "i"
    "u\u0308": "\xfc",    # u + combining diaeresis  -> U+00FC (u with diaeresis)
    "o\u0308": "\xf6",    # o + combining diaeresis  -> U+00F6 (o with diaeresis)
    # NOTE(review): both spellings of c-with-cedilla are mapped to U+04AB,
    # which is a *Cyrillic* letter, not Latin U+00E7.  Presumably this is the
    # symbol the downstream consumer expects -- confirm before changing.
    "\xe7": "\u04ab",
    "c\u0327": "\u04ab",
    "s\u0327": "\u015f",  # s + combining cedilla    -> U+015F (s with cedilla)
    "a\u0302": "\xe2",    # a + combining circumflex -> U+00E2 (a with circumflex)
    # --- letters rewritten as other letters ---
    "w": "v",
    "x": "ks",
}
# Word for the digit 0.  Kept apart from ``numbers_map`` so that a lookup of
# 0 in ``numbers_map`` fails; code that needs the word merges both tables.
zero = {0: "sıfır"}
# Number words keyed by the integer they name.  Only the building blocks are
# listed (1-9, the round tens, and the scale words); every other number is
# composed from these.  0 is deliberately absent -- see ``zero``.
numbers_map = {
    # units
    1: "bir",
    2: "eki",
    3: "üç",
    4: "dört",
    5: "beş",
    6: "altı",
    7: "yedi",
    8: "sekiz",
    9: "doquz",
    # round tens
    10: "on",
    20: "yigirmi",
    30: "otuz",
    40: "qırq",
    50: "elli",
    60: "altmış",
    70: "yetmiş",
    80: "seksen",
    90: "doqsan",
    # scale words
    100: "yüz",
    1000: "biñ",
    1_000_000: "million",
    1_000_000_000: "milliard",
}
def spell_numbers(numbers: str) -> str:
    """Spell a string digit by digit.

    Every ASCII digit ``0``-``9`` in *numbers* is replaced by its word
    followed by a single space; all other characters are left in place.
    Leading and trailing whitespace is stripped from the result, so
    ``"05"`` becomes the word for 0, a space, and the word for 5.
    """
    digit_words = {**numbers_map, **zero}
    # One translation pass; the trailing space separates consecutive digits.
    digit_table = str.maketrans(
        {str(digit): digit_words[digit] + " " for digit in range(10)}
    )
    return numbers.translate(digit_table).strip()
def num2word(n):
    """Return the words for the integer *n*, built from ``numbers_map``.

    * A value that is itself a key of ``numbers_map`` is returned as-is.
    * Below 100 the result is "<tens word> <unit word>".
    * From 100 up to (but excluding) 10**12 the value is split at the largest
      fitting scale (100, 1000, 10**6, 10**9) into
      "<count words> <scale word> <remainder words>", recursively.
    * Anything larger falls back to ``spell_numbers`` (digit by digit).

    0 is not a key of ``numbers_map``; it reaches the ``units == 0`` branch
    and yields ``""``.  The recursive cases depend on this so that an empty
    remainder (e.g. the "00" of 200) adds nothing but whitespace, which the
    final ``strip()`` removes.  Callers that need zero spoken must handle it
    themselves.

    NOTE(review): exact table values such as 100 or 1000 come back as the
    bare scale word, while e.g. 101 is rendered with a leading word for 1
    before the scale word -- confirm that this asymmetry is intended.
    """
    if n in numbers_map:
        return numbers_map[n]

    if n < 100:
        tens_digit, units = divmod(n, 10)
        if units == 0:
            return ""
        return f"{numbers_map[tens_digit * 10]} {numbers_map[units]}".strip()

    # (exclusive upper bound, scale the value is split at)
    scales = (
        (1_000, 100),
        (1_000_000, 1_000),
        (1_000_000_000, 1_000_000),
        (1_000_000_000_000, 1_000_000_000),
    )
    for upper_bound, scale in scales:
        if n < upper_bound:
            count, remainder = divmod(n, scale)
            return (
                f"{num2word(count)} {numbers_map[scale]} {num2word(remainder)}"
            ).strip()

    # Too large for the scale words available: read the digits one by one.
    return spell_numbers(str(n))
# Run of at least two "<digit>," pairs, e.g. the "1,2," in "1,2,3".
_DIGIT_LIST_RE = re.compile(r"((\d,)+){2,}")
# Optional sign, digits, then optionally a "." or "," followed by more digits.
_NUMBER_RE = re.compile(r"(\-|\+)?(\d)+((\.|,)?\d+)?")


def _number_token_to_words(token):
    """Turn one token matched by ``_NUMBER_RE`` into words."""
    prefix = ""
    number = token
    if number.startswith("-"):
        prefix = "minus "
        number = number[1:]
    elif number.startswith("+"):
        prefix = "plüs "
        number = number[1:]

    # Decimal forms: the part before the separator is read as a number (or
    # digit by digit when its value is zero, since num2word(0) is empty); the
    # part after it is always read digit by digit.
    for separator, separator_word in ((".", " noqta "), (",", " virgül ")):
        if separator in number:
            whole, _, fraction = number.partition(separator)
            if int(whole) != 0:
                whole_words = num2word(int(whole))
            else:
                whole_words = spell_numbers(whole)
            return prefix + separator_word.join(
                (whole_words, spell_numbers(fraction))
            )

    # A leading zero would be lost by int(); read such tokens digit by digit.
    if number.startswith("0"):
        return prefix + spell_numbers(number)
    return prefix + num2word(int(number))


def preprocess(text):
    """Normalise *text* and replace every number in it with words.

    Steps, in order:

    1. Lower-case the text and pad it with one space on each side (the
       padding is stripped again before returning).
    2. Replace every key of ``mapping`` with its value, in table order.
    3. Replace ``?`` and ``!`` with ``.``.
    4. Repeatedly rewrite the text until no digit is left:
       * a run of at least two "<digit>," pairs has its commas turned into
         spaces, so the digits are then read one at a time;
       * otherwise the leftmost number token (optional ``+``/``-``, digits,
         optional ``.``/``,`` plus digits) is replaced by its spoken form.

    Returns the processed text with surrounding whitespace removed.
    """
    text = text.lower()  # always treat lowercase
    text = " " + text + " "

    # The keys are literal symbols, not patterns, so plain replacement is
    # used rather than re.sub (which would interpret regex metacharacters).
    for symbol, replacement in mapping.items():
        text = text.replace(symbol, replacement)

    separators = "?!"  # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    while True:
        groups_match = _DIGIT_LIST_RE.search(text)
        if groups_match is not None:
            digit_run = groups_match.group()
            # Every occurrence of this exact run is rewritten at once.
            text = text.replace(digit_run, digit_run.replace(",", " "))
            continue

        number_match = _NUMBER_RE.search(text)
        if number_match is None:
            break
        # The search is leftmost, so splicing at the match span rewrites the
        # first occurrence of the token.
        start, end = number_match.span()
        text = (
            text[:start]
            + _number_token_to_words(number_match.group())
            + text[end:]
        )

    return text.strip()