| import re | |
| DIGITS_MAP = { | |
| '0': 'صِفر', '1': 'یک', '2': 'دو', '3': 'سه', '4': 'چهار', | |
| '5': 'پنج', '6': 'شِش', '7': 'هفت', '8': 'هشت', '9': 'نُه' | |
| } | |
| TENS = { | |
| 10: 'دَه', 11: 'یازده', 12: 'دوازده', 13: 'سیزده', 14: 'چهارده', | |
| 15: 'پانزده', 16: 'شانزده', 17: 'هفده', 18: 'هجده', 19: 'نوزده', | |
| 20: 'بیست', 30: 'سی', 40: 'چهل', 50: 'پنجاه', | |
| 60: 'شصت', 70: 'هفتاد', 80: 'هشتاد', 90: 'نود' | |
| } | |
| HUNDREDS = { | |
| 100: 'صَد', 200: 'دویست', 300: 'سیصد', 400: 'چهارصد', 500: 'پانصد', | |
| 600: 'ششصد', 700: 'هفتصد', 800: 'هشتصد', 900: 'نهصد' | |
| } | |
| def _convert_three_digit(num: int) -> str: | |
| if num == 0: | |
| return '' | |
| if num < 10: | |
| return DIGITS_MAP[str(num)] | |
| elif num < 20: | |
| return TENS[num] | |
| elif num < 100: | |
| tens_part = (num // 10) * 10 | |
| ones_part = num % 10 | |
| if ones_part == 0: | |
| return TENS[tens_part] | |
| return f"{TENS[tens_part]} و {DIGITS_MAP[str(ones_part)]}" | |
| else: | |
| hundreds_part = (num // 100) * 100 | |
| rem = num % 100 | |
| if rem == 0: | |
| return HUNDREDS[hundreds_part] | |
| return f"{HUNDREDS[hundreds_part]} و {_convert_three_digit(rem)}" | |
| def num_to_text(num: int) -> str: | |
| if num == 0: | |
| return 'صِفر' | |
| if num < 0: | |
| return f"مَنفی {num_to_text(abs(num))}" | |
| if num < 1000: | |
| return _convert_three_digit(num) | |
| parts = [] | |
| if num >= 1_000_000_000: | |
| billions = num // 1_000_000_000 | |
| parts.append(f"{_convert_three_digit(billions)} میلیارد") | |
| num %= 1_000_000_000 | |
| if num >= 1_000_000: | |
| millions = num // 1_000_000 | |
| parts.append(f"{_convert_three_digit(millions)} میلیون") | |
| num %= 1_000_000 | |
| if num >= 1000: | |
| thousands = num // 1000 | |
| parts.append(f"{_convert_three_digit(thousands)} هزار") | |
| num %= 1000 | |
| if num > 0: | |
| parts.append(_convert_three_digit(num)) | |
| return ' و '.join(parts) | |
| def _read_phone_chunk(chunk: str) -> str: | |
| if not chunk: | |
| return "" | |
| if all(c == '0' for c in chunk): | |
| count = len(chunk) | |
| if count == 2: | |
| return "دو صِفر" | |
| elif count == 3: | |
| return "سِِتا صفر" | |
| elif count == 4: | |
| return "چهارتا صفر" | |
| else: | |
| return f"{num_to_text(count)} تا صِفر" | |
| result_parts = [] | |
| temp_chunk = chunk | |
| while temp_chunk.startswith('0'): | |
| result_parts.append("صِفر") | |
| temp_chunk = temp_chunk[1:] | |
| if temp_chunk: | |
| val = int(temp_chunk) | |
| result_parts.append(num_to_text(val)) | |
| return " ".join(result_parts) | |
| def _smart_split_phone(phone_str: str, has_plus: bool = False) -> list: | |
| length = len(phone_str) | |
| chunks = [] | |
| if has_plus: | |
| if phone_str.startswith('98') and len(phone_str) > 5: | |
| chunks.append("+" + phone_str[:2]) | |
| rest = phone_str[2:] | |
| if rest.startswith('9'): | |
| inner_chunks = _smart_split_phone("0" + rest) | |
| chunks.extend(inner_chunks) | |
| return chunks | |
| else: | |
| chunks.append(rest) | |
| return chunks | |
| elif phone_str.startswith('1') and length == 11: | |
| chunks.append("+" + phone_str[:1]) | |
| chunks.append(phone_str[1:4]) | |
| chunks.append(phone_str[4:7]) | |
| chunks.append(phone_str[7:]) | |
| return chunks | |
| if phone_str.startswith('09') and length == 11: | |
| chunks.append(phone_str[:4]) | |
| rest = phone_str[4:] | |
| part_mid = rest[:3] | |
| part_end = rest[3:] | |
| is_end_round = False | |
| if part_end == '0000': | |
| is_end_round = True | |
| elif part_end.endswith('00'): | |
| is_end_round = True | |
| elif part_end[1] == '0' and part_end[2] == '0': | |
| is_end_round = True | |
| if part_mid == '000': | |
| is_end_round = True | |
| if is_end_round: | |
| chunks.append(part_mid) | |
| chunks.append(part_end) | |
| else: | |
| chunks.append(rest[:3]) | |
| chunks.append(rest[3:5]) | |
| chunks.append(rest[5:]) | |
| return chunks | |
| if phone_str.startswith('0') and length == 11: | |
| chunks.append(phone_str[:3]) | |
| rest = phone_str[3:] | |
| part1 = rest[:4] | |
| part2 = rest[4:] | |
| if (part1.endswith('00') and part2.endswith('00')) or (part2 == '0000'): | |
| chunks.append(part1) | |
| chunks.append(part2) | |
| return chunks | |
| p3_1 = rest[:3] | |
| p3_2 = rest[3:6] | |
| if p3_1.endswith('0') and p3_2.endswith('0'): | |
| chunks.append(p3_1) | |
| chunks.append(p3_2) | |
| chunks.append(rest[6:]) | |
| return chunks | |
| chunks.append(rest[:2]) | |
| chunks.append(rest[2:4]) | |
| chunks.append(rest[4:6]) | |
| chunks.append(rest[6:]) | |
| return chunks | |
| if not phone_str.startswith('0'): | |
| if length == 8: | |
| chunks.append(phone_str[:2]) | |
| chunks.append(phone_str[2:4]) | |
| chunks.append(phone_str[4:6]) | |
| chunks.append(phone_str[6:]) | |
| return chunks | |
| elif length == 4: | |
| chunks.append(phone_str) | |
| return chunks | |
| elif length == 5: | |
| chunks.append(phone_str) | |
| return chunks | |
| if length == 10 and phone_str.startswith('9'): | |
| chunks.append(phone_str[:3]) | |
| chunks.append(phone_str[3:6]) | |
| chunks.append(phone_str[6:8]) | |
| chunks.append(phone_str[8:]) | |
| return chunks | |
| return [phone_str] | |
| def phone_to_text(raw_input: str) -> str: | |
| clean_input = raw_input.replace(' ', '').replace( | |
| '-', '').replace('(', '').replace(')', '') | |
| persian_digits = '۰۱۲۳۴۵۶۷۸۹' | |
| english_digits = '0123456789' | |
| trans_table = str.maketrans(persian_digits, english_digits) | |
| clean_input = clean_input.translate(trans_table) | |
| has_plus = False | |
| if clean_input.startswith('+'): | |
| has_plus = True | |
| clean_input = clean_input[1:] | |
| if not clean_input.isdigit(): | |
| return raw_input | |
| chunks = _smart_split_phone(clean_input, has_plus) | |
| text_parts = [] | |
| for ch in chunks: | |
| if ch.startswith('+'): | |
| val = int(ch[1:]) | |
| text_parts.append(f"مثبت {num_to_text(val)}") | |
| else: | |
| text_parts.append(_read_phone_chunk(ch)) | |
| return "، ".join(text_parts) | |
| def _is_likely_phone(num_str: str) -> bool: | |
| if num_str.startswith('+'): | |
| return True | |
| if num_str.startswith('09') and len(num_str) == 11: | |
| return True | |
| if num_str.startswith('0') and len(num_str) >= 7: | |
| return True | |
| return False | |
| def find_and_normalize_numbers(text: str) -> str: | |
| text = text.translate(str.maketrans('٠١٢٣٤٥٦٧٨٩', '0123456789'))\ | |
| .translate(str.maketrans('۰۱۲۳۴۵۶۷۸۹', '0123456789')) | |
| pattern = r'(?:\+|-)?\d+(?:[,\-]\d+)*' | |
| def replace_match(match): | |
| original_str = match.group() | |
| clean_str = original_str.replace(',', '') | |
| if _is_likely_phone(clean_str): | |
| return phone_to_text(clean_str) | |
| else: | |
| try: | |
| val = int(clean_str) | |
| return num_to_text(val) | |
| except ValueError: | |
| return original_str | |
| return re.sub(pattern, replace_match, text) | |
| if __name__ == "__main__": | |
| examples = [ | |
| "شماره من ۰۹۱۲۳۴۵۶۷۸۹ است", | |
| "تلفن شرکت ۰۲۱۸۸۰۵۶۰۷۰ می باشد", | |
| "کد تایید: ۸۸۹۹۱۱۰۰", | |
| "تماس بین المللی: +۹۸۹۱۵۱۰۰۲۰۳۰", | |
| "شارژ مستقیم ۰۹۳۵۲۰۰۳۰۴۰", | |
| "کد پستی ۱۱۱۱۱۰۰۰۰۰", | |
| "و با تلفن ۰۲۱-۸۸۸۰۳۳۵۴ تماس بگیرید", | |
| "قیمت این کالا ۵,۴۰۰ تومان است", | |
| "جمعیت ایران ۸۵۰۰۰۰۰۰ نفر است", | |
| "دمای هوا منفی ۵ درجه است: -5", | |
| "تعداد ۱۰۰۱ شب", | |
| "عدد صفر 0" | |
| ] | |
| print("--- بررسی عملکرد کد ادغام شده ---\n") | |
| for ex in examples: | |
| converted = find_and_normalize_numbers(ex) | |
| print(f"Original: {ex}") | |
| print(f"Converted: {converted}") | |
| print("-" * 30) | |