Spaces:
Running
Running
File size: 5,759 Bytes
106478e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
def number_to_thai_text(num, digit_by_digit=False):
    """Spell out an integer in Thai words.

    Args:
        num: Integer to convert. Negative values are prefixed with "ลบ".
        digit_by_digit: When True, every digit is read on its own and the
            words are separated by single spaces (e.g. 120 ->
            "หนึ่ง สอง ศูนย์") instead of using place values.

    Returns:
        The Thai reading of ``num`` as a string. Place-value readings are
        written without spaces between the words.
    """
    # Thai digit names and place-value words (index = power of ten)
    thai_digits = {
        0: "ศูนย์", 1: "หนึ่ง", 2: "สอง", 3: "สาม", 4: "สี่",
        5: "ห้า", 6: "หก", 7: "เจ็ด", 8: "แปด", 9: "เก้า"
    }
    thai_places = ["", "สิบ", "ร้อย", "พัน", "หมื่น", "แสน", "ล้าน"]
    million = 1000000

    if num == 0:
        return thai_digits[0]

    # Read the sign first, then the magnitude, so that str(num) below never
    # contains a "-" that int() cannot parse.
    if num < 0:
        separator = " " if digit_by_digit else ""
        return "ลบ" + separator + number_to_thai_text(-num, digit_by_digit)

    if digit_by_digit:
        return " ".join(thai_digits[int(d)] for d in str(num))

    # Numbers of a million or more are read as "<millions>ล้าน<remainder>";
    # the millions part recurses, so multiples of "ล้าน" stack naturally.
    if num >= million:
        millions, remainder = divmod(num, million)
        result = number_to_thai_text(millions) + thai_places[6]
        if remainder == 1:
            # A trailing 1 after a higher-order part is read "เอ็ด"
            result += "เอ็ด"
        elif remainder:
            result += number_to_thai_text(remainder)
        return result

    digits = [int(d) for d in str(num)]
    highest_place = len(digits) - 1
    parts = []
    for index, digit in enumerate(digits):
        if digit == 0:
            continue  # Zeros are silent in place-value reading
        place = highest_place - index
        if place == 1 and digit == 1:
            parts.append(thai_places[1])  # 10-19: "สิบ", not "หนึ่งสิบ"
        elif place == 1 and digit == 2:
            parts.append("ยี่" + thai_places[1])  # 20-29: "ยี่สิบ"
        elif place == 0 and digit == 1 and highest_place > 0:
            # Units digit 1 is "เอ็ด" in every multi-digit number
            # (11, 21, 31, 101, ...), not only after 10 and 20.
            parts.append("เอ็ด")
        else:
            parts.append(thai_digits[digit] + thai_places[place])
    return "".join(parts)
def replace_numbers_with_thai(text):
    """Replace the numbers in ``text`` with their Thai spoken form.

    The text is split on whitespace and each token is handled on its own:

    * A token made only of digits and commas, optionally followed by one
      "." and more digits, is read as a number. Commas are dropped. An
      integer part of up to 7 digits is spelled with place values; a longer
      one is read digit by digit, keeping leading zeros. Fraction digits
      are always read one by one after "จุด".
    * Any other token that contains decimal digits is spelled character by
      character: digits become Thai digit words, other characters are kept.
    * Tokens without decimal digits are copied unchanged.

    Args:
        text: Input string.

    Returns:
        The converted tokens joined by single spaces, so any run of
        whitespace in the input collapses to one space.
    """
    import re

    # Digits/commas, then an optional ".digits" fraction
    number_token = re.compile(r'([\d,]+)(?:\.(\d+))?')

    def spell_digits(digits):
        # Read straight from the string (not via int()) so that leading
        # zeros, e.g. in phone numbers, are spoken too.
        return " ".join(number_to_thai_text(int(d)) for d in digits)

    def convert_match(match):
        integer_part = match.group(1).replace(',', '')
        decimal_part = match.group(2)

        # A token of commas only (e.g. ",") carries no number: keep it as is
        if not integer_part and decimal_part is None:
            return match.group(0)

        if len(integer_part) > 7:
            # Too long to read with place values: digit by digit
            result = spell_digits(integer_part)
        else:
            # An empty integer part (e.g. ",.5") is treated as 0
            result = number_to_thai_text(int(integer_part) if integer_part else 0)

        if decimal_part:
            result += "จุด " + spell_digits(decimal_part)
        return result

    def spell_mixed(word):
        # NOTE(review): every non-digit character is emitted as its own
        # space-separated item, which also separates Thai combining marks
        # from their base consonant -- confirm this is intended.
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can parse, so symbols such as "²" are passed through untouched.
        return " ".join(
            number_to_thai_text(int(ch)) if ch.isdecimal() else ch
            for ch in word
        )

    converted = []
    for word in text.split():
        match = number_token.fullmatch(word)
        if match:
            converted.append(convert_match(match))
        elif any(ch.isdecimal() for ch in word):
            converted.append(spell_mixed(word))
        else:
            converted.append(word)
    return " ".join(converted)
# Demo run, executed only when this file is started as a script
if __name__ == "__main__":
    # Spell out a handful of standalone integers
    for sample in (1, 12, 500, 6450, 100000, 12345678):
        print(f"{sample:,} -> {number_to_thai_text(sample)}")

    # Convert numbers embedded in sentences: an integer, a decimal,
    # and space-separated digit groups
    sample_sentences = (
        "ฉันมีเงิน 500 บาท",
        "ราคา 123.45 บาท",
        "บ้านเลขที่ 12 34",
        "วันที่ 15 08 2023",
    )
    for sentence in sample_sentences:
        converted = replace_numbers_with_thai(sentence)
        print(f"\nOriginal: {sentence}")
        print(f"Converted: {converted}")
|