Spaces:

cdactvm
/

Punjabi_ASR_Demo

Running

App Files Community

Punjabi_ASR_Demo / text2int.py

cdactvm

Update text2int.py

c8bb448 verified 11 months ago

raw

history blame contribute delete

3.5 kB

	import math
	import re

	def is_number(s):
	    """Return True when *s* denotes a finite number.

	    Thousands separators are tolerated in strings ("1,234.5"). Values that
	    ``float()`` parses but that are not usable as quantities -- "nan",
	    "inf", "infinity" -- are rejected, because in free text these are far
	    more likely to be ordinary words than numbers.

	    Args:
	        s: A string, or any object ``float()`` can convert.

	    Returns:
	        bool: True for finite numeric input, False for everything else
	        (including ``None`` and the empty string).
	    """
	    # Only strings can carry comma separators; other objects go to float()
	    # untouched so that real numbers are still recognised.
	    candidate = s.replace(',', '') if isinstance(s, str) else s
	    try:
	        value = float(candidate)
	    except (TypeError, ValueError):
	        return False
	    return math.isfinite(value)

	def text_to_int(textnum, numwords=None):
	    """Replace spelled-out English numbers in *textnum* with digits.

	    The text is lower-cased, hyphens are turned into spaces, and every run
	    of number words ("two thousand five hundred", "twenty first",
	    "three point one four") is collapsed into its digit form. All other
	    words are passed through unchanged, separated by single spaces.

	    Rules worth knowing:
	      * Two consecutive unit words are separate numbers:
	        "one two three" -> "1 2 3".
	      * "and" is dropped only between a scale word and a following number
	        ("one hundred and twenty"); anywhere else it is a normal word.
	      * "point" starts a decimal fraction only when a number follows it;
	        otherwise it is a normal word.
	      * Integer digit tokens ("5", "1,000") take part like unit words;
	        tokens such as "1.5" or "nan" are left untouched.

	    Args:
	        textnum: Input sentence (str).
	        numwords: Optional mapping ``word -> (scale, increment)``. When
	            omitted or empty, the built-in English table (plus the Indian
	            "lac" = 10**5) is used; an empty dict supplied by the caller
	            is filled in place.

	    Returns:
	        str: The converted, lower-cased sentence.

	    Examples:
	        >>> text_to_int("one hundred and twenty")
	        '120'
	        >>> text_to_int("two point five apples")
	        '2.5 apples'
	    """
	    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
	             'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen',
	             'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen',
	             'nineteen']
	    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
	            'eighty', 'ninety']
	    # Magnitudes are spelled out explicitly: deriving them from the list
	    # position goes wrong as soon as a scale that is not a power of 1000
	    # (such as "lac") sits in the middle of the list.
	    scales = {'hundred': 100, 'thousand': 10 ** 3, 'lac': 10 ** 5,
	              'million': 10 ** 6, 'billion': 10 ** 9, 'trillion': 10 ** 12}
	    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fourth': 4,
	                     'fifth': 5, 'sixth': 6, 'seventh': 7, 'eighth': 8,
	                     'ninth': 9, 'tenth': 10, 'eleventh': 11, 'twelfth': 12}
	    ordinal_endings = [('ieth', 'y'), ('th', '')]

	    # A None default avoids sharing one mutable table between all calls.
	    if numwords is None:
	        numwords = {}
	    if not numwords:
	        for idx, word in enumerate(units):
	            numwords[word] = (1, idx)
	        for idx, word in enumerate(tens):
	            if word:
	                numwords[word] = (1, idx * 10)
	        for word, magnitude in scales.items():
	            numwords[word] = (magnitude, 0)

	    def parse_literal(token):
	        """Return the int value of a digit token, or None if it is not one."""
	        try:
	            return int(token.replace(',', ''))
	        except ValueError:
	            return None

	    def resolve(token):
	        """Classify *token*.

	        Returns ``(scale, increment, is_unit, is_scale)`` for a number
	        token and None for any other word. The token itself is never
	        modified, so non-numeric words ending in "th" stay intact.
	        """
	        if token == 'and':
	            # Connector, never a number -- even if a caller-supplied table
	            # happens to list it.
	            return None
	        if token in ordinal_words:
	            return 1, ordinal_words[token], True, False
	        # Try the token as written, then with each ordinal suffix removed
	        # ("twentieth" -> "twenty", "thirteenth" -> "thirteen", "5th" -> "5").
	        candidates = [token]
	        for ending, replacement in ordinal_endings:
	            if token.endswith(ending):
	                candidates.append(token[:-len(ending)] + replacement)
	        for candidate in candidates:
	            literal = parse_literal(candidate)
	            if literal is not None:
	                # Digit tokens behave like unit words so they add to a
	                # pending value instead of overwriting it.
	                return 1, literal, True, False
	            if candidate in numwords:
	                scale, increment = numwords[candidate]
	                return scale, increment, candidate in units, candidate in scales
	        return None

	    tokens = textnum.lower().replace('-', ' ').split()
	    resolved = [resolve(token) for token in tokens]

	    pieces = []      # output fragments, joined with single spaces
	    decimals = []    # digits collected after "point"
	    result = current = 0
	    on_number = in_decimal = last_unit = last_scale = False

	    def flush():
	        """Emit the number accumulated so far (if any) and reset the state."""
	        nonlocal result, current, on_number, in_decimal, last_unit, last_scale
	        if on_number:
	            number = str(result + current)
	            if decimals:
	                number += '.' + ''.join(decimals)
	            pieces.append(number)
	        result = current = 0
	        decimals.clear()
	        on_number = in_decimal = last_unit = last_scale = False

	    for pos, token in enumerate(tokens):
	        upcoming = resolved[pos + 1] if pos + 1 < len(tokens) else None

	        # "point" is a decimal marker only when a number follows it.
	        if token == 'point' and upcoming is not None:
	            if in_decimal:
	                flush()  # a second "point" starts a new number
	            in_decimal = True
	            last_unit = last_scale = False
	            continue

	        # "one hundred and twenty": swallow the connector.
	        if token == 'and' and last_scale and upcoming is not None:
	            continue

	        info = resolved[pos]
	        if info is None:
	            # Ordinary word: close any pending number right here so the
	            # digits (and their decimals) stay in sentence order.
	            flush()
	            pieces.append(token)
	            continue

	        scale, increment, is_unit, is_scale = info

	        if in_decimal:
	            on_number = True
	            decimals.append(str(increment))
	            continue

	        # A unit followed by anything but a scale begins a new number.
	        if last_unit and not is_scale:
	            flush()
	        on_number = True

	        if scale > 1:
	            current = max(1, current)  # bare "hundred" means 1 * 100
	        current = current * scale + increment
	        # Only scales above "hundred" close a group; "five hundred" must
	        # remain pending so that "five hundred thousand" multiplies it.
	        if scale > 100:
	            result += current
	            current = 0

	        last_unit = is_unit
	        last_scale = is_scale

	    flush()
	    return ' '.join(pieces)