Spaces:

cdactvm
/

Punjabi_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Feb 5, 2025

Commit

2b75669

verified ·

1 Parent(s): 02bd48a

Update text2int.py

Browse files

Files changed (1) hide show

text2int.py +88 -102

text2int.py CHANGED Viewed

@@ -1,102 +1,88 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[3]:
-import nbimporter
-from isNumber import is_number  # Remove or replace this if unnecessary
-def text_to_int(textnum, numwords={}):
-    # Define units, tens, and scales including "lac"
-    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
-            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
-            'sixteen', 'seventeen', 'eighteen', 'nineteen']
-    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
-    scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion']  # "lac" added
-    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}
-    ordinal_endings = [('ieth', 'y'), ('th', '')]
-    if not numwords:
-        numwords['and'] = (1, 0)  # Handle "one hundred and twenty"
-        # Add units, tens, and scales to numwords
-        for idx, word in enumerate(units):
-            numwords[word] = (1, idx)
-        for idx, word in enumerate(tens):
-            numwords[word] = (1, idx * 10)
-        for idx, word in enumerate(scales):
-            numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0)  # Handle "lac" as 10^5
-    # Remove hyphens and normalize input
-    textnum = textnum.replace('-', ' ')
-    current = result = 0
-    curstring = ''
-    onnumber = False
-    lastunit = False
-    lastscale = False
-    def is_numword(x):
-        return is_number(x) or x in numwords
-    def from_numword(x):
-        if is_number(x):
-            return 0, int(x.replace(',', ''))
-        return numwords[x]
-    for word in textnum.split():
-        if word in ordinal_words:
-            scale, increment = (1, ordinal_words[word])
-            current = current * scale + increment
-            if scale > 100:
-                result += current
-                current = 0
-            onnumber = True
-            lastunit = False
-            lastscale = False
-        else:
-            for ending, replacement in ordinal_endings:
-                if word.endswith(ending):
-                    word = f"{word[:-len(ending)]}{replacement}"
-            if not is_numword(word) or (word == 'and' and not lastscale):
-                if onnumber:
-                    curstring += repr(result + current) + " "
-                curstring += word + " "
-                result = current = 0
-                onnumber = False
-                lastunit = False
-                lastscale = False
-            else:
-                scale, increment = from_numword(word)
-                onnumber = True
-                if lastunit and word not in scales:
-                    curstring += repr(result + current) + " "
-                    result = current = 0
-                if scale > 1:
-                    current = max(1, current)
-                current = current * scale + increment
-                if scale >= 100:
-                    result += current
-                    current = 0
-                lastscale = word in scales
-                lastunit = word in units
-    if onnumber:
-        curstring += repr(result + current)
-    return curstring.strip()
-# In[ ]:

+from isNumber import is_number  # Remove or replace this if unnecessary
+def text_to_int(textnum, numwords={}):
+    # Define units, tens, and scales including "lac"
+    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
+            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
+            'sixteen', 'seventeen', 'eighteen', 'nineteen']
+    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
+    scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion']  # "lac" added
+    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}
+    ordinal_endings = [('ieth', 'y'), ('th', '')]
+    if not numwords:
+        numwords['and'] = (1, 0)  # Handle "one hundred and twenty"
+        # Add units, tens, and scales to numwords
+        for idx, word in enumerate(units):
+            numwords[word] = (1, idx)
+        for idx, word in enumerate(tens):
+            numwords[word] = (1, idx * 10)
+        for idx, word in enumerate(scales):
+            numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0)  # Handle "lac" as 10^5
+    # Remove hyphens and normalize input
+    textnum = textnum.replace('-', ' ')
+    current = result = 0
+    curstring = ''
+    onnumber = False
+    lastunit = False
+    lastscale = False
+    def is_numword(x):
+        return is_number(x) or x in numwords
+    def from_numword(x):
+        if is_number(x):
+            return 0, int(x.replace(',', ''))
+        return numwords[x]
+    for word in textnum.split():
+        if word in ordinal_words:
+            scale, increment = (1, ordinal_words[word])
+            current = current * scale + increment
+            if scale > 100:
+                result += current
+                current = 0
+            onnumber = True
+            lastunit = False
+            lastscale = False
+        else:
+            for ending, replacement in ordinal_endings:
+                if word.endswith(ending):
+                    word = f"{word[:-len(ending)]}{replacement}"
+            if not is_numword(word) or (word == 'and' and not lastscale):
+                if onnumber:
+                    curstring += repr(result + current) + " "
+                curstring += word + " "
+                result = current = 0
+                onnumber = False
+                lastunit = False
+                lastscale = False
+            else:
+                scale, increment = from_numword(word)
+                onnumber = True
+                if lastunit and word not in scales:
+                    curstring += repr(result + current) + " "
+                    result = current = 0
+                if scale > 1:
+                    current = max(1, current)
+                current = current * scale + increment
+                if scale >= 100:
+                    result += current
+                    current = 0
+                lastscale = word in scales
+                lastunit = word in units
+    if onnumber:
+        curstring += repr(result + current)
+    return curstring.strip()