Add handling for fractions and hopefully other number uses
Browse files- handler.py +39 -3
handler.py
CHANGED
|
@@ -8,14 +8,46 @@ import re
|
|
| 8 |
import inflect
|
| 9 |
from typing import Dict, List, Any
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def convert_numbers_to_text(input_string):
|
| 12 |
p = inflect.engine()
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
new_words = []
|
| 15 |
|
| 16 |
for word in words:
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
year = int(word)
|
| 20 |
if year < 2000:
|
| 21 |
# Split the year into two parts
|
|
@@ -30,7 +62,9 @@ def convert_numbers_to_text(input_string):
|
|
| 30 |
word = word.replace(',','')
|
| 31 |
number = int(word)
|
| 32 |
word = p.number_to_words(number).replace(',', '')
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
return ' '.join(new_words)
|
| 36 |
|
|
@@ -131,6 +165,8 @@ class EndpointHandler:
|
|
| 131 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
| 132 |
|
| 133 |
given_text = data.get("inputs", "")
|
|
|
|
|
|
|
| 134 |
|
| 135 |
start_time = time.time()
|
| 136 |
|
|
|
|
| 8 |
import inflect
|
| 9 |
from typing import Dict, List, Any
|
| 10 |
|
| 11 |
+
def contains_special_characters(s):
    """Report whether *s* contains one of the stylised Unicode letters.

    Returns True when at least one character of ``s`` belongs to the
    character class ``[𝓵𝖾𝓞𝚟𝔟]`` and False otherwise.
    """
    found = re.search(r'[𝓵𝖾𝓞𝚟𝔟]', s)
    return found is not None
|
| 13 |
+
|
| 14 |
+
def check_punctuation(s):
    """Return the trailing punctuation mark of *s*, if it has one.

    Only '.', ',', '!' and '?' are recognised. When ``s`` ends with one of
    them, that single character is returned; otherwise (including for an
    empty string) the result is ''.
    """
    for mark in ('.', ',', '!', '?'):
        if s.endswith(mark):
            return mark
    return ''
|
| 25 |
+
|
| 26 |
def convert_numbers_to_text(input_string):
|
| 27 |
p = inflect.engine()
|
| 28 |
+
new_string = input_string
|
| 29 |
+
|
| 30 |
+
# Find patterns like [6/7] or other number-character combinations
|
| 31 |
+
mixed_patterns = re.findall(r'\[?\b\d+[^)\] ]*\]?', new_string)
|
| 32 |
+
for pattern in mixed_patterns:
|
| 33 |
+
# Isolate numbers from other characters
|
| 34 |
+
numbers = re.findall(r'\d+', pattern)
|
| 35 |
+
# Replace numbers with words within the pattern
|
| 36 |
+
for number in numbers:
|
| 37 |
+
number_word = p.number_to_words(number)
|
| 38 |
+
pattern_with_words = re.sub(number_word, number, pattern, 1)
|
| 39 |
+
new_string = new_string.replace(pattern, pattern_with_words)
|
| 40 |
+
|
| 41 |
+
words = new_string.split()
|
| 42 |
new_words = []
|
| 43 |
|
| 44 |
for word in words:
|
| 45 |
|
| 46 |
+
punct = check_punctuation(word)
|
| 47 |
+
|
| 48 |
+
if contains_special_characters(word):
|
| 49 |
+
pass
|
| 50 |
+
elif word.isdigit() and len(word) == 4: # Check for years (4-digit numbers)
|
| 51 |
year = int(word)
|
| 52 |
if year < 2000:
|
| 53 |
# Split the year into two parts
|
|
|
|
| 62 |
word = word.replace(',','')
|
| 63 |
number = int(word)
|
| 64 |
word = p.number_to_words(number).replace(',', '')
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
new_words.append(word+punct)
|
| 68 |
|
| 69 |
return ' '.join(new_words)
|
| 70 |
|
|
|
|
| 165 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
| 166 |
|
| 167 |
given_text = data.get("inputs", "")
|
| 168 |
+
given_text = given_text.replace('&','and')
|
| 169 |
+
given_text = given_text.replace('-',' ')
|
| 170 |
|
| 171 |
start_time = time.time()
|
| 172 |
|