hylee
committed on
Commit
·
e162c68
1
Parent(s):
e5a4b0f
allow for plurals for all math words
Browse files - handler.py +11 -9
handler.py
CHANGED
|
@@ -361,12 +361,14 @@ def load_math_terms():
|
|
| 361 |
math_terms = []
|
| 362 |
math_terms_dict = {}
|
| 363 |
for term in MATH_WORDS:
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
|
|
|
|
|
|
| 370 |
return math_terms, math_terms_dict
|
| 371 |
|
| 372 |
def run_math_density(transcript):
|
|
@@ -374,9 +376,9 @@ def run_math_density(transcript):
|
|
| 374 |
for i, utt in enumerate(transcript.utterances):
|
| 375 |
found_math_terms = set()
|
| 376 |
text = utt.get_clean_text(remove_punct=False)
|
| 377 |
-
logging.set_verbosity_info()
|
| 378 |
-
logger = logging.get_logger("transformers")
|
| 379 |
-
logger.info(f"clean text in math density: {text}")
|
| 380 |
num_math_terms = 0
|
| 381 |
for term in math_terms:
|
| 382 |
count = len(re.findall(term, text))
|
|
|
|
| 361 |
math_terms = []
|
| 362 |
math_terms_dict = {}
|
| 363 |
for term in MATH_WORDS:
|
| 364 |
+
math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
|
| 365 |
+
math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
|
| 366 |
+
# if term in MATH_PREFIXES:
|
| 367 |
+
# math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
|
| 368 |
+
# math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
|
| 369 |
+
# else:
|
| 370 |
+
# math_terms_dict[f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)"] = term
|
| 371 |
+
# math_terms.append(f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)")
|
| 372 |
return math_terms, math_terms_dict
|
| 373 |
|
| 374 |
def run_math_density(transcript):
|
|
|
|
| 376 |
for i, utt in enumerate(transcript.utterances):
|
| 377 |
found_math_terms = set()
|
| 378 |
text = utt.get_clean_text(remove_punct=False)
|
| 379 |
+
# logging.set_verbosity_info()
|
| 380 |
+
# logger = logging.get_logger("transformers")
|
| 381 |
+
# logger.info(f"clean text in math density: {text}")
|
| 382 |
num_math_terms = 0
|
| 383 |
for term in math_terms:
|
| 384 |
count = len(re.findall(term, text))
|