make math terms dict more readable
Browse files- handler.py +6 -3
handler.py
CHANGED
|
@@ -259,15 +259,18 @@ class FocusingQuestionModel:
|
|
| 259 |
|
| 260 |
def load_math_terms():
    """Build the regex patterns used to look for math vocabulary.

    Returns:
        A list with one pattern string per entry of MATH_WORDS, in the same
        order. Each pattern requires a non-letter character (or the start/end
        of the text) on both sides of the term. Terms that are also found in
        MATH_PREFIXES additionally accept an optional "s" or "es" ending.
    """
    return [
        f"(^|[^a-zA-Z]){word}(s|es)?([^a-zA-Z]|$)"
        if word in MATH_PREFIXES
        else f"(^|[^a-zA-Z]){word}([^a-zA-Z]|$)"
        for word in MATH_WORDS
    ]
|
| 268 |
|
| 269 |
def run_math_density(transcript):
|
| 270 |
-
math_terms = load_math_terms()
|
| 271 |
for i, utt in enumerate(transcript.utterances):
|
| 272 |
found_math_terms = set()
|
| 273 |
text = utt.get_clean_text(remove_punct=False)
|
|
@@ -275,7 +278,7 @@ def run_math_density(transcript):
|
|
| 275 |
for term in math_terms:
|
| 276 |
count = len(re.findall(term, text))
|
| 277 |
if count > 0:
|
| 278 |
-
found_math_terms.add(term)
|
| 279 |
num_math_terms += count
|
| 280 |
utt.num_math_terms = num_math_terms
|
| 281 |
utt.math_terms = list(found_math_terms)
|
|
|
|
| 259 |
|
| 260 |
def load_math_terms():
    """Build the math-term regex patterns and a pattern-to-term lookup.

    Each pattern requires a non-letter character (or the start/end of the
    text) on both sides of the term. Terms that are also found in
    MATH_PREFIXES additionally accept an optional "s" or "es" ending.

    Returns:
        A tuple ``(math_terms, math_terms_dict)`` where ``math_terms`` is the
        list of pattern strings in MATH_WORDS order and ``math_terms_dict``
        maps each pattern string back to the plain term it was built from.
    """
    math_terms = []
    math_terms_dict = {}
    for term in MATH_WORDS:
        # Only terms listed in MATH_PREFIXES get the optional plural ending.
        plural = "(s|es)?" if term in MATH_PREFIXES else ""
        # Build the pattern once so the list entry and the dict key can
        # never drift apart.
        pattern = f"(^|[^a-zA-Z]){term}{plural}([^a-zA-Z]|$)"
        math_terms.append(pattern)
        math_terms_dict[pattern] = term
    return math_terms, math_terms_dict
|
| 271 |
|
| 272 |
def run_math_density(transcript):
|
| 273 |
+
math_terms, math_terms_dict = load_math_terms()
|
| 274 |
for i, utt in enumerate(transcript.utterances):
|
| 275 |
found_math_terms = set()
|
| 276 |
text = utt.get_clean_text(remove_punct=False)
|
|
|
|
| 278 |
for term in math_terms:
|
| 279 |
count = len(re.findall(term, text))
|
| 280 |
if count > 0:
|
| 281 |
+
found_math_terms.add(math_terms_dict[term])
|
| 282 |
num_math_terms += count
|
| 283 |
utt.num_math_terms = num_math_terms
|
| 284 |
utt.math_terms = list(found_math_terms)
|