handle plurals of math words
Browse files- handler.py +22 -18
handler.py
CHANGED
|
@@ -95,7 +95,6 @@ class Utterance:
|
|
| 95 |
f"text='{self.text}', uid={self.uid}," \
|
| 96 |
f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
|
| 97 |
|
| 98 |
-
|
| 99 |
class Transcript:
|
| 100 |
def __init__(self, **kwargs):
|
| 101 |
self.utterances = []
|
|
@@ -183,13 +182,6 @@ class Transcript:
|
|
| 183 |
else:
|
| 184 |
student_dict.update(general_words)
|
| 185 |
|
| 186 |
-
def dict_to_list(d, category):
    """Collapse word counts onto normalized keys and format them as entries.

    Each word in ``d`` is passed through ``plural_to_singular`` (defined
    elsewhere; presumably maps plural forms to their singular form) and the
    counts of words that map to the same result are summed.

    Args:
        d: Mapping of word -> count.
        category: Value stored under the ``'category'`` key of every entry.

    Returns:
        A list of dicts with keys ``'text'``, ``'value'`` and ``'category'``,
        one per distinct normalized word, in first-seen order.
    """
    merged = Counter()
    for term, freq in d.items():
        # Words sharing a normalized form accumulate into a single bucket.
        merged[plural_to_singular(term)] += freq

    entries = []
    for term, total in merged.items():
        entries.append({'text': term, 'value': total, 'category': category})
    return entries
|
| 192 |
-
|
| 193 |
# Sorting and trimming dictionaries
|
| 194 |
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
| 195 |
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
|
@@ -224,7 +216,6 @@ class Transcript:
|
|
| 224 |
def __repr__(self):
|
| 225 |
return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
|
| 226 |
|
| 227 |
-
|
| 228 |
class QuestionModel:
|
| 229 |
def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
|
| 230 |
print("Loading models...")
|
|
@@ -376,6 +367,13 @@ class FocusingQuestionModel:
|
|
| 376 |
token_type_ids=instance["token_type_ids"])
|
| 377 |
return output
|
| 378 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
def load_math_terms():
|
| 380 |
math_regexes = []
|
| 381 |
math_terms_dict = {}
|
|
@@ -394,6 +392,7 @@ def run_math_density(transcript):
|
|
| 394 |
sorted_regexes = sorted(math_regexes, key=len, reverse=True)
|
| 395 |
teacher_math_word_cloud = {}
|
| 396 |
student_math_word_cloud = {}
|
|
|
|
| 397 |
for i, utt in enumerate(transcript.utterances):
|
| 398 |
text = utt.get_clean_text(remove_punct=True)
|
| 399 |
num_matches = 0
|
|
@@ -418,21 +417,26 @@ def run_math_density(transcript):
|
|
| 418 |
num_matches += len(matches)
|
| 419 |
utt.num_math_terms = num_matches
|
| 420 |
utt.math_terms = list(match_list)
|
|
|
|
|
|
|
| 421 |
teacher_dict_list = []
|
| 422 |
student_dict_list = []
|
| 423 |
dict_list = []
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
|
|
|
|
|
|
| 432 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
| 433 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
| 434 |
sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
|
| 435 |
-
|
|
|
|
| 436 |
return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
|
| 437 |
|
| 438 |
class EndpointHandler():
|
|
|
|
| 95 |
f"text='{self.text}', uid={self.uid}," \
|
| 96 |
f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
|
| 97 |
|
|
|
|
| 98 |
class Transcript:
|
| 99 |
def __init__(self, **kwargs):
|
| 100 |
self.utterances = []
|
|
|
|
| 182 |
else:
|
| 183 |
student_dict.update(general_words)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
# Sorting and trimming dictionaries
|
| 186 |
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
| 187 |
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
|
|
|
| 216 |
def __repr__(self):
|
| 217 |
return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
|
| 218 |
|
|
|
|
| 219 |
class QuestionModel:
|
| 220 |
def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
|
| 221 |
print("Loading models...")
|
|
|
|
| 367 |
token_type_ids=instance["token_type_ids"])
|
| 368 |
return output
|
| 369 |
|
| 370 |
+
def dict_to_list(d, category):
    """Collapse word counts onto normalized keys and format them as entries.

    Each word in ``d`` is passed through ``plural_to_singular`` (defined
    elsewhere; presumably maps plural forms to their singular form) and the
    counts of words that map to the same result are summed.

    Args:
        d: Mapping of word -> count.
        category: Value stored under the ``'category'`` key of every entry.

    Returns:
        A list of dicts with keys ``'text'``, ``'value'`` and ``'category'``,
        one per distinct normalized word, in first-seen order.
    """
    merged = Counter()
    for term, freq in d.items():
        # Words sharing a normalized form accumulate into a single bucket.
        merged[plural_to_singular(term)] += freq

    entries = []
    for term, total in merged.items():
        entries.append({'text': term, 'value': total, 'category': category})
    return entries
|
| 376 |
+
|
| 377 |
def load_math_terms():
|
| 378 |
math_regexes = []
|
| 379 |
math_terms_dict = {}
|
|
|
|
| 392 |
sorted_regexes = sorted(math_regexes, key=len, reverse=True)
|
| 393 |
teacher_math_word_cloud = {}
|
| 394 |
student_math_word_cloud = {}
|
| 395 |
+
|
| 396 |
for i, utt in enumerate(transcript.utterances):
|
| 397 |
text = utt.get_clean_text(remove_punct=True)
|
| 398 |
num_matches = 0
|
|
|
|
| 417 |
num_matches += len(matches)
|
| 418 |
utt.num_math_terms = num_matches
|
| 419 |
utt.math_terms = list(match_list)
|
| 420 |
+
|
| 421 |
+
# Initialize lists
|
| 422 |
teacher_dict_list = []
|
| 423 |
student_dict_list = []
|
| 424 |
dict_list = []
|
| 425 |
+
|
| 426 |
+
# Process teacher_math_word_cloud
|
| 427 |
+
teacher_dict_list = dict_to_list(teacher_math_word_cloud, 'math')
|
| 428 |
+
dict_list.extend(teacher_dict_list)
|
| 429 |
+
|
| 430 |
+
# Process student_math_word_cloud
|
| 431 |
+
student_dict_list = dict_to_list(student_math_word_cloud, 'math')
|
| 432 |
+
dict_list.extend(student_dict_list)
|
| 433 |
+
|
| 434 |
+
# Sort the lists
|
| 435 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
| 436 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
| 437 |
sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
|
| 438 |
+
|
| 439 |
+
# Return the sorted lists
|
| 440 |
return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
|
| 441 |
|
| 442 |
class EndpointHandler():
|