stanford-nlpxed
/

transcript-analysis

Model card Files Files and versions

xet

Community

ikarasz committed on Feb 22, 2025

Commit

9b16b70

1 Parent(s): 199c4b9

handle plurals of math words

Browse files

Files changed (1) hide show

handler.py +22 -18

handler.py CHANGED Viewed

@@ -95,7 +95,6 @@ class Utterance:
                f"text='{self.text}', uid={self.uid}," \
                f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
 class Transcript:
     def __init__(self, **kwargs):
         self.utterances = []
@@ -183,13 +182,6 @@ class Transcript:
             else:
                 student_dict.update(general_words)
-        def dict_to_list(d, category):
-            combined_dict = Counter()
-            for word, count in d.items():
-                singular_word = plural_to_singular(word)
-                combined_dict[singular_word] += count
-            return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
         # Sorting and trimming dictionaries
         dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
         uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
@@ -224,7 +216,6 @@ class Transcript:
     def __repr__(self):
         return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
 class QuestionModel:
     def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
         print("Loading models...")
@@ -376,6 +367,13 @@ class FocusingQuestionModel:
                             token_type_ids=instance["token_type_ids"])
         return output
 def load_math_terms():
     math_regexes = []
     math_terms_dict = {}
@@ -394,6 +392,7 @@ def run_math_density(transcript):
     sorted_regexes = sorted(math_regexes, key=len, reverse=True)
     teacher_math_word_cloud = {}
     student_math_word_cloud = {}
     for i, utt in enumerate(transcript.utterances):
         text = utt.get_clean_text(remove_punct=True)
         num_matches = 0
@@ -418,21 +417,26 @@ def run_math_density(transcript):
             num_matches += len(matches)
         utt.num_math_terms = num_matches
         utt.math_terms = list(match_list)
     teacher_dict_list = []
     student_dict_list = []
     dict_list = []
-    for word in teacher_math_word_cloud.keys():
-        teacher_dict_list.append(
-            {'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
-        dict_list.append({'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
-    for word in student_math_word_cloud.keys():
-        student_dict_list.append(
-            {'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
-        dict_list.append({'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
     sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
     sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
     sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
-    # return sorted_dict_list[:50]
     return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
 class EndpointHandler():

                f"text='{self.text}', uid={self.uid}," \
                f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
 class Transcript:
     def __init__(self, **kwargs):
         self.utterances = []
             else:
                 student_dict.update(general_words)
         # Sorting and trimming dictionaries
         dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
         uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
     def __repr__(self):
         return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
 class QuestionModel:
     def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
         print("Loading models...")
                             token_type_ids=instance["token_type_ids"])
         return output
+def dict_to_list(d, category):
+    combined_dict = Counter()
+    for word, count in d.items():
+        singular_word = plural_to_singular(word)
+        combined_dict[singular_word] += count
+    return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
 def load_math_terms():
     math_regexes = []
     math_terms_dict = {}
     sorted_regexes = sorted(math_regexes, key=len, reverse=True)
     teacher_math_word_cloud = {}
     student_math_word_cloud = {}
     for i, utt in enumerate(transcript.utterances):
         text = utt.get_clean_text(remove_punct=True)
         num_matches = 0
             num_matches += len(matches)
         utt.num_math_terms = num_matches
         utt.math_terms = list(match_list)
+    # Initialize lists
     teacher_dict_list = []
     student_dict_list = []
     dict_list = []
+    # Process teacher_math_word_cloud
+    teacher_dict_list = dict_to_list(teacher_math_word_cloud, 'math')
+    dict_list.extend(teacher_dict_list)
+    # Process student_math_word_cloud
+    student_dict_list = dict_to_list(student_math_word_cloud, 'math')
+    dict_list.extend(student_dict_list)
+    # Sort the lists
     sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
     sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
     sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
+    # Return the sorted lists
     return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
 class EndpointHandler():