hylee
committed on
Commit
·
337b02e
1
Parent(s):
f776d9e
make adjustments
Browse files- handler.py +34 -6
handler.py
CHANGED
|
@@ -66,7 +66,7 @@ class Utterance:
|
|
| 66 |
'text': self.text,
|
| 67 |
'role': self.role,
|
| 68 |
'timestamp': self.timestamp,
|
| 69 |
-
'moments': {'reasoning': self.reasoning, 'questioning': self.question, 'uptake': self.uptake},
|
| 70 |
'unitMeasure': self.unit_measure,
|
| 71 |
'aggregateUnitMeasure': self.aggregate_unit_measure,
|
| 72 |
'wordCount': self.word_count
|
|
@@ -112,44 +112,70 @@ class Transcript:
|
|
| 112 |
if ((uptake_speaker is None)):
|
| 113 |
return None
|
| 114 |
teacher_words = 0
|
|
|
|
| 115 |
student_words = 0
|
|
|
|
| 116 |
for utt in self.utterances:
|
| 117 |
if (utt.speaker == uptake_speaker):
|
| 118 |
utt.role = 'teacher'
|
| 119 |
teacher_words += utt.get_num_words()
|
|
|
|
| 120 |
else:
|
| 121 |
utt.role = 'student'
|
| 122 |
student_words += utt.get_num_words()
|
|
|
|
| 123 |
teacher_percentage = round(
|
| 124 |
(teacher_words / (teacher_words + student_words)) * 100)
|
| 125 |
student_percentage = 100 - teacher_percentage
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def get_word_cloud_dicts(self):
|
| 129 |
teacher_dict = {}
|
| 130 |
student_dict = {}
|
|
|
|
| 131 |
for utt in self.utterances:
|
|
|
|
| 132 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
| 133 |
for word in words:
|
| 134 |
if utt.role == 'teacher':
|
| 135 |
if word not in teacher_dict:
|
| 136 |
teacher_dict[word] = 0
|
| 137 |
teacher_dict[word] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
else:
|
| 139 |
if word not in student_dict:
|
| 140 |
student_dict[word] = 0
|
| 141 |
student_dict[word] += 1
|
| 142 |
dict_list = []
|
|
|
|
| 143 |
for word in teacher_dict.keys():
|
| 144 |
dict_list.append(
|
| 145 |
{'text': word, 'value': teacher_dict[word], 'category': 'teacher'})
|
|
|
|
| 146 |
for word in student_dict.keys():
|
| 147 |
dict_list.append(
|
| 148 |
{'text': word, 'value': student_dict[word], 'category': 'student'})
|
| 149 |
-
|
|
|
|
|
|
|
| 150 |
|
| 151 |
def get_talk_timeline(self):
    """Return one talk-timeline dict per utterance.

    Each element is the result of calling ``to_talk_timeline_dict()`` on
    the corresponding entry of ``self.utterances``, in the same order.
    """
    timeline = []
    for utt in self.utterances:
        timeline.append(utt.to_talk_timeline_dict())
    return timeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
def to_dict(self):
|
| 155 |
return {
|
|
@@ -325,9 +351,11 @@ class EndpointHandler():
|
|
| 325 |
question_model = QuestionModel(
|
| 326 |
self.device, self.tokenizer, self.input_builder)
|
| 327 |
question_model.run_inference(transcript)
|
| 328 |
-
transcript.update_utterance_roles
|
|
|
|
| 329 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
| 330 |
talk_timeline = transcript.get_talk_timeline()
|
| 331 |
-
|
|
|
|
| 332 |
|
| 333 |
-
return talk_dist, talk_len,
|
|
|
|
| 66 |
'text': self.text,
|
| 67 |
'role': self.role,
|
| 68 |
'timestamp': self.timestamp,
|
| 69 |
+
'moments': {'reasoning': True if self.reasoning else False, 'questioning': True if self.question else False, 'uptake': True if self.uptake else False},
|
| 70 |
'unitMeasure': self.unit_measure,
|
| 71 |
'aggregateUnitMeasure': self.aggregate_unit_measure,
|
| 72 |
'wordCount': self.word_count
|
|
|
|
| 112 |
if ((uptake_speaker is None)):
|
| 113 |
return None
|
| 114 |
teacher_words = 0
|
| 115 |
+
teacher_utt_count = 0
|
| 116 |
student_words = 0
|
| 117 |
+
student_utt_count = 0
|
| 118 |
for utt in self.utterances:
|
| 119 |
if (utt.speaker == uptake_speaker):
|
| 120 |
utt.role = 'teacher'
|
| 121 |
teacher_words += utt.get_num_words()
|
| 122 |
+
teacher_utt_count += 1
|
| 123 |
else:
|
| 124 |
utt.role = 'student'
|
| 125 |
student_words += utt.get_num_words()
|
| 126 |
+
student_utt_count += 1
|
| 127 |
teacher_percentage = round(
|
| 128 |
(teacher_words / (teacher_words + student_words)) * 100)
|
| 129 |
student_percentage = 100 - teacher_percentage
|
| 130 |
+
avg_teacher_length = teacher_words / teacher_utt_count
|
| 131 |
+
avg_student_length = student_words / student_utt_count
|
| 132 |
+
return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': avg_teacher_length, 'student': avg_student_length}}
|
| 133 |
|
| 134 |
def get_word_cloud_dicts(self):
    """Build the word-frequency lists used for the word-cloud output.

    Every utterance's cleaned text (``get_clean_text(remove_punct=True)``)
    is split on single spaces and counted into one of three tallies:
    teacher words, student words (any role other than ``'teacher'``), and
    teacher words from utterances whose ``uptake`` equals ``1``.

    Returns:
        A 2-tuple ``(common, uptake)`` where

        * ``common`` is ``{'common_top_words': [...]}`` holding up to 50
          ``{'text', 'value', 'category'}`` entries for teacher and student
          words combined, sorted by descending count, and
        * ``uptake`` is ``{'uptake_top_words': [...]}`` holding up to 50
          such entries for teacher words seen in uptake utterances.
    """
    teacher_counts = {}
    student_counts = {}
    uptake_teacher_counts = {}

    for utt in self.utterances:
        words = utt.get_clean_text(remove_punct=True).split(' ')

        # Decide once per utterance which tallies its words feed into.
        if utt.role == 'teacher':
            targets = [teacher_counts]
            if utt.uptake == 1:
                targets.append(uptake_teacher_counts)
        else:
            targets = [student_counts]

        for word in words:
            for counts in targets:
                counts[word] = counts.get(word, 0) + 1

    dict_list = [
        {'text': word, 'value': count, 'category': 'teacher'}
        for word, count in teacher_counts.items()
    ]
    dict_list.extend(
        {'text': word, 'value': count, 'category': 'student'}
        for word, count in student_counts.items()
    )

    # Only teacher words that actually occurred in an uptake utterance have
    # an uptake count; indexing the uptake tally with every teacher word
    # would raise KeyError. Walking teacher_counts keeps the tie order
    # anchored to each word's first appearance in teacher speech.
    uptake_dict_list = [
        {'text': word, 'value': uptake_teacher_counts[word], 'category': 'teacher'}
        for word in teacher_counts
        if word in uptake_teacher_counts
    ]

    # sorted() is stable, so equal counts keep their insertion order.
    sorted_dict_list = sorted(
        dict_list, key=lambda entry: entry['value'], reverse=True)
    sorted_uptake_dict_list = sorted(
        uptake_dict_list, key=lambda entry: entry['value'], reverse=True)

    return (
        {'common_top_words': sorted_dict_list[:50]},
        {'uptake_top_words': sorted_uptake_dict_list[:50]},
    )
|
| 166 |
|
| 167 |
def get_talk_timeline(self):
    """Return one talk-timeline dict per utterance.

    Each element is the result of calling ``to_talk_timeline_dict()`` on
    the corresponding entry of ``self.utterances``, in the same order.
    """
    timeline = []
    for utt in self.utterances:
        timeline.append(utt.to_talk_timeline_dict())
    return timeline
|
| 169 |
+
|
| 170 |
+
def calculate_aggregate_word_count(self):
    """Fill in word-count based unit measures when any are missing.

    If at least one utterance has ``unit_measure`` set to ``None``, every
    utterance is rewritten: ``unit_measure`` becomes its own word count
    (``get_num_words()``) and ``aggregate_unit_measure`` becomes the
    running total of word counts up to and including that utterance.
    When no utterance has a ``None`` unit measure, nothing is modified.
    """
    # Guard clause: leave the transcript untouched when every utterance
    # already carries a unit measure.
    if None not in [entry.unit_measure for entry in self.utterances]:
        return

    running_total = 0
    for entry in self.utterances:
        running_total += entry.get_num_words()
        entry.unit_measure = entry.get_num_words()
        entry.aggregate_unit_measure = running_total
|
| 178 |
+
|
| 179 |
|
| 180 |
def to_dict(self):
|
| 181 |
return {
|
|
|
|
| 351 |
question_model = QuestionModel(
|
| 352 |
self.device, self.tokenizer, self.input_builder)
|
| 353 |
question_model.run_inference(transcript)
|
| 354 |
+
transcript.update_utterance_roles()
|
| 355 |
+
transcript.calculate_aggregate_word_count()
|
| 356 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
| 357 |
talk_timeline = transcript.get_talk_timeline()
|
| 358 |
+
talk_moments = {"talk_moments": talk_timeline}
|
| 359 |
+
word_cloud, uptake_word_cloud = transcript.get_word_cloud_dicts()
|
| 360 |
|
| 361 |
+
return talk_dist, talk_len, talk_moments, word_cloud, uptake_word_cloud
|