aekupor
/

talk-move-router

Model card Files Files and versions

xet

Community

aekupor committed on Apr 19, 2023

Commit

c2d84ec

1 Parent(s): 51e0034

Add probing

Browse files

Files changed (1) hide show

handler.py +57 -0

handler.py CHANGED Viewed

@@ -48,6 +48,53 @@ class EndpointHandler():
         if len(doc) > token_limit:
             return self.handle_long_utterances(doc)
         return utterance.text
     def handle_long_utterances(self, doc: str) -> List[str]:
         split_count = 1
@@ -153,6 +200,12 @@ class EndpointHandler():
                 utterances_list.append(self.eliciting_utterance_to_str(utterance))
             elif model_id == 'connecting':
                 utterances_list.append(self.connecting_utterance_to_str(utterance))
         cuda_available = torch.cuda.is_available()
         if model_id == 'eliciting':
@@ -163,6 +216,10 @@ class EndpointHandler():
             self.model = ClassificationModel(
                 "roberta", "aekupor/connecting", use_cuda=cuda_available
             )
         else:
             raise ValueError(f"model_id: {model_id} is not valid. Available models are: {list(self.multi_model.keys())}")

         if len(doc) > token_limit:
             return self.handle_long_utterances(doc)
         return utterance.text
+    def probing_utterance_to_str(self, utterance: Utterance) -> str:
+        #probing using prior text and truncates end of the prior text
+        doc = nlp(utterance.text)
+        prior_text = self.truncate_end(self.get_prior_text(utterance))
+        if len(doc) > token_limit:
+            utterance_text_list = self.handle_long_utterances(doc)
+            utterance_with_prior_text = []
+            for text in utterance_text_list:
+                utterance_with_prior_text.append([prior_text, text])
+            return utterance_with_prior_text, 'list'
+        else:
+            return [prior_text, utterance.text], 'single'
+    def truncate_end(self, prior_text: str) -> str:
+        max_seq_length = 512
+        prior_text_max_length = int(max_seq_length / 2) #divide by 2 because 2 columns
+        if len(prior_text) > prior_text_max_length:
+            starting_index = len(prior_text) - prior_text_max_length
+            return prior_text[starting_index:]
+        return prior_text
+    def format_speaker(self, speaker: str, source: str) -> str:
+        prior_text = ''
+        if speaker == 'student':
+            prior_text += '***STUDENT '
+        else:
+            prior_text += '***SECTION_LEADER '
+        if source == 'not chat':
+            prior_text += '(audio)*** : '
+        else:
+            prior_text += '(chat)*** : '
+        return prior_text
+    def get_prior_text(self, utterance: Utterance) -> str:
+        prior_text = ''
+        if utterance.prev_utterance != None and utterance.prev_prev_utterance != None:
+            #TODO: add in the source
+            prior_text = '\"' + self.format_speaker(utterance.prev_prev_utterance.speaker, 'not chat') + utterance.prev_prev_utterance.text + ' \n '
+            prior_text += self.format_speaker(utterance.prev_utterance.speaker, 'not chat') + utterance.prev_utterance.text + ' \n '
+        else:
+            prior_text = 'No prior utterance'
+        return prior_text
     def handle_long_utterances(self, doc: str) -> List[str]:
         split_count = 1
                 utterances_list.append(self.eliciting_utterance_to_str(utterance))
             elif model_id == 'connecting':
                 utterances_list.append(self.connecting_utterance_to_str(utterance))
+            elif model_id == 'probing':
+                utterance_str, is_list = self.probing_utterance_to_str(utterance)
+                if is_list == 'list':
+                    utterances_list.extend(utterance_str)
+                else:
+                    utterances_list.append(utterance_str)
         cuda_available = torch.cuda.is_available()
         if model_id == 'eliciting':
             self.model = ClassificationModel(
                 "roberta", "aekupor/connecting", use_cuda=cuda_available
             )
+        elif model_id == 'probing':
+            self.model = ClassificationModel(
+                "roberta", "aekupor/probing", use_cuda=cuda_available
+            )
         else:
             raise ValueError(f"model_id: {model_id} is not valid. Available models are: {list(self.multi_model.keys())}")