Ashlee Kupor committed on
Commit ·
a1cf18b
1
Parent(s): c0139e5
Add truncate end of prior text
Browse files- handler.py +12 -3
handler.py
CHANGED
|
@@ -31,10 +31,10 @@ class EndpointHandler():
|
|
| 31 |
)
|
| 32 |
|
| 33 |
def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
|
| 34 |
-
#
|
| 35 |
|
| 36 |
doc = nlp(utterance.text)
|
| 37 |
-
prior_text = self.get_prior_text(utterance)
|
| 38 |
|
| 39 |
if len(doc) > token_limit:
|
| 40 |
utterance_text_list = self.handle_long_utterances(doc)
|
|
@@ -46,6 +46,15 @@ class EndpointHandler():
|
|
| 46 |
else:
|
| 47 |
return [prior_text, utterance.text], 'single'
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def format_speaker(self, speaker: str, source: str) -> str:
|
| 50 |
prior_text = ''
|
| 51 |
if speaker == 'student':
|
|
@@ -165,7 +174,7 @@ class EndpointHandler():
|
|
| 165 |
utterances_list.extend(utterance_str)
|
| 166 |
else:
|
| 167 |
utterances_list.append(utterance_str)
|
| 168 |
-
|
| 169 |
predictions, raw_outputs = self.model.predict(utterances_list)
|
| 170 |
|
| 171 |
return predictions
|
|
|
|
| 31 |
)
|
| 32 |
|
| 33 |
def utterance_to_str(self, utterance: Utterance) -> (List[str], str):
|
| 34 |
+
#probing using prior text and truncates end of the prior text
|
| 35 |
|
| 36 |
doc = nlp(utterance.text)
|
| 37 |
+
prior_text = self.truncate_end(self.get_prior_text(utterance))
|
| 38 |
|
| 39 |
if len(doc) > token_limit:
|
| 40 |
utterance_text_list = self.handle_long_utterances(doc)
|
|
|
|
| 46 |
else:
|
| 47 |
return [prior_text, utterance.text], 'single'
|
| 48 |
|
| 49 |
+
def truncate_end(self, prior_text: str, max_seq_length: int = 512) -> str:
    """Return the trailing portion of ``prior_text`` that fits the length budget.

    Despite the name, the *end* of the text is what is kept: when the text
    is too long, characters are dropped from its start so that the most
    recent context survives.

    Args:
        prior_text: Text preceding the current utterance.
        max_seq_length: Overall sequence budget. Half of it is reserved for
            the prior text because the input has two text columns.

    Returns:
        ``prior_text`` unchanged when it already fits, otherwise its last
        ``max_seq_length // 2`` characters.
    """
    # NOTE(review): the limit is applied to characters, while max_seq_length
    # presumably counts model tokens -- confirm this is the intended budget.
    # Divide by 2 because the budget is shared by 2 columns; clamp so a
    # non-positive budget yields an empty string instead of a negative length.
    prior_text_max_length = max(max_seq_length // 2, 0)

    # An explicit start index (rather than a negative slice) keeps the
    # zero-budget case correct: text[-0:] would return the whole string.
    overflow = len(prior_text) - prior_text_max_length
    if overflow > 0:
        return prior_text[overflow:]
    return prior_text
|
| 57 |
+
|
| 58 |
def format_speaker(self, speaker: str, source: str) -> str:
|
| 59 |
prior_text = ''
|
| 60 |
if speaker == 'student':
|
|
|
|
| 174 |
utterances_list.extend(utterance_str)
|
| 175 |
else:
|
| 176 |
utterances_list.append(utterance_str)
|
| 177 |
+
|
| 178 |
predictions, raw_outputs = self.model.predict(utterances_list)
|
| 179 |
|
| 180 |
return predictions
|