Ashlee Kupor committed on
Commit ·
a3559d1
1
Parent(s): f9c7670
Decomp to get utterances list
Browse files - handler.py +18 -15
handler.py
CHANGED
|
@@ -215,22 +215,10 @@ class EndpointHandler():
|
|
| 215 |
{'speaker': utterance.speaker, 'data': utterance.text, 'time': utterance.starttime}]}])
|
| 216 |
return formatted
|
| 217 |
|
| 218 |
-
def __call__(self, data: str) -> List[Dict[str, Any]]:
|
| 219 |
-
''' data_file is a str pointing to filename of type .vtt '''
|
| 220 |
-
|
| 221 |
-
# deserialize incoming request
|
| 222 |
-
data_file = data.pop("data_file", None)
|
| 223 |
-
model_id = data.pop("model_id", None)
|
| 224 |
-
|
| 225 |
-
if data_file is None:
|
| 226 |
-
raise ValueError("no data file provided")
|
| 227 |
-
|
| 228 |
-
full_transcript = self.process_vtt_transcript(data_file)
|
| 229 |
-
utterances_list = []
|
| 230 |
-
utterances_indexes = [] # entry corresponds to utterance in full_transcript
|
| 231 |
for i in range(len(full_transcript)):
|
| 232 |
utterance = full_transcript[i]
|
| 233 |
-
#TODO: filter out to only have SL utterances
|
| 234 |
if model_id == 'eliciting':
|
| 235 |
utterance_str, is_list = self.eliciting_utterance_to_str(utterance)
|
| 236 |
elif model_id == 'connecting':
|
|
@@ -241,7 +229,7 @@ class EndpointHandler():
|
|
| 241 |
utterance_str, is_list = self.adding_on_utterance_to_str(utterance)
|
| 242 |
else:
|
| 243 |
raise ValueError("model id not found")
|
| 244 |
-
|
| 245 |
if is_list == 'list':
|
| 246 |
utterances_list.extend(utterance_str)
|
| 247 |
for j in range(len(utterance_str)):
|
|
@@ -249,7 +237,22 @@ class EndpointHandler():
|
|
| 249 |
else:
|
| 250 |
utterances_list.append(utterance_str)
|
| 251 |
utterances_indexes.append(i)
|
|
|
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
talk_move = ""
|
| 254 |
cuda_available = torch.cuda.is_available()
|
| 255 |
if model_id == 'eliciting':
|
|
|
|
| 215 |
{'speaker': utterance.speaker, 'data': utterance.text, 'time': utterance.starttime}]}])
|
| 216 |
return formatted
|
| 217 |
|
| 218 |
+
def get_utterances_list(self, full_transcript, utterances_list, utterances_indexes, model_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
for i in range(len(full_transcript)):
|
| 220 |
utterance = full_transcript[i]
|
| 221 |
+
#TODO: filter out to only have SL utterances
|
| 222 |
if model_id == 'eliciting':
|
| 223 |
utterance_str, is_list = self.eliciting_utterance_to_str(utterance)
|
| 224 |
elif model_id == 'connecting':
|
|
|
|
| 229 |
utterance_str, is_list = self.adding_on_utterance_to_str(utterance)
|
| 230 |
else:
|
| 231 |
raise ValueError("model id not found")
|
| 232 |
+
|
| 233 |
if is_list == 'list':
|
| 234 |
utterances_list.extend(utterance_str)
|
| 235 |
for j in range(len(utterance_str)):
|
|
|
|
| 237 |
else:
|
| 238 |
utterances_list.append(utterance_str)
|
| 239 |
utterances_indexes.append(i)
|
| 240 |
+
return utterances_list, utterances_indexes
|
| 241 |
|
| 242 |
+
def __call__(self, data: str) -> List[Dict[str, Any]]:
|
| 243 |
+
''' data_file is a str pointing to filename of type .vtt '''
|
| 244 |
+
|
| 245 |
+
# deserialize incoming request
|
| 246 |
+
data_file = data.pop("data_file", None)
|
| 247 |
+
model_id = data.pop("model_id", None)
|
| 248 |
+
|
| 249 |
+
if data_file is None:
|
| 250 |
+
raise ValueError("no data file provided")
|
| 251 |
+
|
| 252 |
+
full_transcript = self.process_vtt_transcript(data_file)
|
| 253 |
+
# utterances_indexes entry corresponds to utterance in full_transcript
|
| 254 |
+
utterances_list, utterances_indexes = self.get_utterances_list(full_transcript, [], [], model_id)
|
| 255 |
+
|
| 256 |
talk_move = ""
|
| 257 |
cuda_available = torch.cuda.is_available()
|
| 258 |
if model_id == 'eliciting':
|