Ashlee Kupor committed on
Commit
a3559d1
·
1 Parent(s): f9c7670

Decomp to get utterances list

Browse files
Files changed (1) hide show
  1. handler.py +18 -15
handler.py CHANGED
@@ -215,22 +215,10 @@ class EndpointHandler():
215
  {'speaker': utterance.speaker, 'data': utterance.text, 'time': utterance.starttime}]}])
216
  return formatted
217
 
218
- def __call__(self, data: str) -> List[Dict[str, Any]]:
219
- ''' data_file is a str pointing to filename of type .vtt '''
220
-
221
- # deserialize incoming request
222
- data_file = data.pop("data_file", None)
223
- model_id = data.pop("model_id", None)
224
-
225
- if data_file is None:
226
- raise ValueError("no data file provided")
227
-
228
- full_transcript = self.process_vtt_transcript(data_file)
229
- utterances_list = []
230
- utterances_indexes = [] # entry corresponds to utterance in full_transcript
231
  for i in range(len(full_transcript)):
232
  utterance = full_transcript[i]
233
- #TODO: filter out to only have SL utterances
234
  if model_id == 'eliciting':
235
  utterance_str, is_list = self.eliciting_utterance_to_str(utterance)
236
  elif model_id == 'connecting':
@@ -241,7 +229,7 @@ class EndpointHandler():
241
  utterance_str, is_list = self.adding_on_utterance_to_str(utterance)
242
  else:
243
  raise ValueError("model id not found")
244
-
245
  if is_list == 'list':
246
  utterances_list.extend(utterance_str)
247
  for j in range(len(utterance_str)):
@@ -249,7 +237,22 @@ class EndpointHandler():
249
  else:
250
  utterances_list.append(utterance_str)
251
  utterances_indexes.append(i)
 
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  talk_move = ""
254
  cuda_available = torch.cuda.is_available()
255
  if model_id == 'eliciting':
 
215
  {'speaker': utterance.speaker, 'data': utterance.text, 'time': utterance.starttime}]}])
216
  return formatted
217
 
218
+ def get_utterances_list(self, full_transcript, utterances_list, utterances_indexes, model_id):
 
 
 
 
 
 
 
 
 
 
 
 
219
  for i in range(len(full_transcript)):
220
  utterance = full_transcript[i]
221
+ #TODO: filter out to only have SL utterances
222
  if model_id == 'eliciting':
223
  utterance_str, is_list = self.eliciting_utterance_to_str(utterance)
224
  elif model_id == 'connecting':
 
229
  utterance_str, is_list = self.adding_on_utterance_to_str(utterance)
230
  else:
231
  raise ValueError("model id not found")
232
+
233
  if is_list == 'list':
234
  utterances_list.extend(utterance_str)
235
  for j in range(len(utterance_str)):
 
237
  else:
238
  utterances_list.append(utterance_str)
239
  utterances_indexes.append(i)
240
+ return utterances_list, utterances_indexes
241
 
242
+ def __call__(self, data: str) -> List[Dict[str, Any]]:
243
+ ''' data_file is a str pointing to filename of type .vtt '''
244
+
245
+ # deserialize incoming request
246
+ data_file = data.pop("data_file", None)
247
+ model_id = data.pop("model_id", None)
248
+
249
+ if data_file is None:
250
+ raise ValueError("no data file provided")
251
+
252
+ full_transcript = self.process_vtt_transcript(data_file)
253
+ # utterances_indexes entry corresponds to utterance in full_transcript
254
+ utterances_list, utterances_indexes = self.get_utterances_list(full_transcript, [], [], model_id)
255
+
256
  talk_move = ""
257
  cuda_available = torch.cuda.is_available()
258
  if model_id == 'eliciting':