raphaelbiojout committed on
Commit
c2bb0db
·
1 Parent(s): 41738b8
Files changed (1) hide show
  1. handler.py +47 -12
handler.py CHANGED
@@ -162,10 +162,10 @@ def display_gpu_infos():
162
  if not torch.cuda.is_available():
163
  return "NO CUDA"
164
 
165
- infos = "torch.cuda.current_device(): " + str(torch.cuda.current_device()) + "\n"
166
- infos = infos + "torch.cuda.device(0): " + str(torch.cuda.device(0)) + "\n"
167
- infos = infos + "torch.cuda.device_count(): " + str(torch.cuda.device_count()) + "\n"
168
- infos = infos + "torch.cuda.get_device_name(0): " + str(torch.cuda.get_device_name(0)) + "\n"
169
  return infos
170
 
171
  class EndpointHandler():
@@ -192,8 +192,13 @@ class EndpointHandler():
192
  Return:
193
  A :obj:`dict`:. base64 encoded image
194
  """
 
 
195
 
196
- logger.info("--------------- CUDA ------------------------")
 
 
 
197
  logger.info(display_gpu_infos())
198
 
199
  # 1. process input
@@ -227,18 +232,36 @@ class EndpointHandler():
227
  # audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
228
  # audio_tensor= torch.from_numpy(audio_nparray)
229
 
 
 
 
 
 
 
 
 
 
230
  # 2. transcribe
231
- device, batch_size, compute_type, whisper_model = whisper_config()
232
  logger.info("--------------- STARTING TRANSCRIPTION ------------------------")
233
- logger.info(f"device: {device}, batch_size: {batch_size}, compute_type:{compute_type}, whisper_model: {whisper_model}")
234
  transcription = self.model.transcribe(audio_nparray, batch_size=batch_size,language=language)
235
  if info:
236
  print(transcription["segments"][0:10000]) # before alignment
237
  logger.info(transcription["segments"][0:10000])
238
- if len(transcription["segments"]) == 0:
 
 
 
239
  logger.warning("No transcription")
240
  return {"transcription": transcription["segments"]}
241
 
 
 
 
 
 
 
 
 
242
  # 3. align
243
  if alignment:
244
  logger.info("--------------- STARTING ALIGNMENT ------------------------")
@@ -249,6 +272,14 @@ class EndpointHandler():
249
  if info:
250
  print(transcription["segments"][0:10000])
251
  logger.info(transcription["segments"][0:10000])
 
 
 
 
 
 
 
 
252
 
253
  # 4. Assign speaker labels
254
  logger.info("--------------- STARTING DIARIZATION ------------------------")
@@ -263,11 +294,15 @@ class EndpointHandler():
263
  if info:
264
  print(transcription["segments"][0:10000])
265
  logger.info(transcription["segments"][0:10000]) # segments are now assigned speaker IDs
266
-
267
- if torch.cuda.is_available():
268
- logger.info("--------------- GPU AFTER ------------------------")
269
- logger.info(display_gpu_infos())
270
 
 
 
 
 
 
 
 
 
271
  # results_json = json.dumps(results)
272
  # return {"results": results_json}
273
  return {"transcription": transcription["segments"]}
 
162
  if not torch.cuda.is_available():
163
  return "NO CUDA"
164
 
165
+ infos = "torch.cuda.current_device(): " + str(torch.cuda.current_device()) + ", "
166
+ infos = infos + "torch.cuda.device(0): " + str(torch.cuda.device(0)) + ", "
167
+ infos = infos + "torch.cuda.device_count(): " + str(torch.cuda.device_count()) + ", "
168
+ infos = infos + "torch.cuda.get_device_name(0): " + str(torch.cuda.get_device_name(0))
169
  return infos
170
 
171
  class EndpointHandler():
 
192
  Return:
193
  A :obj:`dict`:. base64 encoded image
194
  """
195
+ # get the start time
196
+ st = time.time()
197
 
198
+
199
+ logger.info("--------------- CONFIGURATION ------------------------")
200
+ device, batch_size, compute_type, whisper_model = whisper_config()
201
+ logger.info(f"device: {device}, batch_size: {batch_size}, compute_type:{compute_type}, whisper_model: {whisper_model}")
202
  logger.info(display_gpu_infos())
203
 
204
  # 1. process input
 
232
  # audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
233
  # audio_tensor= torch.from_numpy(audio_nparray)
234
 
235
+ # get the end time
236
+ et = time.time()
237
+
238
+ # get the execution time
239
+ elapsed_time = et - st
240
+ logger.info(f"TIME for audio processing : {elapsed_time:.2f} seconds")
241
+ if info:
242
+ print(f"TIME for audio processing : {elapsed_time:.2f} seconds")
243
+
244
  # 2. transcribe
 
245
  logger.info("--------------- STARTING TRANSCRIPTION ------------------------")
 
246
  transcription = self.model.transcribe(audio_nparray, batch_size=batch_size,language=language)
247
  if info:
248
  print(transcription["segments"][0:10000]) # before alignment
249
  logger.info(transcription["segments"][0:10000])
250
+
251
+ try:
252
+ first_text = transcription["segments"][0]["text"]
253
+ except:
254
  logger.warning("No transcription")
255
  return {"transcription": transcription["segments"]}
256
 
257
+ # get the execution time
258
+ et = time.time()
259
+ elapsed_time = et - st
260
+ st = time.time()
261
+ logger.info(f"TIME for audio transcription : {elapsed_time:.2f} seconds")
262
+ if info:
263
+ print(f"TIME for audio transcription : {elapsed_time:.2f} seconds")
264
+
265
  # 3. align
266
  if alignment:
267
  logger.info("--------------- STARTING ALIGNMENT ------------------------")
 
272
  if info:
273
  print(transcription["segments"][0:10000])
274
  logger.info(transcription["segments"][0:10000])
275
+
276
+ # get the execution time
277
+ et = time.time()
278
+ elapsed_time = et - st
279
+ st = time.time()
280
+ logger.info(f"TIME for alignment : {elapsed_time:.2f} seconds")
281
+ if info:
282
+ print(f"TIME for alignment : {elapsed_time:.2f} seconds")
283
 
284
  # 4. Assign speaker labels
285
  logger.info("--------------- STARTING DIARIZATION ------------------------")
 
294
  if info:
295
  print(transcription["segments"][0:10000])
296
  logger.info(transcription["segments"][0:10000]) # segments are now assigned speaker IDs
 
 
 
 
297
 
298
+ # get the execution time
299
+ et = time.time()
300
+ elapsed_time = et - st
301
+ st = time.time()
302
+ logger.info(f"TIME for audio diarization : {elapsed_time:.2f} seconds")
303
+ if info:
304
+ print(f"TIME for audio diarization : {elapsed_time:.2f} seconds")
305
+
306
  # results_json = json.dumps(results)
307
  # return {"results": results_json}
308
  return {"transcription": transcription["segments"]}