lingdoc committed on
Commit
cb4b944
·
verified ·
1 Parent(s): 934e6d2

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -2
handler.py CHANGED
@@ -1,6 +1,7 @@
1
  # handler.py (for handling asr with faster_whisper)
2
  from faster_whisper import WhisperModel, BatchedInferencePipeline
3
  from typing import Any, Dict, List
 
4
 
5
  class EndpointHandler:
6
 
@@ -19,6 +20,7 @@ class EndpointHandler:
19
  """
20
  # process input
21
  inputs = data.pop("inputs", data)
 
22
  # Retrieve custom arguments, providing defaults if necessary
23
  params = data.pop("parameters", {})
24
  language = params.get("language", "en")
@@ -26,13 +28,13 @@ class EndpointHandler:
26
  vad_params = params.get("vad_params", None)
27
  batched = params.get("batched", True)
28
  if batched:
29
- segments, info = self.batched_model.transcribe(inputs,
30
  language=language, # can use this to constrain language, otherwise the language is detected from first 30 seconds
31
  vad_filter=vad_filter,
32
  vad_parameters=vad_params
33
  )
34
  else:
35
- segments, info = self.model.transcribe(inputs, beam_size=5)
36
 
37
  segments = [segment.text for segment in segments]
38
  return " ".join(segments)
 
1
  # handler.py (for handling asr with faster_whisper)
2
  from faster_whisper import WhisperModel, BatchedInferencePipeline
3
  from typing import Any, Dict, List
4
+ from transformers.pipelines.audio_utils import ffmpeg_read
5
 
6
  class EndpointHandler:
7
 
 
20
  """
21
  # process input
22
  inputs = data.pop("inputs", data)
23
+ audio_nparray = ffmpeg_read(inputs, 16000)
24
  # Retrieve custom arguments, providing defaults if necessary
25
  params = data.pop("parameters", {})
26
  language = params.get("language", "en")
 
28
  vad_params = params.get("vad_params", None)
29
  batched = params.get("batched", True)
30
  if batched:
31
+ segments, info = self.batched_model.transcribe(audio_nparray,
32
  language=language, # can use this to constrain language, otherwise the language is detected from first 30 seconds
33
  vad_filter=vad_filter,
34
  vad_parameters=vad_params
35
  )
36
  else:
37
+ segments, info = self.model.transcribe(audio_nparray, beam_size=5)
38
 
39
  segments = [segment.text for segment in segments]
40
  return " ".join(segments)