Update handler.py: decode the request audio with ffmpeg_read (16 kHz) and pass it to transcribe()
Browse files - handler.py (+4 -2)
handler.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# handler.py (for handling asr with faster_whisper)
|
| 2 |
from faster_whisper import WhisperModel, BatchedInferencePipeline
|
| 3 |
from typing import Any, Dict, List
|
|
|
|
| 4 |
|
| 5 |
class EndpointHandler:
|
| 6 |
|
|
@@ -19,6 +20,7 @@ class EndpointHandler:
|
|
| 19 |
"""
|
| 20 |
# process input
|
| 21 |
inputs = data.pop("inputs", data)
|
|
|
|
| 22 |
# Retrieve custom arguments, providing defaults if necessary
|
| 23 |
params = data.pop("parameters", {})
|
| 24 |
language = params.get("language", "en")
|
|
@@ -26,13 +28,13 @@ class EndpointHandler:
|
|
| 26 |
vad_params = params.get("vad_params", None)
|
| 27 |
batched = params.get("batched", True)
|
| 28 |
if batched:
|
| 29 |
-
segments, info = self.batched_model.transcribe(
|
| 30 |
language=language, # can use this to constrain language, otherwise the language is detected from first 30 seconds
|
| 31 |
vad_filter=vad_filter,
|
| 32 |
vad_parameters=vad_params
|
| 33 |
)
|
| 34 |
else:
|
| 35 |
-
segments, info = self.model.transcribe(
|
| 36 |
|
| 37 |
segments = [segment.text for segment in segments]
|
| 38 |
return " ".join(segments)
|
|
|
|
| 1 |
# handler.py (for handling asr with faster_whisper)
|
| 2 |
from faster_whisper import WhisperModel, BatchedInferencePipeline
|
| 3 |
from typing import Any, Dict, List
|
| 4 |
+
from transformers.pipelines.audio_utils import ffmpeg_read
|
| 5 |
|
| 6 |
class EndpointHandler:
|
| 7 |
|
|
|
|
| 20 |
"""
|
| 21 |
# process input
|
| 22 |
inputs = data.pop("inputs", data)
|
| 23 |
+
audio_nparray = ffmpeg_read(inputs, 16000)
|
| 24 |
# Retrieve custom arguments, providing defaults if necessary
|
| 25 |
params = data.pop("parameters", {})
|
| 26 |
language = params.get("language", "en")
|
|
|
|
| 28 |
vad_params = params.get("vad_params", None)
|
| 29 |
batched = params.get("batched", True)
|
| 30 |
if batched:
|
| 31 |
+
segments, info = self.batched_model.transcribe(audio_nparray,
|
| 32 |
language=language, # can use this to constrain language, otherwise the language is detected from first 30 seconds
|
| 33 |
vad_filter=vad_filter,
|
| 34 |
vad_parameters=vad_params
|
| 35 |
)
|
| 36 |
else:
|
| 37 |
+
segments, info = self.model.transcribe(audio_nparray, beam_size=5)
|
| 38 |
|
| 39 |
segments = [segment.text for segment in segments]
|
| 40 |
return " ".join(segments)
|