zou8944
/

asr-model

Model card Files Files and versions

xet

Community

zouguodong committed on Jul 24, 2024

Commit

5a7eea4

1 Parent(s): 4ed889c

feat: add handler.py

Browse files

Files changed (2) hide show

.gitignore +1 -0
handler.py +43 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .idea

handler.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import json
+from typing import Dict
+from faster_whisper import WhisperModel
+from transformers.pipelines.audio_utils import ffmpeg_read
+SAMPLE_RATE = 16000
+class EndpointHandler():
+    def __init__(self):
+        # load the model
+        self.model = WhisperModel("distil-large-v3", device="cuda", compute_type="float16")
+    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
+        """
+        Args:
+            data (:obj:):
+                includes the deserialized audio file as bytes
+        Return:
+            A :obj:`dict`:. base64 encoded image
+        """
+        # process input
+        inputs = data.pop("inputs", data)
+        audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
+        segments, info = self.model.transcribe(audio_nparray, beam_size=5,
+                                               language="en",
+                                               condition_on_previous_text=False,
+                                               word_timestamps=True)
+        result = []
+        for segment in segments:
+            print(f"Segment: {segment.start} - {segment.end}: {segment.text}\n")
+            result.append({
+                "start": round(segment.start, 2),
+                "end": round(segment.end, 2),
+                "text": segment.text,
+                "words": [{
+                    "start": round(word.start, 2),
+                    "end": round(word.end, 2),
+                    "word": word.word
+                } for word in segment.words]
+            })
+        return {"text": json.dumps(result, indent=2, ensure_ascii=False)}