zouguodong committed on
Commit
5a7eea4
·
1 Parent(s): 4ed889c

feat: add handler.py

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. handler.py +43 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
handler.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Dict
3
+
4
+ from faster_whisper import WhisperModel
5
+ from transformers.pipelines.audio_utils import ffmpeg_read
6
+
7
+ SAMPLE_RATE = 16000
8
+
9
+
10
class EndpointHandler:
    """Custom inference handler that transcribes English audio with faster-whisper."""

    def __init__(self, path: str = ""):
        """Load the Whisper model onto the GPU.

        Args:
            path: Model directory that the serving runtime passes when it
                instantiates the handler. It is accepted so that
                ``EndpointHandler(path)`` does not raise ``TypeError``; the
                model is always loaded by name, so the value is not used.
        """
        # load the model
        self.model = WhisperModel("distil-large-v3", device="cuda", compute_type="float16")

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Transcribe one audio payload.

        Args:
            data: Request payload. The audio bytes are read from the
                ``"inputs"`` key (the key is removed from ``data``); when the
                key is absent, ``data`` itself is passed to the decoder.

        Returns:
            A dict with a single ``"text"`` key whose value is a JSON-encoded
            string: a list of segments, each with ``start``, ``end``, ``text``
            and a ``words`` list of ``start``/``end``/``word`` entries.
            Timestamps are rounded to two decimals.
        """
        # process input
        inputs = data.pop("inputs", data)
        # Decode the payload to a waveform at the sample rate the model is fed with.
        audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
        # word_timestamps=True is required: the loop below reads segment.words.
        segments, _info = self.model.transcribe(
            audio_nparray,
            beam_size=5,
            language="en",
            condition_on_previous_text=False,
            word_timestamps=True,
        )

        result = []
        for segment in segments:
            print(f"Segment: {segment.start} - {segment.end}: {segment.text}\n")
            result.append({
                "start": round(segment.start, 2),
                "end": round(segment.end, 2),
                "text": segment.text,
                "words": [
                    {
                        "start": round(word.start, 2),
                        "end": round(word.end, 2),
                        "word": word.word,
                    }
                    for word in segment.words
                ],
            })
        # ensure_ascii=False keeps non-ASCII characters readable in the output.
        return {"text": json.dumps(result, indent=2, ensure_ascii=False)}