Safetensors
wav2vec2-bert
indiejoseph committed on
Commit
f875841
·
verified ·
1 Parent(s): 7ec1172

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +42 -0
handler.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from transformers import pipeline
3
+ from pipeline import SpeechToJyutpingPipeline
4
+ from model import Wav2Vec2BertForCantonese
5
+ from transformers.pipelines import PIPELINE_REGISTRY
6
+ from transformers import Wav2Vec2CTCTokenizer, SeamlessM4TFeatureExtractor, pipeline
7
+ from model import Wav2Vec2BertForCantonese
8
+
9
# Register the custom task at import time so that the
# `pipeline(task="speech-to-jyutping", ...)` call in EndpointHandler can
# look it up by name.
PIPELINE_REGISTRY.register_pipeline(
    "speech-to-jyutping", pipeline_class=SpeechToJyutpingPipeline
)
13
+
14
+
15
class EndpointHandler:
    """Request handler that runs the "speech-to-jyutping" pipeline.

    The instance is callable: it receives the request payload as a dict and
    returns whatever the underlying pipeline produces for its ``inputs``.
    """

    def __init__(self, path="hon9kon9ize/wav2vec2bert-jyutping"):
        """Load the feature extractor, tokenizer and model, then build the pipeline.

        Args:
            path: Value handed to ``from_pretrained`` for both the feature
                extractor and the model (a repository id by default).
        """
        import os  # local import: only used to locate the vocabulary file

        feature_extractor = SeamlessM4TFeatureExtractor.from_pretrained(path)

        # The feature extractor and model are loaded from `path`, so look for
        # the vocabulary there first. Fall back to the bare relative name
        # (resolved against the working directory) when `path` is not a local
        # directory containing it, which preserves the previous behaviour.
        vocab_file = os.path.join(path, "vocab.json")
        if not os.path.isfile(vocab_file):
            vocab_file = "vocab.json"

        tokenizer = Wav2Vec2CTCTokenizer(
            vocab_file,
            unk_token="[UNK]",
            pad_token="[PAD]",
            word_delimiter_token="|",
        )

        self.pipeline = pipeline(
            task="speech-to-jyutping",
            model=Wav2Vec2BertForCantonese.from_pretrained(path),
            feature_extractor=feature_extractor,
            tokenizer=tokenizer,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the pipeline on one request payload.

        Args:
            data: Request payload. The value stored under ``"inputs"`` is
                passed to the pipeline; when that key is absent the whole
                payload is passed instead.

        Returns:
            The pipeline's prediction (a list or dict), which the caller is
            expected to serialize and return.
        """
        # Read without popping so the caller's payload is left untouched.
        inputs = data.get("inputs", data)

        return self.pipeline(inputs)