| from pathlib import Path | |
| import numpy as np | |
| from lib.utils import Timer | |
| from s2ts import S2TS | |
| from s2ts import TaskExecInfo as CTaskExecInfo | |
# Default model directory used by WhisperOv when the caller passes none.
# NOTE(review): this is an absolute path on one specific Windows machine;
# callers on any other machine must pass ``model_dir`` explicitly.
MODEL_DIR = Path(r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8")
class WhisperOv:
    """Thin wrapper around an ``S2TS`` instance used for Whisper ASR.

    Constructing the object starts the ASR engine from ``model_dir`` and
    immediately runs one throw-away transcription as a warm-up.
    """

    def __init__(self, model_dir=MODEL_DIR):
        """Create the engine, start ASR from ``model_dir`` and warm it up.

        Args:
            model_dir: Directory handed to ``start_asr_genai`` after being
                converted with ``str()``. Defaults to ``MODEL_DIR``.
        """
        # NOTE(review): timer scope assumed to cover engine creation and
        # start-up only; the status print and warm-up run after it.
        with Timer("load Whisper"):
            engine = S2TS()
            self.instance = engine
            # The meaning of the trailing ``False`` / ``""`` arguments is not
            # visible in this file; they are forwarded unchanged.
            started = engine.start_asr_genai("en", str(model_dir), False, "")

        # NOTE(review): a falsy result is only reported on stdout; the
        # warm-up below still runs against the engine in that case.
        status = "success" if started else "failed"
        print(f"model load {status}")
        self._warm_up()

    def _warm_up(self):
        """Run one transcription on random noise and discard the result."""
        # 16000 float32 samples of Gaussian noise (presumably one second of
        # audio at 16 kHz - TODO confirm the sample rate the engine expects).
        noise = np.random.randn(16000).astype(np.float32)
        self.transcribe(noise, "en")

    def transcribe(self, audio: np.ndarray, language):
        """Submit ``audio`` to the engine and return the recognised text.

        Args:
            audio: Samples to transcribe; converted with ``.tolist()`` before
                being attached to the task.
            language: Value stored in the task's ``audio_language`` field.

        Returns:
            A ``(text, duration)`` tuple: ``text`` is the concatenation of
            ``word.text`` for every entry in the result's ``words``, and
            ``duration`` is the ``duration`` attribute of the inference timer.
        """
        request = CTaskExecInfo()
        request.audio_data = audio.tolist()
        request.audio_language = language

        with Timer("Whisper inference") as timer:
            self.instance.put_asr(request)
            # The meaning of the ``0`` argument to ``get_asr`` is not visible
            # in this file.
            result: CTaskExecInfo = self.instance.get_asr(0)

        pieces = [word.text for word in result.words]
        return "".join(pieces), timer.duration