test_ui / src /open_llm_vtuber /asr /whisper_cpp_asr.py
britto224's picture
Upload 130 files
5669b22 verified
from typing import Optional

import numpy as np
from loguru import logger
from pywhispercpp.model import Model

from .asr_interface import ASRInterface
class VoiceRecognition(ASRInterface):
    """Speech-to-text engine that wraps a ``pywhispercpp`` ``Model``.

    The model is created once in ``__init__`` and reused by every
    ``transcribe_np`` call.
    """

    def __init__(
        self,
        model_name: str = "base",
        model_dir: str = "asr/models",
        language: str = "en",
        print_realtime: bool = False,
        print_progress: bool = False,
        prompt: Optional[str] = None,
    ) -> None:
        """Create the underlying model and remember the optional prompt.

        Args:
            model_name: Forwarded to ``Model`` as ``model``.
            model_dir: Forwarded to ``Model`` as ``models_dir``.
            language: Forwarded to ``Model`` as ``language``.
            print_realtime: Forwarded to ``Model`` unchanged.
            print_progress: Forwarded to ``Model`` unchanged.
            prompt: Optional text passed as ``initial_prompt`` on every
                transcription. ``None`` means the argument is omitted
                from the ``transcribe`` call altogether.
        """
        self.model = Model(
            model=model_name,
            models_dir=model_dir,
            language=language,
            print_realtime=print_realtime,
            print_progress=print_progress,
        )
        self.prompt = prompt

    def transcribe_np(self, audio: np.ndarray) -> str:
        """Transcribe ``audio`` and return the text of all segments joined.

        Args:
            audio: Audio samples handed to ``Model.transcribe`` as-is.
                NOTE(review): sample rate / dtype requirements are those
                of pywhispercpp and are not checked here -- confirm
                against callers.

        Returns:
            The ``text`` of every returned segment concatenated in order
            with no separator; an empty string when there are no segments.
        """
        # Only pass ``initial_prompt`` when a prompt was configured, so the
        # library default is used otherwise.
        extra_params = {}
        if self.prompt is not None:
            extra_params["initial_prompt"] = self.prompt

        # ``logger.info`` is given as the new-segment callback, so segments
        # are logged in addition to being returned.
        segments = self.model.transcribe(
            audio, new_segment_callback=logger.info, **extra_params
        )
        return "".join(segment.text for segment in segments)