from pywhispercpp.model import Model import numpy as np from loguru import logger from .asr_interface import ASRInterface class VoiceRecognition(ASRInterface): def __init__( self, model_name: str = "base", model_dir="asr/models", language: str = "en", print_realtime=False, print_progress=False, prompt: str = None, ) -> None: self.model = Model( model=model_name, models_dir=model_dir, language=language, print_realtime=print_realtime, print_progress=print_progress, ) self.prompt = prompt def transcribe_np(self, audio: np.ndarray) -> str: if self.prompt is not None: segments = self.model.transcribe( audio, new_segment_callback=logger.info, initial_prompt=self.prompt ) else: segments = self.model.transcribe(audio, new_segment_callback=logger.info) full_text = "" for segment in segments: full_text += segment.text return full_text