| | import logging |
| | from time import perf_counter |
| |
|
| | from baseHandler import BaseHandler |
| | from funasr import AutoModel |
| | import numpy as np |
| | from rich.console import Console |
| | import torch |
| |
|
# Configure root logging once at import time with a timestamped record format.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Shared rich console used for user-facing transcript output in process().
console = Console()
| |
|
| |
|
class ParaformerSTTHandler(BaseHandler):
    """
    Handles the Speech To Text generation using a Paraformer model.
    The default for this model is set to Chinese.
    This model was contributed by @wuhongsheng.
    """

    def setup(
        self,
        model_name="paraformer-zh",
        device="cuda",
        gen_kwargs=None,
    ):
        """Load the Paraformer model and run a warmup pass.

        Args:
            model_name: funasr model identifier. A "repo/name"-style path is
                reduced to its final component, since funasr expects the bare
                model name.
            device: torch device string the model runs on (e.g. "cuda", "mps").
            gen_kwargs: optional generation kwargs; stored for future use.
                (Was a mutable ``{}`` default — now ``None`` to avoid the
                shared-mutable-default pitfall; interface stays compatible.)
        """
        # Use the module logger rather than a stray print() for startup info.
        logger.info("Loading Paraformer model: %s", model_name)
        if len(model_name.split("/")) > 1:
            model_name = model_name.split("/")[-1]
        self.device = device
        self.gen_kwargs = gen_kwargs if gen_kwargs is not None else {}
        self.model = AutoModel(model=model_name, device=device)
        self.warmup()

    def warmup(self):
        """Run one dummy inference so lazy model initialization happens here,
        not on the first real request."""
        logger.info(f"Warming up {self.__class__.__name__}")

        n_steps = 1
        # 512 samples of silence; the content is irrelevant, only the shape/dtype.
        dummy_input = np.zeros(512, dtype=np.float32)
        for _ in range(n_steps):
            _ = self.model.generate(dummy_input)[0]["text"].strip().replace(" ", "")

    def process(self, spoken_prompt):
        """Transcribe ``spoken_prompt`` audio and yield the recognized text.

        Side effect: records the global ``pipeline_start`` timestamp, which is
        presumably read elsewhere in the pipeline for latency measurement.
        """
        logger.debug("infering paraformer...")

        global pipeline_start
        pipeline_start = perf_counter()

        # Paraformer emits space-separated tokens; collapse them for Chinese text.
        pred_text = (
            self.model.generate(spoken_prompt)[0]["text"].strip().replace(" ", "")
        )
        # Only release MPS cached memory when actually running on Apple Silicon;
        # the original unconditional torch.mps.empty_cache() raises on
        # CUDA/CPU-only builds even though the default device is "cuda".
        if self.device == "mps":
            torch.mps.empty_cache()

        logger.debug("finished paraformer inference")
        console.print(f"[yellow]USER: {pred_text}")

        yield pred_text
| |
|