File size: 1,471 Bytes
59603db 511ff0c 59603db 1d3b1b4 59603db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
from abc import ABC, abstractmethod
from enum import Enum, unique
from typing import Optional

import librosa
import numpy as np

from voice_dialogue.config import paths
@unique
class ASRConfigType(Enum):
    """Enumeration of the available ASR engine types.

    Each member's value is the plain string identifier of one engine.
    ``@unique`` makes an accidentally duplicated value fail when the class
    is created, instead of silently turning the later member into an alias
    of the earlier one.
    """

    FUNASR = 'funasr'
    WHISPER_CPP = 'whisper_cpp'
@unique
class Language(Enum):
    """Enumeration of the supported languages.

    Member values are short string codes. ``@unique`` guards against two
    members accidentally sharing the same code.
    """

    # NOTE(review): presumably means "let the engine detect the language" —
    # confirm against the concrete engines.
    AUTO = 'auto'
    CHINESE = 'zh'
    ENGLISH = 'en'
class ASRInterface(ABC):
    """Abstract interface for ASR services.

    Concrete engines must implement :meth:`setup`, :meth:`warmup` and
    :meth:`transcribe`. The base constructor prepares a short audio buffer,
    stored in ``self.warmup_audiodata``, that is available for warm-up.
    """

    # Class-level default, shared by every subclass that does not rebind it.
    # Subclasses should assign their own list rather than mutate this one.
    # NOTE(review): presumably holds the languages an engine supports —
    # confirm the element type against the concrete engines.
    supported_langs = []

    # Sample rate (Hz) requested from librosa for the warm-up clip. It is
    # also the number of samples in the noise fallback, which therefore
    # lasts exactly one second at this rate.
    WARMUP_SAMPLE_RATE = 16000

    # File looked up under ``paths.AUDIO_RESOURCES_PATH`` for warm-up audio.
    WARMUP_AUDIO_FILENAME = 'jfk.flac'

    def __init__(self):
        """Prepare ``self.warmup_audiodata``.

        Loads the warm-up clip as mono audio at ``WARMUP_SAMPLE_RATE`` when
        the file exists; otherwise falls back to one second of synthetic
        noise so warm-up still has an input.
        """
        warmup_audiofile = paths.AUDIO_RESOURCES_PATH / self.WARMUP_AUDIO_FILENAME
        if warmup_audiofile.exists():
            # librosa.load returns (samples, sample_rate); the rate is
            # already known here, so it is discarded.
            audiodata, _ = librosa.load(
                warmup_audiofile, sr=self.WARMUP_SAMPLE_RATE, mono=True
            )
        else:
            # No bundled clip available: use generated test audio instead.
            audiodata = self._synthetic_warmup_audio()
        self.warmup_audiodata = audiodata

    @classmethod
    def _synthetic_warmup_audio(cls) -> np.ndarray:
        """Return one second of low-amplitude float32 Gaussian noise.

        The sample count equals ``WARMUP_SAMPLE_RATE``. The generator is not
        seeded, so the values differ between calls.
        """
        return np.random.randn(cls.WARMUP_SAMPLE_RATE).astype(np.float32) * 0.1

    @abstractmethod
    def setup(self, **kwargs) -> None:
        """Initialize the ASR service.

        Args:
            **kwargs: Additional initialization parameters.
        """

    @abstractmethod
    def warmup(self) -> None:
        """Warm up the ASR engine."""

    @abstractmethod
    def transcribe(self, audio_array: np.ndarray, language: Optional[str] = None) -> str:
        """Convert audio to text.

        Args:
            audio_array: Audio data.
            language: Language to use. If None, the language from the
                configuration is used.

        Returns:
            The recognized text.
        """
|