liumaolin
committed on
Commit
·
ef0d09e
1
Parent(s):
025ca3f
Refactor TTS architecture: implement runtime interface, TTS manager, universal registry, and factory pattern to support multiple engines.
Browse files
- src/VoiceDialogue/main.py +2 -2
- src/VoiceDialogue/models/voice_model/__init__.py +0 -19
- src/VoiceDialogue/services/audio/audio_answer.py +6 -34
- src/VoiceDialogue/services/audio/audio_generator/__init__.py +51 -0
- src/VoiceDialogue/services/audio/audio_generator/configs/__init__.py +52 -0
- src/VoiceDialogue/{models/voice_model/moyoyo_configs.py → services/audio/audio_generator/configs/moyoyo.py} +1 -1
- src/VoiceDialogue/services/audio/audio_generator/models/__init__.py +72 -0
- src/VoiceDialogue/{models/voice_model → services/audio/audio_generator/models}/base.py +0 -37
- src/VoiceDialogue/{models/voice_model/moyoyo_tts.py → services/audio/audio_generator/models/moyoyo.py} +1 -18
- src/VoiceDialogue/services/audio/audio_generator/runtime/__init__.py +32 -0
- src/VoiceDialogue/services/audio/audio_generator/runtime/interface.py +103 -0
- src/VoiceDialogue/services/audio/audio_generator/runtime/moyoyo.py +50 -0
- src/VoiceDialogue/services/audio/audio_generator/tts_manager.py +168 -0
- src/VoiceDialogue/services/audio/audio_player.py +4 -5
src/VoiceDialogue/main.py
CHANGED
|
@@ -7,7 +7,7 @@ from config.paths import load_third_party
|
|
| 7 |
|
| 8 |
load_third_party()
|
| 9 |
|
| 10 |
-
from models
|
| 11 |
from services.audio.aec_audio_capture import EchoCancellingAudioCapture
|
| 12 |
from services.audio.audio_answer import TTSAudioGenerator
|
| 13 |
from services.audio.audio_player import AudioStreamPlayer
|
|
@@ -73,7 +73,7 @@ def launch_system(
|
|
| 73 |
audio_generator_worker = TTSAudioGenerator(
|
| 74 |
processed_answer_queue=generated_answer_queue,
|
| 75 |
tts_generated_audio_queue=tts_generated_audio_queue,
|
| 76 |
-
|
| 77 |
)
|
| 78 |
audio_generator_worker.start()
|
| 79 |
threads.append(audio_generator_worker)
|
|
|
|
| 7 |
|
| 8 |
load_third_party()
|
| 9 |
|
| 10 |
+
from services.audio.audio_generator.models import tts_config_registry, TTSConfigType
|
| 11 |
from services.audio.aec_audio_capture import EchoCancellingAudioCapture
|
| 12 |
from services.audio.audio_answer import TTSAudioGenerator
|
| 13 |
from services.audio.audio_player import AudioStreamPlayer
|
|
|
|
| 73 |
audio_generator_worker = TTSAudioGenerator(
|
| 74 |
processed_answer_queue=generated_answer_queue,
|
| 75 |
tts_generated_audio_queue=tts_generated_audio_queue,
|
| 76 |
+
tts_config=tts_speaker_config
|
| 77 |
)
|
| 78 |
audio_generator_worker.start()
|
| 79 |
threads.append(audio_generator_worker)
|
src/VoiceDialogue/models/voice_model/__init__.py
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
from .base import TTSConfigType, VoiceModelStatus, tts_config_registry
|
| 2 |
-
from .moyoyo_configs import get_moyoyo_configs
|
| 3 |
-
from .moyoyo_tts import MoYoYoTTSConfig, MoYoYoTTSInference
|
| 4 |
-
|
| 5 |
-
# 注册MoYoYo TTS
|
| 6 |
-
moyoyo_inference = MoYoYoTTSInference()
|
| 7 |
-
tts_config_registry.register_inference_engine(TTSConfigType.MOYOYO, moyoyo_inference)
|
| 8 |
-
|
| 9 |
-
# 注册所有MoYoYo配置
|
| 10 |
-
for config in get_moyoyo_configs():
|
| 11 |
-
tts_config_registry.register_config(config)
|
| 12 |
-
|
| 13 |
-
__all__ = [
|
| 14 |
-
'TTSConfigType',
|
| 15 |
-
'VoiceModelStatus',
|
| 16 |
-
'tts_config_registry',
|
| 17 |
-
'MoYoYoTTSConfig',
|
| 18 |
-
'MoYoYoTTSInference',
|
| 19 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/VoiceDialogue/services/audio/audio_answer.py
CHANGED
|
@@ -1,56 +1,28 @@
|
|
| 1 |
import time
|
| 2 |
-
import typing
|
| 3 |
from multiprocessing import Queue
|
| 4 |
from queue import Empty
|
| 5 |
|
| 6 |
-
from config.paths import load_third_party
|
| 7 |
-
|
| 8 |
-
load_third_party()
|
| 9 |
-
|
| 10 |
-
from moyoyo_tts import TTSModule, TTS_Config
|
| 11 |
-
|
| 12 |
from models.voice_task import VoiceTask
|
| 13 |
from services.core.base import BaseThread
|
| 14 |
from services.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
|
| 15 |
-
from
|
| 16 |
|
| 17 |
|
| 18 |
class TTSAudioGenerator(BaseThread):
|
| 19 |
"""TTS 音频生成器 - 负责将文本转换为音频"""
|
| 20 |
|
| 21 |
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
|
| 22 |
-
processed_answer_queue, tts_generated_audio_queue,
|
| 23 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 24 |
self.processed_answer_queue: Queue = processed_answer_queue
|
| 25 |
self.tts_generated_audio_queue: Queue = tts_generated_audio_queue
|
| 26 |
|
| 27 |
-
self.
|
| 28 |
-
self._tts_config = voice_role
|
| 29 |
-
self.tts_module: typing.Optional[TTSModule] = None
|
| 30 |
-
|
| 31 |
-
def setup_tts_config(self, voice_role: MoYoYoTTSConfig):
|
| 32 |
-
tts_config = TTS_Config(voice_role.get_runtime_config())
|
| 33 |
-
return tts_config
|
| 34 |
-
|
| 35 |
-
def warmup(self, warmup_steps=1):
|
| 36 |
-
print('[INFO:] Warming up TTS engine...')
|
| 37 |
-
warmup_texts = ['Warming up TTS engine.', '预热文字转音频引擎。']
|
| 38 |
-
for _ in range(warmup_steps):
|
| 39 |
-
for warmup_text in warmup_texts:
|
| 40 |
-
self.tts_module.generate_audio(warmup_text)
|
| 41 |
-
print('[INFO:] Warm up TTS engine finished.')
|
| 42 |
|
| 43 |
def run(self):
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
self.tts_module = TTSModule(tts_config)
|
| 48 |
-
self.tts_module.setup_inference_params(
|
| 49 |
-
ref_audio=self._tts_config.reference_audio_path,
|
| 50 |
-
parallel_infer=False,
|
| 51 |
-
**self._tts_config.inference_parameters.model_dump()
|
| 52 |
-
)
|
| 53 |
-
self.warmup()
|
| 54 |
|
| 55 |
self.is_ready = True
|
| 56 |
|
|
@@ -80,7 +52,7 @@ class TTSAudioGenerator(BaseThread):
|
|
| 80 |
continue
|
| 81 |
|
| 82 |
voice_task.tts_start_time = time.time()
|
| 83 |
-
tts_generated_sentence_audio = self.
|
| 84 |
voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
|
| 85 |
voice_task.tts_end_time = time.time()
|
| 86 |
# print(f'生成音频:{voice_task.answer_sentence}')
|
|
|
|
| 1 |
import time
|
|
|
|
| 2 |
from multiprocessing import Queue
|
| 3 |
from queue import Empty
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from models.voice_task import VoiceTask
|
| 6 |
from services.core.base import BaseThread
|
| 7 |
from services.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
|
| 8 |
+
from .audio_generator import tts_manager, BaseTTSConfig
|
| 9 |
|
| 10 |
|
| 11 |
class TTSAudioGenerator(BaseThread):
|
| 12 |
"""TTS 音频生成器 - 负责将文本转换为音频"""
|
| 13 |
|
| 14 |
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None,
|
| 15 |
+
processed_answer_queue, tts_generated_audio_queue, tts_config: BaseTTSConfig):
|
| 16 |
super().__init__(group, target, name, args, kwargs, daemon=daemon)
|
| 17 |
self.processed_answer_queue: Queue = processed_answer_queue
|
| 18 |
self.tts_generated_audio_queue: Queue = tts_generated_audio_queue
|
| 19 |
|
| 20 |
+
self.tts_instance = tts_manager.create_tts(tts_config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def run(self):
|
| 23 |
|
| 24 |
+
self.tts_instance.setup()
|
| 25 |
+
self.tts_instance.warmup()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
self.is_ready = True
|
| 28 |
|
|
|
|
| 52 |
continue
|
| 53 |
|
| 54 |
voice_task.tts_start_time = time.time()
|
| 55 |
+
tts_generated_sentence_audio = self.tts_instance.synthesize(voice_task.answer_sentence)
|
| 56 |
voice_task.tts_generated_sentence_audio = tts_generated_sentence_audio
|
| 57 |
voice_task.tts_end_time = time.time()
|
| 58 |
# print(f'生成音频:{voice_task.answer_sentence}')
|
src/VoiceDialogue/services/audio/audio_generator/__init__.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Audio Generator Module
|
| 3 |
+
|
| 4 |
+
提供文本转语音(TTS)功能的完整解决方案,包括:
|
| 5 |
+
- TTS管理器和注册系统
|
| 6 |
+
- 多种TTS引擎支持
|
| 7 |
+
- 配置管理
|
| 8 |
+
- 运行时接口
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from .models import (
|
| 12 |
+
TTSConfigType,
|
| 13 |
+
VoiceModelStatus,
|
| 14 |
+
tts_config_registry,
|
| 15 |
+
BaseTTSConfig
|
| 16 |
+
)
|
| 17 |
+
from .runtime import (
|
| 18 |
+
TTSInterface,
|
| 19 |
+
TTSFactory
|
| 20 |
+
)
|
| 21 |
+
from .tts_manager import (
|
| 22 |
+
TTSManager,
|
| 23 |
+
TTSRegistryTables,
|
| 24 |
+
tts_manager,
|
| 25 |
+
tts_tables,
|
| 26 |
+
register_all_tts
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
__version__ = "1.0.0"
|
| 30 |
+
|
| 31 |
+
__all__ = [
|
| 32 |
+
# 管理器和注册表
|
| 33 |
+
'TTSManager',
|
| 34 |
+
'TTSRegistryTables',
|
| 35 |
+
'tts_manager',
|
| 36 |
+
'tts_tables',
|
| 37 |
+
'register_all_tts',
|
| 38 |
+
|
| 39 |
+
# 配置模型
|
| 40 |
+
'TTSConfigType',
|
| 41 |
+
'VoiceModelStatus',
|
| 42 |
+
'tts_config_registry',
|
| 43 |
+
'BaseTTSConfig',
|
| 44 |
+
|
| 45 |
+
# 运行时接口
|
| 46 |
+
'TTSInterface',
|
| 47 |
+
'TTSFactory',
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
# 模块初始化时自动注册所有TTS实现
|
| 51 |
+
# register_all_tts() 已在 tts_manager 模块中自动调用
|
src/VoiceDialogue/services/audio/audio_generator/configs/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configs Module
|
| 3 |
+
|
| 4 |
+
TTS配置模块,包含:
|
| 5 |
+
- 各种TTS引擎的预配置
|
| 6 |
+
- 配置加载函数
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
# 导入配置加载函数
|
| 10 |
+
try:
|
| 11 |
+
from .moyoyo import get_moyoyo_configs
|
| 12 |
+
|
| 13 |
+
__all__ = [
|
| 14 |
+
'get_moyoyo_configs',
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
# 配置获取函数映射
|
| 18 |
+
CONFIG_GETTERS = {
|
| 19 |
+
'moyoyo': get_moyoyo_configs,
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
except ImportError as e:
|
| 23 |
+
import logging
|
| 24 |
+
|
| 25 |
+
logging.warning(f"Failed to import some config modules: {e}")
|
| 26 |
+
__all__ = []
|
| 27 |
+
CONFIG_GETTERS = {}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def get_all_configs():
|
| 31 |
+
"""获取所有可用的TTS配置"""
|
| 32 |
+
all_configs = []
|
| 33 |
+
for getter_func in CONFIG_GETTERS.values():
|
| 34 |
+
try:
|
| 35 |
+
configs = getter_func()
|
| 36 |
+
all_configs.extend(configs)
|
| 37 |
+
except Exception as e:
|
| 38 |
+
import logging
|
| 39 |
+
logging.error(f"Failed to load configs from {getter_func.__name__}: {e}")
|
| 40 |
+
return all_configs
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_configs_by_type(tts_type: str):
|
| 44 |
+
"""根据TTS类型获取配置"""
|
| 45 |
+
if tts_type in CONFIG_GETTERS:
|
| 46 |
+
try:
|
| 47 |
+
return CONFIG_GETTERS[tts_type]()
|
| 48 |
+
except Exception as e:
|
| 49 |
+
import logging
|
| 50 |
+
logging.error(f"Failed to load configs for {tts_type}: {e}")
|
| 51 |
+
return []
|
| 52 |
+
return []
|
src/VoiceDialogue/{models/voice_model/moyoyo_configs.py → services/audio/audio_generator/configs/moyoyo.py}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from .
|
| 2 |
|
| 3 |
# 基础预训练模型文件映射
|
| 4 |
BASE_PRETRAINED_FILES = {
|
|
|
|
| 1 |
+
from services.audio.audio_generator.models.moyoyo import MoYoYoTTSConfig
|
| 2 |
|
| 3 |
# 基础预训练模型文件映射
|
| 4 |
BASE_PRETRAINED_FILES = {
|
src/VoiceDialogue/services/audio/audio_generator/models/__init__.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Models Module
|
| 3 |
+
|
| 4 |
+
TTS模型定义模块,包含:
|
| 5 |
+
- 基础配置抽象类
|
| 6 |
+
- 各种TTS引擎的配置模型
|
| 7 |
+
- 全局配置注册表
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .base import (
|
| 11 |
+
TTSConfigType,
|
| 12 |
+
VoiceModelStatus,
|
| 13 |
+
BaseTTSConfig,
|
| 14 |
+
TTSConfigRegistry,
|
| 15 |
+
tts_config_registry
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# 导入具体的配置模型
|
| 19 |
+
try:
|
| 20 |
+
from .moyoyo import MoYoYoTTSConfig
|
| 21 |
+
|
| 22 |
+
_moyoyo_available = True
|
| 23 |
+
except ImportError:
|
| 24 |
+
_moyoyo_available = False
|
| 25 |
+
import logging
|
| 26 |
+
|
| 27 |
+
logging.warning("MoYoYo TTS config not available")
|
| 28 |
+
|
| 29 |
+
# 动态构建导出列表
|
| 30 |
+
__all__ = [
|
| 31 |
+
'TTSConfigType',
|
| 32 |
+
'VoiceModelStatus',
|
| 33 |
+
'BaseTTSConfig',
|
| 34 |
+
'TTSConfigRegistry',
|
| 35 |
+
'tts_config_registry',
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
if _moyoyo_available:
|
| 39 |
+
__all__.append('MoYoYoTTSConfig')
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# 自动注册所有可用的配置
|
| 43 |
+
def _auto_register_configs():
|
| 44 |
+
"""自动注册所有TTS配置"""
|
| 45 |
+
try:
|
| 46 |
+
if _moyoyo_available:
|
| 47 |
+
from ..configs.moyoyo import get_moyoyo_configs
|
| 48 |
+
for config in get_moyoyo_configs():
|
| 49 |
+
tts_config_registry.register_config(config)
|
| 50 |
+
except Exception as e:
|
| 51 |
+
import logging
|
| 52 |
+
logging.error(f"Failed to auto-register configs: {e}")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# 模块加载时自动注册配置
|
| 56 |
+
_auto_register_configs()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# 配置统计信息
|
| 60 |
+
def get_config_stats():
|
| 61 |
+
"""获取配置统计信息"""
|
| 62 |
+
all_configs = tts_config_registry.get_all_configs()
|
| 63 |
+
stats = {
|
| 64 |
+
'total_configs': len(all_configs),
|
| 65 |
+
'configs_by_type': {}
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
for config_type in TTSConfigType:
|
| 69 |
+
type_configs = tts_config_registry.get_configs_by_type(config_type)
|
| 70 |
+
stats['configs_by_type'][config_type.value] = len(type_configs)
|
| 71 |
+
|
| 72 |
+
return stats
|
src/VoiceDialogue/{models/voice_model → services/audio/audio_generator/models}/base.py
RENAMED
|
@@ -52,36 +52,17 @@ class BaseTTSConfig(BaseModel, ABC):
|
|
| 52 |
pass
|
| 53 |
|
| 54 |
|
| 55 |
-
class BaseTTSInference(ABC):
|
| 56 |
-
"""TTS推理基类"""
|
| 57 |
-
|
| 58 |
-
@abstractmethod
|
| 59 |
-
def generate_speech(self, text: str, config: BaseTTSConfig, **kwargs) -> bytes:
|
| 60 |
-
"""生成语音"""
|
| 61 |
-
pass
|
| 62 |
-
|
| 63 |
-
@abstractmethod
|
| 64 |
-
def is_supported_config(self, config: BaseTTSConfig) -> bool:
|
| 65 |
-
"""检查是否支持此配置"""
|
| 66 |
-
pass
|
| 67 |
-
|
| 68 |
-
|
| 69 |
class TTSConfigRegistry:
|
| 70 |
"""TTS注册表,管理所有TTS引擎和配置"""
|
| 71 |
|
| 72 |
def __init__(self):
|
| 73 |
self._configs: dict[str, BaseTTSConfig] = {}
|
| 74 |
-
self._inference_engines: dict[TTSConfigType, BaseTTSInference] = {}
|
| 75 |
|
| 76 |
def register_config(self, config: BaseTTSConfig):
|
| 77 |
"""注册TTS配置"""
|
| 78 |
key = f"{config.tts_type.value}:{config.character_name}"
|
| 79 |
self._configs[key] = config
|
| 80 |
|
| 81 |
-
def register_inference_engine(self, tts_type: TTSConfigType, engine: BaseTTSInference):
|
| 82 |
-
"""注册TTS推理引擎"""
|
| 83 |
-
self._inference_engines[tts_type] = engine
|
| 84 |
-
|
| 85 |
def get_config(self, tts_type: TTSConfigType, character_name: str) -> BaseTTSConfig:
|
| 86 |
"""获取指定配置"""
|
| 87 |
key = f"{tts_type.value}:{character_name}"
|
|
@@ -96,24 +77,6 @@ class TTSConfigRegistry:
|
|
| 96 |
"""获取所有配置"""
|
| 97 |
return list(self._configs.values())
|
| 98 |
|
| 99 |
-
def get_inference_engine(self, tts_type: TTSConfigType) -> BaseTTSInference:
|
| 100 |
-
"""获取推理引擎"""
|
| 101 |
-
return self._inference_engines.get(tts_type)
|
| 102 |
-
|
| 103 |
-
def generate_speech(self, tts_type: TTSConfigType, character_name: str,
|
| 104 |
-
text: str, **kwargs) -> bytes:
|
| 105 |
-
"""生成语音的统一接口"""
|
| 106 |
-
config = self.get_config(tts_type, character_name)
|
| 107 |
-
engine = self.get_inference_engine(tts_type)
|
| 108 |
-
|
| 109 |
-
if not config or not engine:
|
| 110 |
-
raise ValueError(f"TTS配置或引擎不存在: {tts_type.value}:{character_name}")
|
| 111 |
-
|
| 112 |
-
if not engine.is_supported_config(config):
|
| 113 |
-
raise ValueError(f"推理引擎不支持此配置: {tts_type.value}:{character_name}")
|
| 114 |
-
|
| 115 |
-
return engine.generate_speech(text, config, **kwargs)
|
| 116 |
-
|
| 117 |
|
| 118 |
# 全局TTS注册表实例
|
| 119 |
tts_config_registry = TTSConfigRegistry()
|
|
|
|
| 52 |
pass
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
class TTSConfigRegistry:
|
| 56 |
"""TTS注册表,管理所有TTS引擎和配置"""
|
| 57 |
|
| 58 |
def __init__(self):
|
| 59 |
self._configs: dict[str, BaseTTSConfig] = {}
|
|
|
|
| 60 |
|
| 61 |
def register_config(self, config: BaseTTSConfig):
|
| 62 |
"""注册TTS配置"""
|
| 63 |
key = f"{config.tts_type.value}:{config.character_name}"
|
| 64 |
self._configs[key] = config
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def get_config(self, tts_type: TTSConfigType, character_name: str) -> BaseTTSConfig:
|
| 67 |
"""获取指定配置"""
|
| 68 |
key = f"{tts_type.value}:{character_name}"
|
|
|
|
| 77 |
"""获取所有配置"""
|
| 78 |
return list(self._configs.values())
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# 全局TTS注册表实例
|
| 82 |
tts_config_registry = TTSConfigRegistry()
|
src/VoiceDialogue/{models/voice_model/moyoyo_tts.py → services/audio/audio_generator/models/moyoyo.py}
RENAMED
|
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
|
|
| 6 |
|
| 7 |
from config.settings import settings
|
| 8 |
from utils.download_utils import download_file_from_huggingface
|
| 9 |
-
from .base import BaseTTSConfig,
|
| 10 |
|
| 11 |
|
| 12 |
class InferenceParameters(BaseModel):
|
|
@@ -140,20 +140,3 @@ class MoYoYoTTSConfig(BaseTTSConfig):
|
|
| 140 |
'bert_base_path': self.bert_model_path,
|
| 141 |
}
|
| 142 |
}
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
class MoYoYoTTSInference(BaseTTSInference):
|
| 146 |
-
"""MoYoYo TTS推理引擎"""
|
| 147 |
-
|
| 148 |
-
def generate_speech(self, text: str, config: BaseTTSConfig, **kwargs) -> bytes:
|
| 149 |
-
"""生成语音"""
|
| 150 |
-
if not isinstance(config, MoYoYoTTSConfig):
|
| 151 |
-
raise ValueError("配置类型不匹配,需要MoYoYoTTSConfig")
|
| 152 |
-
|
| 153 |
-
# 这里实现MoYoYo TTS的具体推理逻辑
|
| 154 |
-
# 暂时返回空字节,实际实现需要调用相应的TTS模型
|
| 155 |
-
return b""
|
| 156 |
-
|
| 157 |
-
def is_supported_config(self, config: BaseTTSConfig) -> bool:
|
| 158 |
-
"""检查是否支持此配置"""
|
| 159 |
-
return isinstance(config, MoYoYoTTSConfig)
|
|
|
|
| 6 |
|
| 7 |
from config.settings import settings
|
| 8 |
from utils.download_utils import download_file_from_huggingface
|
| 9 |
+
from .base import BaseTTSConfig, TTSConfigType, VoiceModelStatus
|
| 10 |
|
| 11 |
|
| 12 |
class InferenceParameters(BaseModel):
|
|
|
|
| 140 |
'bert_base_path': self.bert_model_path,
|
| 141 |
}
|
| 142 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/VoiceDialogue/services/audio/audio_generator/runtime/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Runtime Module
|
| 3 |
+
|
| 4 |
+
TTS运行时模块,包含:
|
| 5 |
+
- TTS抽象接口定义
|
| 6 |
+
- TTS工厂类
|
| 7 |
+
- 具体TTS实现
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .interface import TTSInterface, TTSFactory
|
| 11 |
+
|
| 12 |
+
# 导入所有TTS实现,确保注册装饰器被执行
|
| 13 |
+
try:
|
| 14 |
+
from .moyoyo import MoYoYoTTS
|
| 15 |
+
|
| 16 |
+
__all__ = [
|
| 17 |
+
'TTSInterface',
|
| 18 |
+
'TTSFactory',
|
| 19 |
+
'MoYoYoTTS'
|
| 20 |
+
]
|
| 21 |
+
except ImportError as e:
|
| 22 |
+
# 如果某些TTS实现无法导入,不影响整体功能
|
| 23 |
+
import logging
|
| 24 |
+
|
| 25 |
+
logging.warning(f"Failed to import some TTS implementations: {e}")
|
| 26 |
+
__all__ = [
|
| 27 |
+
'TTSInterface',
|
| 28 |
+
'TTSFactory'
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
# 可用的TTS实现列表
|
| 32 |
+
AVAILABLE_TTS_IMPLEMENTATIONS = [impl for impl in __all__ if impl.endswith('TTS')]
|
src/VoiceDialogue/services/audio/audio_generator/runtime/interface.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from ..models.base import BaseTTSConfig
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TTSInterface(ABC):
|
| 10 |
+
"""TTS服务的抽象接口"""
|
| 11 |
+
|
| 12 |
+
def __init__(self, config: BaseTTSConfig):
|
| 13 |
+
self.config = config
|
| 14 |
+
self._is_ready = False
|
| 15 |
+
|
| 16 |
+
@abstractmethod
|
| 17 |
+
def setup(self, **kwargs) -> None:
|
| 18 |
+
"""
|
| 19 |
+
初始化TTS服务
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
**kwargs: 额外的初始化参数
|
| 23 |
+
"""
|
| 24 |
+
pass
|
| 25 |
+
|
| 26 |
+
@abstractmethod
|
| 27 |
+
def warmup(self, warmup_steps: int = 1) -> None:
|
| 28 |
+
"""
|
| 29 |
+
预热TTS引擎
|
| 30 |
+
|
| 31 |
+
Args:
|
| 32 |
+
warmup_steps: 预热步数
|
| 33 |
+
"""
|
| 34 |
+
pass
|
| 35 |
+
|
| 36 |
+
@abstractmethod
|
| 37 |
+
def synthesize(self, text: str, **kwargs) -> Tuple[int, np.ndarray]:
|
| 38 |
+
"""
|
| 39 |
+
将文本转换为语音
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
text: 要转换的文本
|
| 43 |
+
**kwargs: 额外的合成参数
|
| 44 |
+
|
| 45 |
+
Returns:
|
| 46 |
+
Tuple[np.ndarray, int]: (音频数据, 采样率)
|
| 47 |
+
"""
|
| 48 |
+
pass
|
| 49 |
+
|
| 50 |
+
@property
|
| 51 |
+
def is_ready(self) -> bool:
|
| 52 |
+
"""
|
| 53 |
+
检查TTS服务是否准备就绪
|
| 54 |
+
|
| 55 |
+
Returns:
|
| 56 |
+
bool: 是否准备就绪
|
| 57 |
+
"""
|
| 58 |
+
return self._is_ready
|
| 59 |
+
|
| 60 |
+
@is_ready.setter
|
| 61 |
+
def is_ready(self, value: bool):
|
| 62 |
+
self._is_ready = value
|
| 63 |
+
|
| 64 |
+
def get_config(self) -> BaseTTSConfig:
|
| 65 |
+
"""获取当前配置"""
|
| 66 |
+
return self.config
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class TTSFactory:
|
| 70 |
+
"""TTS工厂类,用于创建不同的TTS实现"""
|
| 71 |
+
|
| 72 |
+
_registry = {}
|
| 73 |
+
|
| 74 |
+
@classmethod
|
| 75 |
+
def register(cls, provider_name: str, tts_class):
|
| 76 |
+
"""注册TTS提供者"""
|
| 77 |
+
cls._registry[provider_name] = tts_class
|
| 78 |
+
|
| 79 |
+
@classmethod
|
| 80 |
+
def create(cls, config: BaseTTSConfig) -> TTSInterface:
|
| 81 |
+
"""
|
| 82 |
+
根据配置创建TTS实例
|
| 83 |
+
|
| 84 |
+
Args:
|
| 85 |
+
config: TTS配置
|
| 86 |
+
|
| 87 |
+
Returns:
|
| 88 |
+
TTSInterface: TTS实例
|
| 89 |
+
|
| 90 |
+
Raises:
|
| 91 |
+
ValueError: 不支持的TTS提供者
|
| 92 |
+
"""
|
| 93 |
+
provider = config.provider.value
|
| 94 |
+
if provider not in cls._registry:
|
| 95 |
+
raise ValueError(f"不支持的TTS提供者: {provider}")
|
| 96 |
+
|
| 97 |
+
tts_class = cls._registry[provider]
|
| 98 |
+
return tts_class(config)
|
| 99 |
+
|
| 100 |
+
@classmethod
|
| 101 |
+
def list_providers(cls):
|
| 102 |
+
"""列出所有已注册的TTS提供者"""
|
| 103 |
+
return list(cls._registry.keys())
|
src/VoiceDialogue/services/audio/audio_generator/runtime/moyoyo.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import typing
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from config.paths import load_third_party
|
| 7 |
+
from .interface import TTSInterface
|
| 8 |
+
from ..models.moyoyo import MoYoYoTTSConfig
|
| 9 |
+
from ..tts_manager import tts_tables
|
| 10 |
+
|
| 11 |
+
load_third_party()
|
| 12 |
+
|
| 13 |
+
from moyoyo_tts import TTSModule, TTS_Config
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@tts_tables.register("tts_classes", "moyoyo")
|
| 17 |
+
class MoYoYoTTS(TTSInterface):
|
| 18 |
+
"""MoYoYo TTS实现"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, config: MoYoYoTTSConfig):
|
| 21 |
+
super().__init__(config)
|
| 22 |
+
self.tts_module: typing.Optional[TTSModule] = None
|
| 23 |
+
|
| 24 |
+
def setup(self, **kwargs) -> None:
|
| 25 |
+
"""设置TTS模块"""
|
| 26 |
+
tts_config = TTS_Config(self.config.get_runtime_config())
|
| 27 |
+
self.tts_module = TTSModule(tts_config)
|
| 28 |
+
self.tts_module.setup_inference_params(
|
| 29 |
+
ref_audio=self.config.reference_audio_path,
|
| 30 |
+
parallel_infer=False,
|
| 31 |
+
**self.config.inference_parameters.model_dump()
|
| 32 |
+
)
|
| 33 |
+
self.is_ready = True
|
| 34 |
+
|
| 35 |
+
def warmup(self, warmup_steps: int = 1) -> None:
|
| 36 |
+
"""预热TTS引擎"""
|
| 37 |
+
print('[INFO:] Warming up TTS engine...')
|
| 38 |
+
warmup_texts = ['Warming up TTS engine.', '预热文字转音频引擎。']
|
| 39 |
+
for _ in range(warmup_steps):
|
| 40 |
+
for warmup_text in warmup_texts:
|
| 41 |
+
self.tts_module.generate_audio(warmup_text, warmup=True)
|
| 42 |
+
print('[INFO:] Warm up TTS engine finished.')
|
| 43 |
+
|
| 44 |
+
def synthesize(self, text: str, **kwargs) -> Tuple[np.ndarray, int]:
|
| 45 |
+
"""合成语音"""
|
| 46 |
+
if not self.is_ready:
|
| 47 |
+
raise RuntimeError("TTS module is not ready. Please call setup() first.")
|
| 48 |
+
|
| 49 |
+
(sample_rate, audio_data), *_ = self.tts_module.generate_audio(text)
|
| 50 |
+
return audio_data, sample_rate
|
src/VoiceDialogue/services/audio/audio_generator/tts_manager.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import inspect
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import re
|
| 5 |
+
from typing import Dict, Type, Optional
|
| 6 |
+
from .runtime.interface import TTSInterface
|
| 7 |
+
from .models.base import BaseTTSConfig, TTSConfigType
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass
|
| 11 |
+
class TTSRegistryTables:
|
| 12 |
+
"""TTS注册表系统,用于管理不同的TTS实现"""
|
| 13 |
+
|
| 14 |
+
tts_classes: Dict[str, Type[TTSInterface]] = None
|
| 15 |
+
|
| 16 |
+
def __post_init__(self):
|
| 17 |
+
if self.tts_classes is None:
|
| 18 |
+
self.tts_classes = {}
|
| 19 |
+
|
| 20 |
+
def print(self, key: str = None) -> None:
|
| 21 |
+
"""打印已注册的TTS类"""
|
| 22 |
+
print("\nTTS Registry Tables: \n")
|
| 23 |
+
headers = ["register name", "class name", "class location"]
|
| 24 |
+
|
| 25 |
+
if self.tts_classes and (key is None or "tts_classes" in key):
|
| 26 |
+
print(f"----------- ** tts_classes ** --------------")
|
| 27 |
+
metas = []
|
| 28 |
+
for register_key, tts_class in self.tts_classes.items():
|
| 29 |
+
class_file = inspect.getfile(tts_class)
|
| 30 |
+
class_line = inspect.getsourcelines(tts_class)[1]
|
| 31 |
+
# 简化路径显示
|
| 32 |
+
pattern = r"^.+/VoiceDialogue/"
|
| 33 |
+
class_file = re.sub(pattern, "VoiceDialogue/", class_file)
|
| 34 |
+
meta_data = [
|
| 35 |
+
register_key,
|
| 36 |
+
tts_class.__name__,
|
| 37 |
+
f"{class_file}:{class_line}",
|
| 38 |
+
]
|
| 39 |
+
metas.append(meta_data)
|
| 40 |
+
|
| 41 |
+
metas.sort(key=lambda x: x[0])
|
| 42 |
+
data = [headers] + metas
|
| 43 |
+
col_widths = [max(len(str(item)) for item in col) for col in zip(*data)]
|
| 44 |
+
|
| 45 |
+
for row in data:
|
| 46 |
+
print(
|
| 47 |
+
"| "
|
| 48 |
+
+ " | ".join(str(item).ljust(width) for item, width in zip(row, col_widths))
|
| 49 |
+
+ " |"
|
| 50 |
+
)
|
| 51 |
+
print("\n")
|
| 52 |
+
|
| 53 |
+
def register(self, register_table_key: str, key: str = None) -> callable:
|
| 54 |
+
"""装饰器,用于注册TTS类"""
|
| 55 |
+
|
| 56 |
+
def decorator(target_class):
|
| 57 |
+
if not hasattr(self, register_table_key):
|
| 58 |
+
setattr(self, register_table_key, {})
|
| 59 |
+
logging.debug(f"New TTS registry table added: {register_table_key}")
|
| 60 |
+
|
| 61 |
+
registry = getattr(self, register_table_key)
|
| 62 |
+
registry_key = key if key is not None else target_class.__name__
|
| 63 |
+
|
| 64 |
+
if registry_key in registry:
|
| 65 |
+
logging.debug(
|
| 66 |
+
f"Key {registry_key} already exists in {register_table_key}, re-register"
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
registry[registry_key] = target_class
|
| 70 |
+
logging.info(f"Registered TTS class: {registry_key} -> {target_class.__name__}")
|
| 71 |
+
return target_class
|
| 72 |
+
|
| 73 |
+
return decorator
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# 全局TTS注册表实例
|
| 77 |
+
tts_tables = TTSRegistryTables()
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class TTSManager:
|
| 81 |
+
"""TTS管理器,负责管理和创建TTS实例"""
|
| 82 |
+
|
| 83 |
+
def __init__(self):
|
| 84 |
+
self._tts_instances: Dict[str, TTSInterface] = {}
|
| 85 |
+
|
| 86 |
+
def create_tts(self, config: BaseTTSConfig) -> TTSInterface:
|
| 87 |
+
"""
|
| 88 |
+
根据配置创建TTS实例
|
| 89 |
+
|
| 90 |
+
Args:
|
| 91 |
+
config: TTS配置对象
|
| 92 |
+
|
| 93 |
+
Returns:
|
| 94 |
+
TTSInterface: TTS实例
|
| 95 |
+
|
| 96 |
+
Raises:
|
| 97 |
+
ValueError: 如果TTS类型未注册
|
| 98 |
+
"""
|
| 99 |
+
tts_type = config.tts_type.value
|
| 100 |
+
|
| 101 |
+
if tts_type not in tts_tables.tts_classes:
|
| 102 |
+
raise ValueError(f"未注册的TTS类型: {tts_type}. 可用类型: {list(tts_tables.tts_classes.keys())}")
|
| 103 |
+
|
| 104 |
+
tts_class = tts_tables.tts_classes[tts_type]
|
| 105 |
+
return tts_class(config)
|
| 106 |
+
|
| 107 |
+
def get_or_create_tts(self, config: BaseTTSConfig) -> TTSInterface:
|
| 108 |
+
"""
|
| 109 |
+
获取或创建TTS实例(单例模式)
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
config: TTS配置对象
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
TTSInterface: TTS实例
|
| 116 |
+
"""
|
| 117 |
+
instance_key = f"{config.tts_type.value}:{config.character_name}"
|
| 118 |
+
|
| 119 |
+
if instance_key not in self._tts_instances:
|
| 120 |
+
self._tts_instances[instance_key] = self.create_tts(config)
|
| 121 |
+
|
| 122 |
+
return self._tts_instances[instance_key]
|
| 123 |
+
|
| 124 |
+
def list_registered_tts(self) -> Dict[str, Type[TTSInterface]]:
|
| 125 |
+
"""列出所有已注册的TTS类"""
|
| 126 |
+
return tts_tables.tts_classes.copy()
|
| 127 |
+
|
| 128 |
+
def is_tts_registered(self, tts_type: str) -> bool:
|
| 129 |
+
"""检查指定TTS类型是否已注册"""
|
| 130 |
+
return tts_type in tts_tables.tts_classes
|
| 131 |
+
|
| 132 |
+
def print_registry(self):
|
| 133 |
+
"""打印注册表信息"""
|
| 134 |
+
tts_tables.print()
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# 全局TTS管理器实例
|
| 138 |
+
tts_manager = TTSManager()
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def register_all_tts():
|
| 142 |
+
"""自动发现并注册runtime目录中的所有TTS实现"""
|
| 143 |
+
import os
|
| 144 |
+
import importlib
|
| 145 |
+
from pathlib import Path
|
| 146 |
+
|
| 147 |
+
# 获取runtime目录路径
|
| 148 |
+
runtime_dir = Path(__file__).parent / "runtime"
|
| 149 |
+
|
| 150 |
+
# 扫描runtime目录中的Python文件
|
| 151 |
+
for py_file in runtime_dir.glob("*.py"):
|
| 152 |
+
if py_file.name in ["__init__.py", "interface.py"]:
|
| 153 |
+
continue
|
| 154 |
+
|
| 155 |
+
module_name = py_file.stem
|
| 156 |
+
try:
|
| 157 |
+
# 动态导入模块
|
| 158 |
+
module = importlib.import_module(f".runtime.{module_name}",
|
| 159 |
+
package="VoiceDialogue.services.audio.audio_generator")
|
| 160 |
+
logging.info(f"Successfully imported TTS module: {module_name}")
|
| 161 |
+
except ImportError as e:
|
| 162 |
+
logging.warning(f"Failed to import TTS module {module_name}: {e}")
|
| 163 |
+
except Exception as e:
|
| 164 |
+
logging.error(f"Unexpected error importing TTS module {module_name}: {e}")
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
# 在模块导入时自动注册所有TTS
|
| 168 |
+
register_all_tts()
|
src/VoiceDialogue/services/audio/audio_player.py
CHANGED
|
@@ -70,7 +70,8 @@ class AudioStreamPlayer(BaseThread):
|
|
| 70 |
|
| 71 |
voice_state_manager.set_audio_playing(task_id)
|
| 72 |
voice_state_manager.reset_task_id()
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
if self.audio_playing_queue.empty():
|
| 76 |
print(f'回答播放完了')
|
|
@@ -90,11 +91,9 @@ class AudioStreamPlayer(BaseThread):
|
|
| 90 |
|
| 91 |
chat_history_cache[voice_task.session_id] = chat_history
|
| 92 |
|
| 93 |
-
def playing_audio(self,
|
| 94 |
-
audio_data = tts_generated_audio[0][1]
|
| 95 |
-
samplerate = tts_generated_audio[0][0]
|
| 96 |
with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
|
| 97 |
# print(f'================soundfile : {soundfile.name}')
|
| 98 |
-
sf.write(soundfile, audio_data, samplerate=
|
| 99 |
# print(soundfile.name)
|
| 100 |
playsound(soundfile.name, block=True)
|
|
|
|
| 70 |
|
| 71 |
voice_state_manager.set_audio_playing(task_id)
|
| 72 |
voice_state_manager.reset_task_id()
|
| 73 |
+
audio_data, sample_rate = voice_task.tts_generated_sentence_audio
|
| 74 |
+
self.playing_audio(audio_data, sample_rate)
|
| 75 |
|
| 76 |
if self.audio_playing_queue.empty():
|
| 77 |
print(f'回答播放完了')
|
|
|
|
| 91 |
|
| 92 |
chat_history_cache[voice_task.session_id] = chat_history
|
| 93 |
|
| 94 |
+
def playing_audio(self, audio_data, sample_rate=16000):
|
|
|
|
|
|
|
| 95 |
with tempfile.NamedTemporaryFile('w+b', suffix='.wav') as soundfile:
|
| 96 |
# print(f'================soundfile : {soundfile.name}')
|
| 97 |
+
sf.write(soundfile, audio_data, samplerate=sample_rate, subtype='PCM_16', closefd=False)
|
| 98 |
# print(soundfile.name)
|
| 99 |
playsound(soundfile.name, block=True)
|