diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..d605cf1dac69e2f554928ef62a668400447ade5a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,345 @@
+import argparse
+import sys
+import time
+import typing
+from pathlib import Path
+
+import uvicorn
+
+HERE = Path(__file__).parent
+lib_path = HERE / "src"
+if lib_path.as_posix() not in sys.path:
+    sys.path.insert(0, lib_path.as_posix())
+
+from voice_dialogue.core.constants import (
+    audio_frames_queue,
+    user_voice_queue,
+    transcribed_text_queue,
+    text_input_queue,
+    audio_output_queue
+)
+from voice_dialogue.services.audio.capture import EchoCancellingAudioCapture
+from voice_dialogue.services.audio.generator import TTSAudioGenerator
+from voice_dialogue.services.audio.generators.models import tts_config_registry
+from voice_dialogue.services.audio.player import AudioStreamPlayer
+from voice_dialogue.services.speech.monitor import SpeechStateMonitor
+from voice_dialogue.services.speech.recognizer import ASRWorker
+from voice_dialogue.services.text.generator import LLMResponseGenerator
+
+language: typing.Literal['zh', 'en'] = 'en'
+
+# Chinese display names accepted on the command line, mapped to the
+# ``character_name`` values matched against the TTS config registry.
+_CHINESE_TO_ENGLISH_SPEAKERS = {
+    '罗翔': 'Luo Xiang',
+    '马保国': 'Ma Baoguo',
+    '沈逸': 'Shen Yi',
+    '杨幂': 'Yang Mi',
+    '周杰伦': 'Zhou Jielun',
+    '马云': 'Ma Yun',
+}
+# Reverse lookup, derived so the two directions cannot drift apart.
+_ENGLISH_TO_CHINESE_SPEAKERS = {
+    english: chinese for chinese, english in _CHINESE_TO_ENGLISH_SPEAKERS.items()
+}
+
+# Import string handed to uvicorn. Only ``src`` is added to sys.path above, so
+# the app has to be addressed through the ``voice_dialogue`` package.
+_API_APP_IMPORT_STRING = "voice_dialogue.api.app:app"
+
+
+def launch_system(
+        user_language: str,
+        speaker: str
+) -> None:
+    """
+    Start every worker of the voice dialogue pipeline and block until they exit.
+
+    Workers are started in pipeline order and hand data to each other through
+    the shared queues:
+        1. EchoCancellingAudioCapture -> audio_frames_queue
+        2. SpeechStateMonitor         -> user_voice_queue
+        3. ASRWorker                  -> transcribed_text_queue
+        4. LLMResponseGenerator       -> text_input_queue
+        5. TTSAudioGenerator          -> audio_output_queue
+        6. AudioStreamPlayer (consumes audio_output_queue)
+
+    Args:
+        user_language (str): Language passed to the ASR worker, 'zh' or 'en'.
+        speaker (str): TTS speaker, given either as a Chinese display name or
+            as the ``character_name`` of a registered TTS config.
+
+    Raises:
+        ValueError: If no registered TTS config matches ``speaker``. Raised
+            before any worker has been started.
+
+    Note:
+        This call blocks: it waits for every worker to report ready and then
+        joins all of them.
+    """
+    # Resolve the speaker before starting anything. Raising once the capture,
+    # monitor, ASR and LLM workers are already running would abandon them
+    # without a join.
+    tts_speaker_config = _get_tts_config_by_speaker_name(speaker)
+    if tts_speaker_config is None:
+        available_speakers = _get_available_speaker_names()
+        raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}")
+
+    threads = []
+
+    audio_frame_probe = EchoCancellingAudioCapture(audio_frames_queue=audio_frames_queue)
+    audio_frame_probe.start()
+    threads.append(audio_frame_probe)
+
+    user_voice_checker = SpeechStateMonitor(
+        audio_frame_queue=audio_frames_queue,
+        user_voice_queue=user_voice_queue,
+    )
+    user_voice_checker.start()
+    threads.append(user_voice_checker)
+
+    whisper_worker = ASRWorker(
+        user_voice_queue=user_voice_queue, transcribed_text_queue=transcribed_text_queue,
+        language=user_language
+    )
+    whisper_worker.start()
+    threads.append(whisper_worker)
+
+    answer_generator_worker = LLMResponseGenerator(
+        user_question_queue=transcribed_text_queue,
+        generated_answer_queue=text_input_queue
+    )
+    answer_generator_worker.start()
+    threads.append(answer_generator_worker)
+
+    audio_generator_worker = TTSAudioGenerator(
+        text_input_queue=text_input_queue,
+        audio_output_queue=audio_output_queue,
+        tts_config=tts_speaker_config
+    )
+    audio_generator_worker.start()
+    threads.append(audio_generator_worker)
+
+    audio_playing_worker = AudioStreamPlayer(audio_playing_queue=audio_output_queue)
+    audio_playing_worker.start()
+    threads.append(audio_playing_worker)
+
+    # Poll until every worker reports ready before announcing success.
+    while not all(thread.is_ready for thread in threads):
+        time.sleep(0.1)
+
+    print(f'{"=" * 80}\n服务启动成功\n{"=" * 80}')
+    for thread in threads:
+        thread.join()
+
+
+def _get_tts_config_by_speaker_name(speaker_name: str):
+    """
+    Find the registered TTS config for a speaker name.
+
+    A Chinese display name is first translated to its ``character_name``; any
+    other input is matched as given. If the translated name matches nothing,
+    the name exactly as passed in is tried as a fallback.
+
+    Args:
+        speaker_name (str): Chinese display name or registry ``character_name``.
+
+    Returns:
+        The first config whose ``character_name`` matches, or None.
+    """
+    english_name = _CHINESE_TO_ENGLISH_SPEAKERS.get(speaker_name, speaker_name)
+    all_configs = tts_config_registry.get_all_configs()
+
+    # dict.fromkeys drops the duplicate when no translation applied, while
+    # keeping the translated name ahead of the raw input.
+    for candidate in dict.fromkeys((english_name, speaker_name)):
+        for config in all_configs:
+            if config.character_name == candidate:
+                return config
+
+    return None
+
+
+def _get_available_speaker_names():
+    """
+    List the names of all registered TTS speakers.
+
+    Returns:
+        list[str]: Sorted names; the Chinese display name where one is known,
+        otherwise the config's ``character_name``.
+    """
+    return sorted(
+        _ENGLISH_TO_CHINESE_SPEAKERS.get(config.character_name, config.character_name)
+        for config in tts_config_registry.get_all_configs()
+    )
+
+
+def _update_argument_parser_speaker_choices():
+    """
+    Return the speaker names offered as ``--speaker`` choices.
+
+    Returns:
+        list[str]: Same list as ``_get_available_speaker_names()``.
+    """
+    return _get_available_speaker_names()
+
+
+def create_argument_parser():
+    """
+    Build the command-line parser.
+
+    The ``--speaker`` choices and default are computed from the TTS config
+    registry at call time.
+
+    Returns:
+        argparse.ArgumentParser: Parser with mode, CLI and API option groups.
+    """
+    available_speakers = _update_argument_parser_speaker_choices()
+
+    # Prefer '沈逸'; otherwise fall back to the first registered speaker. With
+    # an empty registry the original '沈逸' default is kept.
+    if '沈逸' in available_speakers or not available_speakers:
+        default_speaker = '沈逸'
+    else:
+        default_speaker = available_speakers[0]
+
+    parser = argparse.ArgumentParser(
+        description="VoiceDialogue - 语音对话系统",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f"""
+示例用法:
+  # 启动命令行模式(默认)
+  python main.py
+
+  # 启动命令行模式并指定参数
+  python main.py --mode cli --language zh --speaker 沈逸
+
+  # 启动API服务器
+  python main.py --mode api
+
+  # 启动API服务器并指定端口
+  python main.py --mode api --port 9000
+
+  # 启动API服务器并启用热重载(开发模式)
+  python main.py --mode api --port 8000 --reload
+
+支持的说话人:
+  {', '.join(available_speakers)}
+        """
+    )
+
+    # Run mode selection
+    parser.add_argument(
+        '--mode', '-m',
+        choices=['cli', 'api'],
+        default='cli',
+        help='运行模式: cli=命令行模式, api=API服务器模式 (默认: cli)'
+    )
+
+    # Options used by CLI mode
+    cli_group = parser.add_argument_group('命令行模式参数')
+    cli_group.add_argument(
+        '--language', '-l',
+        choices=['zh', 'en'],
+        default='zh',
+        help='用户语言: zh=中文, en=英文 (默认: zh)'
+    )
+    cli_group.add_argument(
+        '--speaker', '-s',
+        choices=available_speakers,
+        default=default_speaker,
+        # %(default)s keeps the help text in step with the computed default.
+        help='TTS说话人 (默认: %(default)s)'
+    )
+
+    # Options used by API server mode
+    api_group = parser.add_argument_group('API服务器模式参数')
+    api_group.add_argument(
+        '--host',
+        default='0.0.0.0',
+        help='服务器主机地址 (默认: 0.0.0.0)'
+    )
+    api_group.add_argument(
+        '--port', '-p',
+        type=int,
+        default=8000,
+        help='服务器端口 (默认: 8000)'
+    )
+    api_group.add_argument(
+        '--reload',
+        action='store_true',
+        help='启用热重载(开发模式)'
+    )
+
+    return parser
+
+
+def launch_api_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
+    """
+    Run the FastAPI application under uvicorn.
+
+    Args:
+        host (str): Address to bind, "0.0.0.0" by default.
+        port (int): Port to bind, 8000 by default.
+        reload (bool): Enable uvicorn auto-reload (development use).
+    """
+    print(f'{"=" * 80}\n正在启动API服务器...\n{"=" * 80}')
+    print(f"服务器地址: http://{host}:{port}")
+    print(f"API文档: http://{host}:{port}/docs")
+    print(f"热重载: {'启用' if reload else '禁用'}")
+    print(f'{"=" * 80}')
+
+    # The app is passed as an import string, which uvicorn needs for reload.
+    uvicorn.run(
+        _API_APP_IMPORT_STRING,
+        host=host,
+        port=port,
+        reload=reload,
+        log_level="info"
+    )
+
+
+def main():
+    """
+    Program entry point.
+
+    Parses the command line and starts the selected mode:
+        - cli: the local voice dialogue pipeline
+        - api: the HTTP API server
+    """
+    parser = create_argument_parser()
+    args = parser.parse_args()
+
+    print(f"""
+{"=" * 80}
+VoiceDialogue - 语音对话系统
+{"=" * 80}
+运行模式: {args.mode.upper()}
+{"=" * 80}
+    """)
+
+    try:
+        if args.mode == 'cli':
+            print(f"语言设置: {args.language}")
+            print(f"说话人: {args.speaker}")
+            print("正在启动命令行语音对话系统...")
+            launch_system(args.language, args.speaker)
+
+        elif args.mode == 'api':
+            launch_api_server(
+                host=args.host,
+                port=args.port,
+                reload=args.reload
+            )
+
+    except KeyboardInterrupt:
+        print("\n程序被用户中断")
+    except Exception as e:
+        # Report at the top level, then re-raise so the traceback and the
+        # non-zero exit status are preserved.
+        print(f"程序运行出错: {e}")
+        raise
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/VoiceDialogue/services/text/__init__.py b/src/VoiceDialogue/services/text/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/voice_dialogue/__init__.py b/src/voice_dialogue/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..20c081f9eb130dd8dff3d245c05e134f85121b60
--- /dev/null
+++ b/src/voice_dialogue/__init__.py
@@ -0,0 +1,14 @@
+from .core.constants import (
+    audio_frames_queue,
+    user_voice_queue,
+    transcribed_text_queue,
+    text_input_queue,
+    audio_output_queue
+)
+from .services.audio.capture import EchoCancellingAudioCapture
+from .services.audio.generator import TTSAudioGenerator
+from .services.audio.generators.models import tts_config_registry
+from .services.audio.player import AudioStreamPlayer
+from .services.speech.monitor import SpeechStateMonitor
+from .services.speech.recognizer import ASRWorker
+from .services.text.generator import LLMResponseGenerator
diff --git a/src/VoiceDialogue/__init__.py b/src/voice_dialogue/api/__init__.py
similarity index 100%
rename from src/VoiceDialogue/__init__.py
rename to src/voice_dialogue/api/__init__.py
diff --git a/src/VoiceDialogue/api/app.py b/src/voice_dialogue/api/app.py
similarity index 97%
rename from src/VoiceDialogue/api/app.py
rename to src/voice_dialogue/api/app.py
index ef2caca47438abaa44f3189ecbda5d578c53535c..500df4561983cc5151ddc41acdc121322cc5c131 100644
--- a/src/VoiceDialogue/api/app.py
+++ 
b/src/voice_dialogue/api/app.py @@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles -from config.paths import FRONTEND_ASSETS_PATH +from voice_dialogue.config.paths import FRONTEND_ASSETS_PATH from .core.config import AppConfig from .core.lifespan import lifespan from .middleware.logging import LoggingMiddleware diff --git a/src/VoiceDialogue/api/core/__init__.py b/src/voice_dialogue/api/core/__init__.py similarity index 100% rename from src/VoiceDialogue/api/core/__init__.py rename to src/voice_dialogue/api/core/__init__.py diff --git a/src/VoiceDialogue/api/core/config.py b/src/voice_dialogue/api/core/config.py similarity index 98% rename from src/VoiceDialogue/api/core/config.py rename to src/voice_dialogue/api/core/config.py index 871ac35ec3a5c239597298668c3151d8aa70c4ca..130accfec8117e4cfbe915586e94678ee3065f77 100644 --- a/src/VoiceDialogue/api/core/config.py +++ b/src/voice_dialogue/api/core/config.py @@ -1,6 +1,8 @@ import logging from typing import Dict, Any +from voice_dialogue.services.audio.generators import tts_config_registry + logger = logging.getLogger(__name__) @@ -17,7 +19,6 @@ class TTSConfigInitializer: } try: - from services.audio.generators import tts_config_registry config_count = len(tts_config_registry.get_all_configs()) result.update({ diff --git a/src/VoiceDialogue/api/core/lifespan.py b/src/voice_dialogue/api/core/lifespan.py similarity index 97% rename from src/VoiceDialogue/api/core/lifespan.py rename to src/voice_dialogue/api/core/lifespan.py index 22d5bb42e551e86c5bafa5e4c309e08540135022..831851be6e787341e0c9548e88ea6f0183db92cb 100644 --- a/src/VoiceDialogue/api/core/lifespan.py +++ b/src/voice_dialogue/api/core/lifespan.py @@ -4,8 +4,8 @@ from contextlib import asynccontextmanager from fastapi import FastAPI -from services.audio.generators import tts_config_registry -from utils import get_system_language +from 
voice_dialogue.services.audio.generators import tts_config_registry +from voice_dialogue.utils import get_system_language from .config import TTSConfigInitializer from .service_factories import get_core_voice_service_definitions from .service_manager import ServiceManager diff --git a/src/VoiceDialogue/api/core/service_factories.py b/src/voice_dialogue/api/core/service_factories.py similarity index 93% rename from src/VoiceDialogue/api/core/service_factories.py rename to src/voice_dialogue/api/core/service_factories.py index c21c2da0a7cf1c83b635bb9a81f95faccafba835..add563fe3e73a396b0ea2eea52fda1d4c7a2a415 100644 --- a/src/VoiceDialogue/api/core/service_factories.py +++ b/src/voice_dialogue/api/core/service_factories.py @@ -1,11 +1,11 @@ -from core.constants import ( +from voice_dialogue.core.constants import ( transcribed_text_queue, text_input_queue, audio_output_queue, audio_frames_queue, user_voice_queue, websocket_message_queue ) -from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer -from services.audio.generators import BaseTTSConfig, tts_config_registry -from services.speech import SpeechStateMonitor, ASRWorker -from services.text.generator import LLMResponseGenerator +from voice_dialogue.services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer +from voice_dialogue.services.audio.generators import BaseTTSConfig, tts_config_registry +from voice_dialogue.services.speech import SpeechStateMonitor, ASRWorker +from voice_dialogue.services.text.generator import LLMResponseGenerator from .service_manager import ServiceDefinition diff --git a/src/VoiceDialogue/api/core/service_manager.py b/src/voice_dialogue/api/core/service_manager.py similarity index 100% rename from src/VoiceDialogue/api/core/service_manager.py rename to src/voice_dialogue/api/core/service_manager.py diff --git a/src/VoiceDialogue/api/dependencies/__init__.py b/src/voice_dialogue/api/dependencies/__init__.py similarity index 
100% rename from src/VoiceDialogue/api/dependencies/__init__.py rename to src/voice_dialogue/api/dependencies/__init__.py diff --git a/src/VoiceDialogue/api/dependencies/audio_deps.py b/src/voice_dialogue/api/dependencies/audio_deps.py similarity index 100% rename from src/VoiceDialogue/api/dependencies/audio_deps.py rename to src/voice_dialogue/api/dependencies/audio_deps.py diff --git a/src/VoiceDialogue/api/middleware/__init__.py b/src/voice_dialogue/api/middleware/__init__.py similarity index 100% rename from src/VoiceDialogue/api/middleware/__init__.py rename to src/voice_dialogue/api/middleware/__init__.py diff --git a/src/VoiceDialogue/api/middleware/logging.py b/src/voice_dialogue/api/middleware/logging.py similarity index 100% rename from src/VoiceDialogue/api/middleware/logging.py rename to src/voice_dialogue/api/middleware/logging.py diff --git a/src/VoiceDialogue/api/middleware/rate_limit.py b/src/voice_dialogue/api/middleware/rate_limit.py similarity index 100% rename from src/VoiceDialogue/api/middleware/rate_limit.py rename to src/voice_dialogue/api/middleware/rate_limit.py diff --git a/src/VoiceDialogue/api/routes/__init__.py b/src/voice_dialogue/api/routes/__init__.py similarity index 100% rename from src/VoiceDialogue/api/routes/__init__.py rename to src/voice_dialogue/api/routes/__init__.py diff --git a/src/VoiceDialogue/api/routes/asr_routes.py b/src/voice_dialogue/api/routes/asr_routes.py similarity index 98% rename from src/VoiceDialogue/api/routes/asr_routes.py rename to src/voice_dialogue/api/routes/asr_routes.py index ddbc67f76d6d44ed5da27ab1ef6045e16f9e1211..27f91934ee8c13ff6ed53d1c85d57aa065f5236d 100644 --- a/src/VoiceDialogue/api/routes/asr_routes.py +++ b/src/voice_dialogue/api/routes/asr_routes.py @@ -2,7 +2,7 @@ import logging from fastapi import APIRouter, HTTPException, Request, BackgroundTasks -from services.speech.recognizers import asr_manager +from voice_dialogue.services.speech.recognizers import asr_manager from 
..core.service_factories import get_asr_worker_service_definition from ..schemas.asr_schemas import ( SupportedLanguagesResponse, ASRInstanceRequest, ASRInstanceResponse diff --git a/src/VoiceDialogue/api/routes/system_routes.py b/src/voice_dialogue/api/routes/system_routes.py similarity index 99% rename from src/VoiceDialogue/api/routes/system_routes.py rename to src/voice_dialogue/api/routes/system_routes.py index 87911676160f1ebba8fea5e569c9510f94b71a1a..c06600691af2f396f00ff5ed0df287da5d9214fd 100644 --- a/src/VoiceDialogue/api/routes/system_routes.py +++ b/src/voice_dialogue/api/routes/system_routes.py @@ -4,7 +4,7 @@ import time from fastapi import APIRouter, HTTPException, BackgroundTasks, Request -from core.constants import session_manager +from voice_dialogue.core.constants import session_manager from ..core.service_factories import get_audio_capture_service_definition from ..schemas.system_schemas import ( SystemStatusResponse, SystemResponse diff --git a/src/VoiceDialogue/api/routes/tts_routes.py b/src/voice_dialogue/api/routes/tts_routes.py similarity index 99% rename from src/VoiceDialogue/api/routes/tts_routes.py rename to src/voice_dialogue/api/routes/tts_routes.py index 51745d31b3f2c0e41baa464f2e83d5a4ce7510e6..5e034ea18b77c39c1372d1c52c47287cec6167c6 100644 --- a/src/VoiceDialogue/api/routes/tts_routes.py +++ b/src/voice_dialogue/api/routes/tts_routes.py @@ -3,7 +3,7 @@ from typing import Optional from fastapi import APIRouter, HTTPException, BackgroundTasks, Request -from services.audio.generators import tts_config_registry +from voice_dialogue.services.audio.generators import tts_config_registry from ..core.service_factories import get_tts_audio_generator_service_definition from ..schemas.tts_schemas import ( TTSModelInfo, TTSModelListResponse, TTSModelLoadRequest, diff --git a/src/VoiceDialogue/api/routes/websocket_routes.py b/src/voice_dialogue/api/routes/websocket_routes.py similarity index 90% rename from 
src/VoiceDialogue/api/routes/websocket_routes.py rename to src/voice_dialogue/api/routes/websocket_routes.py index b3192126bd44a5ed4e60567eeb5f6ed560ef14a9..477378682f2bd8f340e62328d12275c927b3aa4a 100644 --- a/src/VoiceDialogue/api/routes/websocket_routes.py +++ b/src/voice_dialogue/api/routes/websocket_routes.py @@ -4,7 +4,7 @@ from queue import Empty from fastapi import APIRouter, WebSocket, WebSocketDisconnect -from core.constants import websocket_message_queue, session_manager +from voice_dialogue.core.constants import websocket_message_queue, session_manager ws = APIRouter() logger = logging.getLogger(__name__) diff --git a/src/VoiceDialogue/api/schemas/__init__.py b/src/voice_dialogue/api/schemas/__init__.py similarity index 100% rename from src/VoiceDialogue/api/schemas/__init__.py rename to src/voice_dialogue/api/schemas/__init__.py diff --git a/src/VoiceDialogue/api/schemas/asr_schemas.py b/src/voice_dialogue/api/schemas/asr_schemas.py similarity index 100% rename from src/VoiceDialogue/api/schemas/asr_schemas.py rename to src/voice_dialogue/api/schemas/asr_schemas.py diff --git a/src/VoiceDialogue/api/schemas/system_schemas.py b/src/voice_dialogue/api/schemas/system_schemas.py similarity index 100% rename from src/VoiceDialogue/api/schemas/system_schemas.py rename to src/voice_dialogue/api/schemas/system_schemas.py diff --git a/src/VoiceDialogue/api/schemas/tts_schemas.py b/src/voice_dialogue/api/schemas/tts_schemas.py similarity index 99% rename from src/VoiceDialogue/api/schemas/tts_schemas.py rename to src/voice_dialogue/api/schemas/tts_schemas.py index aae6d11fc3a42d90728cead4f7a50b6a4e591a87..cb6e44f1cc6445dafd14e9a1a34728df06eff526 100644 --- a/src/VoiceDialogue/api/schemas/tts_schemas.py +++ b/src/voice_dialogue/api/schemas/tts_schemas.py @@ -1,6 +1,7 @@ +import hashlib from typing import List, Optional, Literal + from pydantic import BaseModel, Field -import hashlib class TTSModelInfo(BaseModel): diff --git 
a/src/VoiceDialogue/api/schemas/voice_schemas.py b/src/voice_dialogue/api/schemas/voice_schemas.py similarity index 100% rename from src/VoiceDialogue/api/schemas/voice_schemas.py rename to src/voice_dialogue/api/schemas/voice_schemas.py diff --git a/src/VoiceDialogue/api/server.py b/src/voice_dialogue/api/server.py similarity index 100% rename from src/VoiceDialogue/api/server.py rename to src/voice_dialogue/api/server.py diff --git a/src/VoiceDialogue/api/__init__.py b/src/voice_dialogue/config/__init__.py similarity index 100% rename from src/VoiceDialogue/api/__init__.py rename to src/voice_dialogue/config/__init__.py diff --git a/src/VoiceDialogue/config/paths.py b/src/voice_dialogue/config/paths.py similarity index 100% rename from src/VoiceDialogue/config/paths.py rename to src/voice_dialogue/config/paths.py diff --git a/src/VoiceDialogue/config/__init__.py b/src/voice_dialogue/core/__init__.py similarity index 100% rename from src/VoiceDialogue/config/__init__.py rename to src/voice_dialogue/core/__init__.py diff --git a/src/VoiceDialogue/core/base.py b/src/voice_dialogue/core/base.py similarity index 100% rename from src/VoiceDialogue/core/base.py rename to src/voice_dialogue/core/base.py diff --git a/src/VoiceDialogue/core/constants.py b/src/voice_dialogue/core/constants.py similarity index 97% rename from src/VoiceDialogue/core/constants.py rename to src/voice_dialogue/core/constants.py index 3cc119083353cf8d8d07b9b11f5c38d9e53f0203..32b10ef351727bbb130eabb93e69edf347a4e742 100644 --- a/src/VoiceDialogue/core/constants.py +++ b/src/voice_dialogue/core/constants.py @@ -3,7 +3,7 @@ import multiprocessing import threading from collections import OrderedDict -from utils.cache import LRUCacheDict +from voice_dialogue.utils.cache import LRUCacheDict from .session_manager import SessionIdManager from .state_manager import VoiceStateManager diff --git a/src/VoiceDialogue/core/enums.py b/src/voice_dialogue/core/enums.py similarity index 100% rename from 
src/VoiceDialogue/core/enums.py rename to src/voice_dialogue/core/enums.py diff --git a/src/VoiceDialogue/core/session_manager.py b/src/voice_dialogue/core/session_manager.py similarity index 100% rename from src/VoiceDialogue/core/session_manager.py rename to src/voice_dialogue/core/session_manager.py diff --git a/src/VoiceDialogue/core/state_manager.py b/src/voice_dialogue/core/state_manager.py similarity index 96% rename from src/VoiceDialogue/core/state_manager.py rename to src/voice_dialogue/core/state_manager.py index f5da20bf9ab0af0fd0d2bef6ba9e8bc473e04b34..e6b0ee7decf5a6439609e779ecdf094b42c9ae1b 100644 --- a/src/VoiceDialogue/core/state_manager.py +++ b/src/voice_dialogue/core/state_manager.py @@ -1,6 +1,6 @@ import uuid -from utils.cache import LRUCacheDict +from voice_dialogue.utils.cache import LRUCacheDict from .enums import AudioState diff --git a/src/VoiceDialogue/main.py b/src/voice_dialogue/main.py similarity index 100% rename from src/VoiceDialogue/main.py rename to src/voice_dialogue/main.py diff --git a/src/VoiceDialogue/models/__init__.py b/src/voice_dialogue/models/__init__.py similarity index 100% rename from src/VoiceDialogue/models/__init__.py rename to src/voice_dialogue/models/__init__.py diff --git a/src/VoiceDialogue/models/voice_task.py b/src/voice_dialogue/models/voice_task.py similarity index 100% rename from src/VoiceDialogue/models/voice_task.py rename to src/voice_dialogue/models/voice_task.py diff --git a/src/VoiceDialogue/core/__init__.py b/src/voice_dialogue/services/__init__.py similarity index 100% rename from src/VoiceDialogue/core/__init__.py rename to src/voice_dialogue/services/__init__.py diff --git a/src/VoiceDialogue/services/audio/__init__.py b/src/voice_dialogue/services/audio/__init__.py similarity index 100% rename from src/VoiceDialogue/services/audio/__init__.py rename to src/voice_dialogue/services/audio/__init__.py diff --git a/src/VoiceDialogue/services/audio/capture.py 
b/src/voice_dialogue/services/audio/capture.py similarity index 95% rename from src/VoiceDialogue/services/audio/capture.py rename to src/voice_dialogue/services/audio/capture.py index 8347384a5d5f1669ae0c497a54931cc460a0df68..ba54541072525f3c48ef29fc6825129f69565bb7 100644 --- a/src/VoiceDialogue/services/audio/capture.py +++ b/src/voice_dialogue/services/audio/capture.py @@ -8,8 +8,8 @@ import time import numpy as np -from config.paths import LIBRARIES_PATH -from core.base import BaseThread +from voice_dialogue.config.paths import LIBRARIES_PATH +from voice_dialogue.core.base import BaseThread class EchoCancellingAudioCapture(BaseThread): diff --git a/src/VoiceDialogue/services/audio/generator.py b/src/voice_dialogue/services/audio/generator.py similarity index 93% rename from src/VoiceDialogue/services/audio/generator.py rename to src/voice_dialogue/services/audio/generator.py index 86dbc46e5313ab6c26689b311fe204dc9cc86954..1fe4cc5ba95fc26f914755cc2c985626ad0aa2fc 100644 --- a/src/VoiceDialogue/services/audio/generator.py +++ b/src/voice_dialogue/services/audio/generator.py @@ -2,9 +2,9 @@ import time from multiprocessing import Queue from queue import Empty -from core.base import BaseThread -from core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager -from models.voice_task import VoiceTask +from voice_dialogue.core.base import BaseThread +from voice_dialogue.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager +from voice_dialogue.models.voice_task import VoiceTask from .generators import tts_manager, BaseTTSConfig diff --git a/src/VoiceDialogue/services/audio/generators/__init__.py b/src/voice_dialogue/services/audio/generators/__init__.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/__init__.py rename to src/voice_dialogue/services/audio/generators/__init__.py diff --git a/src/VoiceDialogue/services/audio/generators/configs/__init__.py 
b/src/voice_dialogue/services/audio/generators/configs/__init__.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/configs/__init__.py rename to src/voice_dialogue/services/audio/generators/configs/__init__.py diff --git a/src/VoiceDialogue/services/audio/generators/configs/kokoro.py b/src/voice_dialogue/services/audio/generators/configs/kokoro.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/configs/kokoro.py rename to src/voice_dialogue/services/audio/generators/configs/kokoro.py diff --git a/src/VoiceDialogue/services/audio/generators/configs/moyoyo.py b/src/voice_dialogue/services/audio/generators/configs/moyoyo.py similarity index 99% rename from src/VoiceDialogue/services/audio/generators/configs/moyoyo.py rename to src/voice_dialogue/services/audio/generators/configs/moyoyo.py index 50752e665a3c653f32b1a3f9de15653094dec82a..d0a359c478445a622b9a6dc27f623a7aa7b3862f 100644 --- a/src/VoiceDialogue/services/audio/generators/configs/moyoyo.py +++ b/src/voice_dialogue/services/audio/generators/configs/moyoyo.py @@ -1,4 +1,4 @@ -from services.audio.generators.models.moyoyo import MoYoYoTTSConfig +from ..models.moyoyo import MoYoYoTTSConfig # 基础预训练模型文件映射 BASE_PRETRAINED_FILES = { diff --git a/src/VoiceDialogue/services/audio/generators/manager.py b/src/voice_dialogue/services/audio/generators/manager.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/manager.py rename to src/voice_dialogue/services/audio/generators/manager.py diff --git a/src/VoiceDialogue/services/audio/generators/models/__init__.py b/src/voice_dialogue/services/audio/generators/models/__init__.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/models/__init__.py rename to src/voice_dialogue/services/audio/generators/models/__init__.py diff --git a/src/VoiceDialogue/services/audio/generators/models/base.py b/src/voice_dialogue/services/audio/generators/models/base.py similarity 
index 100% rename from src/VoiceDialogue/services/audio/generators/models/base.py rename to src/voice_dialogue/services/audio/generators/models/base.py diff --git a/src/VoiceDialogue/services/audio/generators/models/kokoro.py b/src/voice_dialogue/services/audio/generators/models/kokoro.py similarity index 97% rename from src/VoiceDialogue/services/audio/generators/models/kokoro.py rename to src/voice_dialogue/services/audio/generators/models/kokoro.py index a6370d6fbed84368c6c2699cb9c3b9b37289b217..1d342ee89b9a57916764f7cfccf8c25c2b7d38d0 100644 --- a/src/VoiceDialogue/services/audio/generators/models/kokoro.py +++ b/src/voice_dialogue/services/audio/generators/models/kokoro.py @@ -3,8 +3,8 @@ from pathlib import Path from pydantic import BaseModel, Field +from voice_dialogue.config import paths from .base import BaseTTSConfig, TTSConfigType -from config import paths class InferenceParameters(BaseModel): diff --git a/src/VoiceDialogue/services/audio/generators/models/moyoyo.py b/src/voice_dialogue/services/audio/generators/models/moyoyo.py similarity index 97% rename from src/VoiceDialogue/services/audio/generators/models/moyoyo.py rename to src/voice_dialogue/services/audio/generators/models/moyoyo.py index 898ffe7672a317c091432dfcae204201985a40e5..78aec072d406359321014223143930367008d691 100644 --- a/src/VoiceDialogue/services/audio/generators/models/moyoyo.py +++ b/src/voice_dialogue/services/audio/generators/models/moyoyo.py @@ -4,8 +4,8 @@ from pathlib import Path from pydantic import BaseModel, Field -from config.paths import TTS_MODELS_PATH -from utils.download_utils import download_file_from_huggingface +from voice_dialogue.config.paths import TTS_MODELS_PATH +from voice_dialogue.utils.download_utils import download_file_from_huggingface from .base import BaseTTSConfig, TTSConfigType, VoiceModelStatus diff --git a/src/VoiceDialogue/services/audio/generators/runtime/__init__.py b/src/voice_dialogue/services/audio/generators/runtime/__init__.py similarity 
index 100% rename from src/VoiceDialogue/services/audio/generators/runtime/__init__.py rename to src/voice_dialogue/services/audio/generators/runtime/__init__.py diff --git a/src/VoiceDialogue/services/audio/generators/runtime/interface.py b/src/voice_dialogue/services/audio/generators/runtime/interface.py similarity index 100% rename from src/VoiceDialogue/services/audio/generators/runtime/interface.py rename to src/voice_dialogue/services/audio/generators/runtime/interface.py diff --git a/src/VoiceDialogue/services/audio/generators/runtime/kokoro.py b/src/voice_dialogue/services/audio/generators/runtime/kokoro.py similarity index 89% rename from src/VoiceDialogue/services/audio/generators/runtime/kokoro.py rename to src/voice_dialogue/services/audio/generators/runtime/kokoro.py index 5270f7b91d26f5199778e7148df6b6631f9b0160..91f945ecb11b73fd168aabff487e3e7f98dc0177 100644 --- a/src/VoiceDialogue/services/audio/generators/runtime/kokoro.py +++ b/src/voice_dialogue/services/audio/generators/runtime/kokoro.py @@ -3,9 +3,9 @@ from typing import Tuple, Optional import numpy as np from kokoro_onnx import Kokoro -from services.audio.generators.configs.kokoro import KokoroTTSConfig -from services.audio.generators.manager import tts_tables -from services.audio.generators.runtime.interface import TTSInterface +from .interface import TTSInterface +from ..configs.kokoro import KokoroTTSConfig +from ..manager import tts_tables @tts_tables.register("tts_classes", "kokoro") diff --git a/src/VoiceDialogue/services/audio/generators/runtime/moyoyo.py b/src/voice_dialogue/services/audio/generators/runtime/moyoyo.py similarity index 84% rename from src/VoiceDialogue/services/audio/generators/runtime/moyoyo.py rename to src/voice_dialogue/services/audio/generators/runtime/moyoyo.py index 4d7b838772e759fe2f52df97b1505b861e526f62..facb02096cc7711e1a8529e036ba7293391318d8 100644 --- a/src/VoiceDialogue/services/audio/generators/runtime/moyoyo.py +++ 
b/src/voice_dialogue/services/audio/generators/runtime/moyoyo.py @@ -3,10 +3,10 @@ from typing import Tuple import numpy as np -from config.paths import load_third_party -from services.audio.generators.manager import tts_tables -from services.audio.generators.models.moyoyo import MoYoYoTTSConfig -from services.audio.generators.runtime.interface import TTSInterface +from voice_dialogue.config.paths import load_third_party +from voice_dialogue.services.audio.generators.manager import tts_tables +from voice_dialogue.services.audio.generators.models.moyoyo import MoYoYoTTSConfig +from voice_dialogue.services.audio.generators.runtime.interface import TTSInterface load_third_party() diff --git a/src/VoiceDialogue/services/audio/player.py b/src/voice_dialogue/services/audio/player.py similarity index 96% rename from src/VoiceDialogue/services/audio/player.py rename to src/voice_dialogue/services/audio/player.py index e5cbe24d34261710100f168fdd958d864aedddcb..8cf70da7d1cfd5edffc9e0c78c215fe7ebd8e689 100644 --- a/src/VoiceDialogue/services/audio/player.py +++ b/src/voice_dialogue/services/audio/player.py @@ -6,12 +6,12 @@ from queue import Empty import soundfile as sf from playsound import playsound -from core.base import BaseThread -from core.constants import ( +from voice_dialogue.core.base import BaseThread +from voice_dialogue.core.constants import ( user_still_speaking_event, voice_state_manager, dropped_audio_cache, chat_history_cache, silence_over_threshold_event ) -from models.voice_task import VoiceTask, AnswerDisplayMessage +from voice_dialogue.models.voice_task import VoiceTask, AnswerDisplayMessage class AudioStreamPlayer(BaseThread): diff --git a/src/VoiceDialogue/services/speech/__init__.py b/src/voice_dialogue/services/speech/__init__.py similarity index 100% rename from src/VoiceDialogue/services/speech/__init__.py rename to src/voice_dialogue/services/speech/__init__.py diff --git a/src/VoiceDialogue/services/speech/monitor.py 
b/src/voice_dialogue/services/speech/monitor.py similarity index 98% rename from src/VoiceDialogue/services/speech/monitor.py rename to src/voice_dialogue/services/speech/monitor.py index 598f29e0ce812e10f3d4819e94b1102c5ed9fb71..6e84a40d9197f1b31a2102db76b9cb509129f55c 100644 --- a/src/VoiceDialogue/services/speech/monitor.py +++ b/src/voice_dialogue/services/speech/monitor.py @@ -13,12 +13,12 @@ from queue import Empty import librosa import numpy as np -from core.base import BaseThread -from core.constants import ( +from voice_dialogue.core.base import BaseThread +from voice_dialogue.core.constants import ( voice_state_manager, silence_over_threshold_event, user_still_speaking_event, session_manager ) -from core.enums import AudioState -from models.voice_task import VoiceTask +from voice_dialogue.core.enums import AudioState +from voice_dialogue.models.voice_task import VoiceTask class SpeechMonitorConfig: diff --git a/src/VoiceDialogue/services/speech/recognizer.py b/src/voice_dialogue/services/speech/recognizer.py similarity index 88% rename from src/VoiceDialogue/services/speech/recognizer.py rename to src/voice_dialogue/services/speech/recognizer.py index 6f3c33a6c9eb3ee4336464128624385000c63713..2767f22ec991836c3a9f9d4cb77b496bc274487d 100644 --- a/src/VoiceDialogue/services/speech/recognizer.py +++ b/src/voice_dialogue/services/speech/recognizer.py @@ -4,10 +4,10 @@ from queue import Queue import numpy as np -from core.base import BaseThread -from core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache -from models.voice_task import VoiceTask -from utils.cache import LRUCacheDict +from voice_dialogue.core.base import BaseThread +from voice_dialogue.core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache +from voice_dialogue.models.voice_task import VoiceTask +from voice_dialogue.utils.cache import LRUCacheDict from .recognizers import asr_manager diff --git 
a/src/VoiceDialogue/services/speech/recognizers/__init__.py b/src/voice_dialogue/services/speech/recognizers/__init__.py similarity index 100% rename from src/VoiceDialogue/services/speech/recognizers/__init__.py rename to src/voice_dialogue/services/speech/recognizers/__init__.py diff --git a/src/VoiceDialogue/services/speech/recognizers/manager.py b/src/voice_dialogue/services/speech/recognizers/manager.py similarity index 100% rename from src/VoiceDialogue/services/speech/recognizers/manager.py rename to src/voice_dialogue/services/speech/recognizers/manager.py diff --git a/src/VoiceDialogue/services/speech/recognizers/models/__init__.py b/src/voice_dialogue/services/speech/recognizers/models/__init__.py similarity index 100% rename from src/VoiceDialogue/services/speech/recognizers/models/__init__.py rename to src/voice_dialogue/services/speech/recognizers/models/__init__.py diff --git a/src/VoiceDialogue/services/speech/recognizers/models/base.py b/src/voice_dialogue/services/speech/recognizers/models/base.py similarity index 97% rename from src/VoiceDialogue/services/speech/recognizers/models/base.py rename to src/voice_dialogue/services/speech/recognizers/models/base.py index 4928ed60b9dbfe6e44bc1ab6282cc02eada985fc..d8887b7acdfcb6729dbfcebbda48cacf38d18087 100644 --- a/src/VoiceDialogue/services/speech/recognizers/models/base.py +++ b/src/voice_dialogue/services/speech/recognizers/models/base.py @@ -4,7 +4,7 @@ from enum import Enum import librosa import numpy as np -from config import paths +from voice_dialogue.config import paths class ASRConfigType(Enum): diff --git a/src/VoiceDialogue/services/speech/recognizers/models/funasr.py b/src/voice_dialogue/services/speech/recognizers/models/funasr.py similarity index 90% rename from src/VoiceDialogue/services/speech/recognizers/models/funasr.py rename to src/voice_dialogue/services/speech/recognizers/models/funasr.py index f24a03aabad16db411fe8eeb457b7196584f0e63..721a09e8648696b1b94d9596354ab249ce6611cc 
100644 --- a/src/VoiceDialogue/services/speech/recognizers/models/funasr.py +++ b/src/voice_dialogue/services/speech/recognizers/models/funasr.py @@ -4,10 +4,10 @@ import typing import numpy as np from funasr_onnx import SeacoParaformer, CT_Transformer -from config import paths -from services.speech.recognizers.manager import asr_tables -from services.speech.recognizers.models.base import ASRInterface -from services.speech.recognizers.utils import ensure_minimum_audio_duration +from voice_dialogue.config import paths +from voice_dialogue.services.speech.recognizers.manager import asr_tables +from voice_dialogue.services.speech.recognizers.models.base import ASRInterface +from ..utils import ensure_minimum_audio_duration @asr_tables.register('asr_classes', 'funasr') diff --git a/src/VoiceDialogue/services/speech/recognizers/models/whisper.py b/src/voice_dialogue/services/speech/recognizers/models/whisper.py similarity index 87% rename from src/VoiceDialogue/services/speech/recognizers/models/whisper.py rename to src/voice_dialogue/services/speech/recognizers/models/whisper.py index a91f37adaccf5564f29d6b2679219dad647cca21..21536d7772834bd9c17a76b415ce3aa84aaf93a3 100644 --- a/src/VoiceDialogue/services/speech/recognizers/models/whisper.py +++ b/src/voice_dialogue/services/speech/recognizers/models/whisper.py @@ -3,10 +3,10 @@ import typing import numpy as np from pywhispercpp.model import Model -from config import paths -from services.speech.recognizers.manager import asr_tables -from services.speech.recognizers.models.base import ASRInterface -from services.speech.recognizers.utils import ensure_minimum_audio_duration +from voice_dialogue.config import paths +from voice_dialogue.services.speech.recognizers.manager import asr_tables +from voice_dialogue.services.speech.recognizers.models.base import ASRInterface +from ..utils import ensure_minimum_audio_duration @asr_tables.register('asr_classes', 'whisper') diff --git 
a/src/VoiceDialogue/services/speech/recognizers/utils.py b/src/voice_dialogue/services/speech/recognizers/utils.py similarity index 100% rename from src/VoiceDialogue/services/speech/recognizers/utils.py rename to src/voice_dialogue/services/speech/recognizers/utils.py diff --git a/src/VoiceDialogue/services/__init__.py b/src/voice_dialogue/services/text/__init__.py similarity index 100% rename from src/VoiceDialogue/services/__init__.py rename to src/voice_dialogue/services/text/__init__.py diff --git a/src/VoiceDialogue/services/text/generator.py b/src/voice_dialogue/services/text/generator.py similarity index 96% rename from src/VoiceDialogue/services/text/generator.py rename to src/voice_dialogue/services/text/generator.py index 039697936d382e431ab86d135cedaf8c53ada4f7..db94822626e0b49919e96392f504ba9e7b931888 100644 --- a/src/VoiceDialogue/services/text/generator.py +++ b/src/voice_dialogue/services/text/generator.py @@ -5,11 +5,11 @@ from queue import Queue, Empty from langchain.memory import ConversationBufferWindowMemory from langchain_core.chat_history import InMemoryChatMessageHistory -from config import paths -from core.base import BaseThread -from core.constants import chat_history_cache -from models.voice_task import VoiceTask, QuestionDisplayMessage -from services.text.processor import preprocess_sentence_text, \ +from voice_dialogue.config import paths +from voice_dialogue.core.base import BaseThread +from voice_dialogue.core.constants import chat_history_cache +from voice_dialogue.models.voice_task import VoiceTask, QuestionDisplayMessage +from voice_dialogue.services.text.processor import preprocess_sentence_text, \ create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline CHINESE_SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。" diff --git a/src/VoiceDialogue/services/text/processor.py b/src/voice_dialogue/services/text/processor.py similarity index 100% rename from src/VoiceDialogue/services/text/processor.py rename to 
src/voice_dialogue/services/text/processor.py diff --git a/src/VoiceDialogue/utils/__init__.py b/src/voice_dialogue/utils/__init__.py similarity index 92% rename from src/VoiceDialogue/utils/__init__.py rename to src/voice_dialogue/utils/__init__.py index 8adfb4f99fa2a2c3386ca4d99782411c1699bed4..d6134f90fc36601a86d9e6393d96b4fbb0bee25a 100644 --- a/src/VoiceDialogue/utils/__init__.py +++ b/src/voice_dialogue/utils/__init__.py @@ -1,3 +1,4 @@ +from voice_dialogue.config.paths import PROJECT_ROOT from .cache import LRUCacheDict from .download_utils import ( download_model_from_huggingface, download_file_from_huggingface, check_file_exists_on_huggingface, @@ -12,9 +13,7 @@ try: from pathlib import Path # 添加third_party路径 - current_dir = Path(__file__).parent - project_root = current_dir.parent.parent.parent - third_party_path = project_root / "third_party" + third_party_path = PROJECT_ROOT / "third_party" if str(third_party_path) not in sys.path: sys.path.insert(0, str(third_party_path)) diff --git a/src/VoiceDialogue/utils/cache.py b/src/voice_dialogue/utils/cache.py similarity index 100% rename from src/VoiceDialogue/utils/cache.py rename to src/voice_dialogue/utils/cache.py diff --git a/src/VoiceDialogue/utils/download_utils.py b/src/voice_dialogue/utils/download_utils.py similarity index 100% rename from src/VoiceDialogue/utils/download_utils.py rename to src/voice_dialogue/utils/download_utils.py diff --git a/src/VoiceDialogue/utils/logger.py b/src/voice_dialogue/utils/logger.py similarity index 99% rename from src/VoiceDialogue/utils/logger.py rename to src/voice_dialogue/utils/logger.py index 7e00d2a25a6a7968ab48dbbb49a8469a93eeb381..7fecd7d9bcce279437c314564c9a37a005fe7408 100644 --- a/src/VoiceDialogue/utils/logger.py +++ b/src/voice_dialogue/utils/logger.py @@ -1,8 +1,7 @@ import logging import sys -from pathlib import Path from logging.handlers import RotatingFileHandler -import datetime +from pathlib import Path def setup_logger( diff --git 
a/src/VoiceDialogue/utils/strings.py b/src/voice_dialogue/utils/strings.py similarity index 100% rename from src/VoiceDialogue/utils/strings.py rename to src/voice_dialogue/utils/strings.py diff --git a/src/VoiceDialogue/utils/system.py b/src/voice_dialogue/utils/system.py similarity index 100% rename from src/VoiceDialogue/utils/system.py rename to src/voice_dialogue/utils/system.py