liumaolin committed on
Commit
511ff0c
·
1 Parent(s): 6eec50d

Rename 'src/VoiceDialogue' to 'src/voice_dialogue'.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. main.py +349 -0
  2. src/VoiceDialogue/services/text/__init__.py +0 -0
  3. src/voice_dialogue/__init__.py +14 -0
  4. src/{VoiceDialogue → voice_dialogue/api}/__init__.py +0 -0
  5. src/{VoiceDialogue → voice_dialogue}/api/app.py +1 -1
  6. src/{VoiceDialogue → voice_dialogue}/api/core/__init__.py +0 -0
  7. src/{VoiceDialogue → voice_dialogue}/api/core/config.py +2 -1
  8. src/{VoiceDialogue → voice_dialogue}/api/core/lifespan.py +2 -2
  9. src/{VoiceDialogue → voice_dialogue}/api/core/service_factories.py +5 -5
  10. src/{VoiceDialogue → voice_dialogue}/api/core/service_manager.py +0 -0
  11. src/{VoiceDialogue → voice_dialogue}/api/dependencies/__init__.py +0 -0
  12. src/{VoiceDialogue → voice_dialogue}/api/dependencies/audio_deps.py +0 -0
  13. src/{VoiceDialogue → voice_dialogue}/api/middleware/__init__.py +0 -0
  14. src/{VoiceDialogue → voice_dialogue}/api/middleware/logging.py +0 -0
  15. src/{VoiceDialogue → voice_dialogue}/api/middleware/rate_limit.py +0 -0
  16. src/{VoiceDialogue → voice_dialogue}/api/routes/__init__.py +0 -0
  17. src/{VoiceDialogue → voice_dialogue}/api/routes/asr_routes.py +1 -1
  18. src/{VoiceDialogue → voice_dialogue}/api/routes/system_routes.py +1 -1
  19. src/{VoiceDialogue → voice_dialogue}/api/routes/tts_routes.py +1 -1
  20. src/{VoiceDialogue → voice_dialogue}/api/routes/websocket_routes.py +1 -1
  21. src/{VoiceDialogue → voice_dialogue}/api/schemas/__init__.py +0 -0
  22. src/{VoiceDialogue → voice_dialogue}/api/schemas/asr_schemas.py +0 -0
  23. src/{VoiceDialogue → voice_dialogue}/api/schemas/system_schemas.py +0 -0
  24. src/{VoiceDialogue → voice_dialogue}/api/schemas/tts_schemas.py +2 -1
  25. src/{VoiceDialogue → voice_dialogue}/api/schemas/voice_schemas.py +0 -0
  26. src/{VoiceDialogue → voice_dialogue}/api/server.py +0 -0
  27. src/{VoiceDialogue/api → voice_dialogue/config}/__init__.py +0 -0
  28. src/{VoiceDialogue → voice_dialogue}/config/paths.py +0 -0
  29. src/{VoiceDialogue/config → voice_dialogue/core}/__init__.py +0 -0
  30. src/{VoiceDialogue → voice_dialogue}/core/base.py +0 -0
  31. src/{VoiceDialogue → voice_dialogue}/core/constants.py +1 -1
  32. src/{VoiceDialogue → voice_dialogue}/core/enums.py +0 -0
  33. src/{VoiceDialogue → voice_dialogue}/core/session_manager.py +0 -0
  34. src/{VoiceDialogue → voice_dialogue}/core/state_manager.py +1 -1
  35. src/{VoiceDialogue → voice_dialogue}/main.py +0 -0
  36. src/{VoiceDialogue → voice_dialogue}/models/__init__.py +0 -0
  37. src/{VoiceDialogue → voice_dialogue}/models/voice_task.py +0 -0
  38. src/{VoiceDialogue/core → voice_dialogue/services}/__init__.py +0 -0
  39. src/{VoiceDialogue → voice_dialogue}/services/audio/__init__.py +0 -0
  40. src/{VoiceDialogue → voice_dialogue}/services/audio/capture.py +2 -2
  41. src/{VoiceDialogue → voice_dialogue}/services/audio/generator.py +3 -3
  42. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/__init__.py +0 -0
  43. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/__init__.py +0 -0
  44. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/kokoro.py +0 -0
  45. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/moyoyo.py +1 -1
  46. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/manager.py +0 -0
  47. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/__init__.py +0 -0
  48. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/base.py +0 -0
  49. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/kokoro.py +1 -1
  50. src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/moyoyo.py +2 -2
main.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import time
4
+ import typing
5
+ from pathlib import Path
6
+
7
+ import uvicorn
8
+
9
+ HERE = Path(__file__).parent
10
+ lib_path = HERE / "src"
11
+ if lib_path.as_posix() not in sys.path:
12
+ sys.path.insert(0, lib_path.as_posix())
13
+
14
+ from voice_dialogue.core.constants import (
15
+ audio_frames_queue,
16
+ user_voice_queue,
17
+ transcribed_text_queue,
18
+ text_input_queue,
19
+ audio_output_queue
20
+ )
21
+ from voice_dialogue.services.audio.capture import EchoCancellingAudioCapture
22
+ from voice_dialogue.services.audio.generator import TTSAudioGenerator
23
+ from voice_dialogue.services.audio.generators.models import tts_config_registry
24
+ from voice_dialogue.services.audio.player import AudioStreamPlayer
25
+ from voice_dialogue.services.speech.monitor import SpeechStateMonitor
26
+ from voice_dialogue.services.speech.recognizer import ASRWorker
27
+ from voice_dialogue.services.text.generator import LLMResponseGenerator
28
+
29
+ language: typing.Literal['zh', 'en'] = 'en'
30
+
31
+
32
def launch_system(
        user_language: str,
        speaker: str
) -> None:
    """
    Start the complete voice-dialogue pipeline and block until it stops.

    Every stage runs in its own worker thread and the stages exchange data
    through the shared module-level queues:

        1. Audio capture:   EchoCancellingAudioCapture fills ``audio_frames_queue``.
        2. Speech monitor:  SpeechStateMonitor reads the frames and fills
                            ``user_voice_queue``.
        3. Recognition:     ASRWorker turns user speech into text
                            (``transcribed_text_queue``).
        4. Text generation: LLMResponseGenerator produces the answer
                            (``text_input_queue``).
        5. Speech synthesis: TTSAudioGenerator turns the answer into audio
                            (``audio_output_queue``).
        6. Playback:        AudioStreamPlayer plays the generated audio.

    Args:
        user_language (str): User language passed to the ASR worker;
            the CLI offers 'zh' (Chinese) and 'en' (English).
        speaker (str): Name of the TTS voice, e.g.
            '罗翔', '马保国', '沈逸', '杨幂', '周杰伦', '马云'.

    Raises:
        ValueError: If no TTS configuration matches ``speaker``. This is
            raised before any worker thread has been started.

    Returns:
        None: The call blocks until every worker thread has finished.
    """
    # Resolve the voice first: failing here must not leave already-started
    # capture/recognition threads running behind the raised exception.
    tts_speaker_config = _get_tts_config_by_speaker_name(speaker)
    if tts_speaker_config is None:
        available_speakers = _get_available_speaker_names()
        raise ValueError(f"不支持的TTS说话人: {speaker}。可用说话人: {', '.join(available_speakers)}")

    threads = []

    # Microphone capture.
    audio_frame_probe = EchoCancellingAudioCapture(audio_frames_queue=audio_frames_queue)
    audio_frame_probe.start()
    threads.append(audio_frame_probe)

    # Detects user speech in the captured frames.
    user_voice_checker = SpeechStateMonitor(
        audio_frame_queue=audio_frames_queue,
        user_voice_queue=user_voice_queue,
    )
    user_voice_checker.start()
    threads.append(user_voice_checker)

    # Speech-to-text.
    whisper_worker = ASRWorker(
        user_voice_queue=user_voice_queue, transcribed_text_queue=transcribed_text_queue,
        language=user_language
    )
    whisper_worker.start()
    threads.append(whisper_worker)

    # Answer generation.
    answer_generator_worker = LLMResponseGenerator(
        user_question_queue=transcribed_text_queue,
        generated_answer_queue=text_input_queue
    )
    answer_generator_worker.start()
    threads.append(answer_generator_worker)

    # Text-to-speech with the voice resolved above.
    audio_generator_worker = TTSAudioGenerator(
        text_input_queue=text_input_queue,
        audio_output_queue=audio_output_queue,
        tts_config=tts_speaker_config
    )
    audio_generator_worker.start()
    threads.append(audio_generator_worker)

    # Playback of the synthesized audio.
    audio_playing_worker = AudioStreamPlayer(audio_playing_queue=audio_output_queue)
    audio_playing_worker.start()
    threads.append(audio_playing_worker)

    # Poll until every worker reports ready
    # (``is_ready`` is presumably provided by the shared thread base class - confirm).
    while not all(thread.is_ready for thread in threads):
        time.sleep(0.1)

    print(f'{"=" * 80}\n服务启动成功\n{"=" * 80}')
    for thread in threads:
        thread.join()
121
+
122
+
123
def _get_tts_config_by_speaker_name(speaker_name: str):
    """
    Look up the registered TTS configuration for a speaker name.

    A known Chinese display name is first translated to the character name
    used by the registry. If that translated name is not registered, the
    name exactly as given is tried as well.

    Args:
        speaker_name (str): Chinese display name or registry character name.

    Returns:
        The first registered config whose ``character_name`` matches,
        or None when nothing matches.
    """
    # Chinese display name -> registry character name (kept for backward compatibility).
    display_to_character = {
        '罗翔': 'Luo Xiang',
        '马保国': 'Ma Baoguo',
        '沈逸': 'Shen Yi',
        '杨幂': 'Yang Mi',
        '周杰伦': 'Zhou Jielun',
        '马云': 'Ma Yun',
    }
    character = display_to_character.get(speaker_name, speaker_name)

    registered = tts_config_registry.get_all_configs()

    # Names to try, in priority order: the translated name, then the raw
    # input when it differs from the translation.
    wanted_names = [character]
    if speaker_name != character:
        wanted_names.append(speaker_name)

    for wanted in wanted_names:
        found = next((cfg for cfg in registered if cfg.character_name == wanted), None)
        if found is not None:
            return found

    return None
164
+
165
+
166
def _get_available_speaker_names():
    """
    List the display names of every registered TTS speaker.

    Returns:
        list[str]: Sorted names, one per registered config. The Chinese
        display name is used where one is known, otherwise the config's
        own ``character_name``.
    """
    # Registry character name -> Chinese display name.
    character_to_display = {
        'Luo Xiang': '罗翔',
        'Ma Baoguo': '马保国',
        'Shen Yi': '沈逸',
        'Yang Mi': '杨幂',
        'Zhou Jielun': '周杰伦',
        'Ma Yun': '马云',
    }

    # Prefer the Chinese label; fall back to the raw character name.
    labels = [
        character_to_display.get(cfg.character_name) or cfg.character_name
        for cfg in tts_config_registry.get_all_configs()
    ]
    labels.sort()
    return labels
196
+
197
+
198
def _update_argument_parser_speaker_choices():
    """
    Return the speaker names offered as ``--speaker`` choices.

    This is a thin wrapper around ``_get_available_speaker_names``; it does
    not modify any parser itself.

    Returns:
        list[str]: The currently available speaker names.
    """
    speaker_choices = _get_available_speaker_names()
    return speaker_choices
206
+
207
+
208
def create_argument_parser():
    """
    Build the command-line parser for both run modes.

    The speaker choices and the speaker list shown in the help epilog are
    read from the TTS registry when the parser is created.

    Returns:
        argparse.ArgumentParser: Parser with ``--mode``, the CLI-mode options
        (``--language``, ``--speaker``) and the API-mode options
        (``--host``, ``--port``, ``--reload``).
    """
    available_speakers = _update_argument_parser_speaker_choices()
    speaker_listing = ', '.join(available_speakers)

    # Use the preferred voice when it is registered (or when nothing is
    # registered at all); otherwise fall back to the first available one.
    preferred_speaker = '沈逸'
    if preferred_speaker in available_speakers or not available_speakers:
        default_speaker = preferred_speaker
    else:
        default_speaker = available_speakers[0]

    parser = argparse.ArgumentParser(
        description="VoiceDialogue - 语音对话系统",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
示例用法:
# 启动命令行模式(默认)
python main.py

# 启动命令行模式并指定参数
python main.py --mode cli --language zh --speaker 沈逸

# 启动API服务器
python main.py --mode api

# 启动API服务器并指定端口
python main.py --mode api --port 9000

# 启动API服务器并启用热重载(开发模式)
python main.py --mode api --port 8000 --reload

支持的说话人:
{speaker_listing}
"""
    )

    # Run-mode switch.
    parser.add_argument(
        '--mode', '-m',
        choices=['cli', 'api'],
        default='cli',
        help='运行模式: cli=命令行模式, api=API服务器模式 (默认: cli)'
    )

    # Options that only matter in CLI mode.
    cli_options = parser.add_argument_group('命令行模式参数')
    cli_options.add_argument(
        '--language', '-l',
        choices=['zh', 'en'],
        default='zh',
        help='用户语言: zh=中文, en=英文 (默认: zh)'
    )
    cli_options.add_argument(
        '--speaker', '-s',
        choices=available_speakers,
        default=default_speaker,
        help='TTS说话人 (默认: 沈逸)'
    )

    # Options that only matter in API-server mode.
    api_options = parser.add_argument_group('API服务器模式参数')
    api_options.add_argument(
        '--host',
        default='0.0.0.0',
        help='服务器主机地址 (默认: 0.0.0.0)'
    )
    api_options.add_argument(
        '--port', '-p',
        type=int,
        default=8000,
        help='服务器端口 (默认: 8000)'
    )
    api_options.add_argument(
        '--reload',
        action='store_true',
        help='启用热重载(开发模式)'
    )

    return parser
281
+
282
+
283
def launch_api_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
    """
    Start the HTTP API server with uvicorn.

    Args:
        host (str): Address the server binds to. Defaults to "0.0.0.0".
        port (int): Port the server listens on. Defaults to 8000.
        reload (bool): Enable uvicorn auto-reload (development mode).
            Defaults to False.
    """
    print(f'{"=" * 80}\n正在启动API服务器...\n{"=" * 80}')
    print(f"服务器地址: http://{host}:{port}")
    print(f"API文档: http://{host}:{port}/docs")
    print(f"热重载: {'启用' if reload else '禁用'}")
    print(f'{"=" * 80}')

    # The app is passed as an import string. Only ``src`` is added to
    # sys.path by this script, so the module must be addressed through the
    # ``voice_dialogue`` package; a bare ``api.app`` is not importable.
    uvicorn.run(
        "voice_dialogue.api.app:app",
        host=host,
        port=port,
        reload=reload,
        log_level="info"
    )
306
+
307
+
308
def main():
    """
    Program entry point.

    Parses the command line, prints a banner and starts the selected mode:
        - cli: the interactive voice-dialogue pipeline
        - api: the HTTP API server

    A KeyboardInterrupt ends the program quietly; any other exception is
    reported and re-raised.
    """
    args = create_argument_parser().parse_args()

    separator = "=" * 80
    print(f"""
{separator}
VoiceDialogue - 语音对话系统
{separator}
运行模式: {args.mode.upper()}
{separator}
""")

    try:
        if args.mode == 'api':
            launch_api_server(
                host=args.host,
                port=args.port,
                reload=args.reload
            )
        elif args.mode == 'cli':
            print(f"语言设置: {args.language}")
            print(f"说话人: {args.speaker}")
            print("正在启动命令行语音对话系统...")
            launch_system(args.language, args.speaker)
    except KeyboardInterrupt:
        print("\n程序被用户中断")
    except Exception as e:
        # Report the failure, then let it propagate with its traceback.
        print(f"程序运行出错: {e}")
        raise
346
+
347
+
348
+ if __name__ == '__main__':
349
+ main()
src/VoiceDialogue/services/text/__init__.py DELETED
File without changes
src/voice_dialogue/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .core.constants import (
2
+ audio_frames_queue,
3
+ user_voice_queue,
4
+ transcribed_text_queue,
5
+ text_input_queue,
6
+ audio_output_queue
7
+ )
8
+ from .services.audio.capture import EchoCancellingAudioCapture
9
+ from .services.audio.generator import TTSAudioGenerator
10
+ from .services.audio.generators.models import tts_config_registry
11
+ from .services.audio.player import AudioStreamPlayer
12
+ from .services.speech.monitor import SpeechStateMonitor
13
+ from .services.speech.recognizer import ASRWorker
14
+ from .services.text.generator import LLMResponseGenerator
src/{VoiceDialogue → voice_dialogue/api}/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/app.py RENAMED
@@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware
6
  from fastapi.responses import FileResponse
7
  from fastapi.staticfiles import StaticFiles
8
 
9
- from config.paths import FRONTEND_ASSETS_PATH
10
  from .core.config import AppConfig
11
  from .core.lifespan import lifespan
12
  from .middleware.logging import LoggingMiddleware
 
6
  from fastapi.responses import FileResponse
7
  from fastapi.staticfiles import StaticFiles
8
 
9
+ from voice_dialogue.config.paths import FRONTEND_ASSETS_PATH
10
  from .core.config import AppConfig
11
  from .core.lifespan import lifespan
12
  from .middleware.logging import LoggingMiddleware
src/{VoiceDialogue → voice_dialogue}/api/core/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/core/config.py RENAMED
@@ -1,6 +1,8 @@
1
  import logging
2
  from typing import Dict, Any
3
 
 
 
4
  logger = logging.getLogger(__name__)
5
 
6
 
@@ -17,7 +19,6 @@ class TTSConfigInitializer:
17
  }
18
 
19
  try:
20
- from services.audio.generators import tts_config_registry
21
  config_count = len(tts_config_registry.get_all_configs())
22
 
23
  result.update({
 
1
  import logging
2
  from typing import Dict, Any
3
 
4
+ from voice_dialogue.services.audio.generators import tts_config_registry
5
+
6
  logger = logging.getLogger(__name__)
7
 
8
 
 
19
  }
20
 
21
  try:
 
22
  config_count = len(tts_config_registry.get_all_configs())
23
 
24
  result.update({
src/{VoiceDialogue → voice_dialogue}/api/core/lifespan.py RENAMED
@@ -4,8 +4,8 @@ from contextlib import asynccontextmanager
4
 
5
  from fastapi import FastAPI
6
 
7
- from services.audio.generators import tts_config_registry
8
- from utils import get_system_language
9
  from .config import TTSConfigInitializer
10
  from .service_factories import get_core_voice_service_definitions
11
  from .service_manager import ServiceManager
 
4
 
5
  from fastapi import FastAPI
6
 
7
+ from voice_dialogue.services.audio.generators import tts_config_registry
8
+ from voice_dialogue.utils import get_system_language
9
  from .config import TTSConfigInitializer
10
  from .service_factories import get_core_voice_service_definitions
11
  from .service_manager import ServiceManager
src/{VoiceDialogue → voice_dialogue}/api/core/service_factories.py RENAMED
@@ -1,11 +1,11 @@
1
- from core.constants import (
2
  transcribed_text_queue, text_input_queue, audio_output_queue,
3
  audio_frames_queue, user_voice_queue, websocket_message_queue
4
  )
5
- from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
6
- from services.audio.generators import BaseTTSConfig, tts_config_registry
7
- from services.speech import SpeechStateMonitor, ASRWorker
8
- from services.text.generator import LLMResponseGenerator
9
  from .service_manager import ServiceDefinition
10
 
11
 
 
1
+ from voice_dialogue.core.constants import (
2
  transcribed_text_queue, text_input_queue, audio_output_queue,
3
  audio_frames_queue, user_voice_queue, websocket_message_queue
4
  )
5
+ from voice_dialogue.services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
6
+ from voice_dialogue.services.audio.generators import BaseTTSConfig, tts_config_registry
7
+ from voice_dialogue.services.speech import SpeechStateMonitor, ASRWorker
8
+ from voice_dialogue.services.text.generator import LLMResponseGenerator
9
  from .service_manager import ServiceDefinition
10
 
11
 
src/{VoiceDialogue → voice_dialogue}/api/core/service_manager.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/dependencies/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/dependencies/audio_deps.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/middleware/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/middleware/logging.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/middleware/rate_limit.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/routes/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/routes/asr_routes.py RENAMED
@@ -2,7 +2,7 @@ import logging
2
 
3
  from fastapi import APIRouter, HTTPException, Request, BackgroundTasks
4
 
5
- from services.speech.recognizers import asr_manager
6
  from ..core.service_factories import get_asr_worker_service_definition
7
  from ..schemas.asr_schemas import (
8
  SupportedLanguagesResponse, ASRInstanceRequest, ASRInstanceResponse
 
2
 
3
  from fastapi import APIRouter, HTTPException, Request, BackgroundTasks
4
 
5
+ from voice_dialogue.services.speech.recognizers import asr_manager
6
  from ..core.service_factories import get_asr_worker_service_definition
7
  from ..schemas.asr_schemas import (
8
  SupportedLanguagesResponse, ASRInstanceRequest, ASRInstanceResponse
src/{VoiceDialogue → voice_dialogue}/api/routes/system_routes.py RENAMED
@@ -4,7 +4,7 @@ import time
4
 
5
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
6
 
7
- from core.constants import session_manager
8
  from ..core.service_factories import get_audio_capture_service_definition
9
  from ..schemas.system_schemas import (
10
  SystemStatusResponse, SystemResponse
 
4
 
5
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
6
 
7
+ from voice_dialogue.core.constants import session_manager
8
  from ..core.service_factories import get_audio_capture_service_definition
9
  from ..schemas.system_schemas import (
10
  SystemStatusResponse, SystemResponse
src/{VoiceDialogue → voice_dialogue}/api/routes/tts_routes.py RENAMED
@@ -3,7 +3,7 @@ from typing import Optional
3
 
4
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
5
 
6
- from services.audio.generators import tts_config_registry
7
  from ..core.service_factories import get_tts_audio_generator_service_definition
8
  from ..schemas.tts_schemas import (
9
  TTSModelInfo, TTSModelListResponse, TTSModelLoadRequest,
 
3
 
4
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
5
 
6
+ from voice_dialogue.services.audio.generators import tts_config_registry
7
  from ..core.service_factories import get_tts_audio_generator_service_definition
8
  from ..schemas.tts_schemas import (
9
  TTSModelInfo, TTSModelListResponse, TTSModelLoadRequest,
src/{VoiceDialogue → voice_dialogue}/api/routes/websocket_routes.py RENAMED
@@ -4,7 +4,7 @@ from queue import Empty
4
 
5
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect
6
 
7
- from core.constants import websocket_message_queue, session_manager
8
 
9
  ws = APIRouter()
10
  logger = logging.getLogger(__name__)
 
4
 
5
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect
6
 
7
+ from voice_dialogue.core.constants import websocket_message_queue, session_manager
8
 
9
  ws = APIRouter()
10
  logger = logging.getLogger(__name__)
src/{VoiceDialogue → voice_dialogue}/api/schemas/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/schemas/asr_schemas.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/schemas/system_schemas.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/schemas/tts_schemas.py RENAMED
@@ -1,6 +1,7 @@
 
1
  from typing import List, Optional, Literal
 
2
  from pydantic import BaseModel, Field
3
- import hashlib
4
 
5
 
6
  class TTSModelInfo(BaseModel):
 
1
+ import hashlib
2
  from typing import List, Optional, Literal
3
+
4
  from pydantic import BaseModel, Field
 
5
 
6
 
7
  class TTSModelInfo(BaseModel):
src/{VoiceDialogue → voice_dialogue}/api/schemas/voice_schemas.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/api/server.py RENAMED
File without changes
src/{VoiceDialogue/api → voice_dialogue/config}/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/config/paths.py RENAMED
File without changes
src/{VoiceDialogue/config → voice_dialogue/core}/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/core/base.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/core/constants.py RENAMED
@@ -3,7 +3,7 @@ import multiprocessing
3
  import threading
4
  from collections import OrderedDict
5
 
6
- from utils.cache import LRUCacheDict
7
  from .session_manager import SessionIdManager
8
  from .state_manager import VoiceStateManager
9
 
 
3
  import threading
4
  from collections import OrderedDict
5
 
6
+ from voice_dialogue.utils.cache import LRUCacheDict
7
  from .session_manager import SessionIdManager
8
  from .state_manager import VoiceStateManager
9
 
src/{VoiceDialogue → voice_dialogue}/core/enums.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/core/session_manager.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/core/state_manager.py RENAMED
@@ -1,6 +1,6 @@
1
  import uuid
2
 
3
- from utils.cache import LRUCacheDict
4
  from .enums import AudioState
5
 
6
 
 
1
  import uuid
2
 
3
+ from voice_dialogue.utils.cache import LRUCacheDict
4
  from .enums import AudioState
5
 
6
 
src/{VoiceDialogue → voice_dialogue}/main.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/models/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/models/voice_task.py RENAMED
File without changes
src/{VoiceDialogue/core → voice_dialogue/services}/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/capture.py RENAMED
@@ -8,8 +8,8 @@ import time
8
 
9
  import numpy as np
10
 
11
- from config.paths import LIBRARIES_PATH
12
- from core.base import BaseThread
13
 
14
 
15
  class EchoCancellingAudioCapture(BaseThread):
 
8
 
9
  import numpy as np
10
 
11
+ from voice_dialogue.config.paths import LIBRARIES_PATH
12
+ from voice_dialogue.core.base import BaseThread
13
 
14
 
15
  class EchoCancellingAudioCapture(BaseThread):
src/{VoiceDialogue → voice_dialogue}/services/audio/generator.py RENAMED
@@ -2,9 +2,9 @@ import time
2
  from multiprocessing import Queue
3
  from queue import Empty
4
 
5
- from core.base import BaseThread
6
- from core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
7
- from models.voice_task import VoiceTask
8
  from .generators import tts_manager, BaseTTSConfig
9
 
10
 
 
2
  from multiprocessing import Queue
3
  from queue import Empty
4
 
5
+ from voice_dialogue.core.base import BaseThread
6
+ from voice_dialogue.core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
7
+ from voice_dialogue.models.voice_task import VoiceTask
8
  from .generators import tts_manager, BaseTTSConfig
9
 
10
 
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/kokoro.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/configs/moyoyo.py RENAMED
@@ -1,4 +1,4 @@
1
- from services.audio.generators.models.moyoyo import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
 
1
+ from ..models.moyoyo import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/manager.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/__init__.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/base.py RENAMED
File without changes
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/kokoro.py RENAMED
@@ -3,8 +3,8 @@ from pathlib import Path
3
 
4
  from pydantic import BaseModel, Field
5
 
 
6
  from .base import BaseTTSConfig, TTSConfigType
7
- from config import paths
8
 
9
 
10
  class InferenceParameters(BaseModel):
 
3
 
4
  from pydantic import BaseModel, Field
5
 
6
+ from voice_dialogue.config import paths
7
  from .base import BaseTTSConfig, TTSConfigType
 
8
 
9
 
10
  class InferenceParameters(BaseModel):
src/{VoiceDialogue → voice_dialogue}/services/audio/generators/models/moyoyo.py RENAMED
@@ -4,8 +4,8 @@ from pathlib import Path
4
 
5
  from pydantic import BaseModel, Field
6
 
7
- from config.paths import TTS_MODELS_PATH
8
- from utils.download_utils import download_file_from_huggingface
9
  from .base import BaseTTSConfig, TTSConfigType, VoiceModelStatus
10
 
11
 
 
4
 
5
  from pydantic import BaseModel, Field
6
 
7
+ from voice_dialogue.config.paths import TTS_MODELS_PATH
8
+ from voice_dialogue.utils.download_utils import download_file_from_huggingface
9
  from .base import BaseTTSConfig, TTSConfigType, VoiceModelStatus
10
 
11