liumaolin committed on
Commit
2ecfa8f
·
1 Parent(s): d846f85

Add echo cancellation and VAD toggle support in service factories and routes

Browse files

- Extend `create_audio_capture` with an `enable_echo_cancellation` option (default `True`) and `create_speech_monitor` with an `enable_vad` option (default `False`).
- Update service definitions and health checks to pass configuration dynamically.
- Add `SystemStartRequest` schema to support echo cancellation toggle during system startup.
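- Adjust `_start_system_background` logic to initialize services based on the new toggles: it now starts `speech_monitor` (with VAD enabled only when echo cancellation is disabled) and passes the echo cancellation flag to `audio_capture`.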

src/voice_dialogue/api/core/service_factories.py CHANGED
@@ -13,18 +13,20 @@ class ServiceFactories:
13
  """服务工厂类,封装所有服务的创建逻辑"""
14
 
15
  @staticmethod
16
- def create_audio_capture() -> AudioCapture:
17
  """创建音频捕获服务"""
18
  return AudioCapture(
19
- audio_frames_queue=audio_frames_queue
 
20
  )
21
 
22
  @staticmethod
23
- def create_speech_monitor() -> SpeechStateMonitor:
24
  """创建语音监控服务"""
25
  return SpeechStateMonitor(
26
  audio_frame_queue=audio_frames_queue,
27
  user_voice_queue=user_voice_queue,
 
28
  )
29
 
30
  @staticmethod
@@ -90,12 +92,12 @@ def get_core_voice_service_definitions(system_language: str, tts_config: BaseTTS
90
  # ),
91
 
92
  # 语音状态监控服务
93
- ServiceDefinition(
94
- name="speech_monitor",
95
- factory=ServiceFactories.create_speech_monitor,
96
- dependencies=[],
97
- health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
98
- ),
99
 
100
  # ASR语音识别服务
101
  ServiceDefinition(
@@ -129,11 +131,21 @@ def get_core_voice_service_definitions(system_language: str, tts_config: BaseTTS
129
  ]
130
 
131
 
132
- def get_audio_capture_service_definition() -> ServiceDefinition:
133
  """获取音频捕获服务定义"""
134
  return ServiceDefinition(
135
  name="audio_capture",
136
- factory=ServiceFactories.create_audio_capture,
 
 
 
 
 
 
 
 
 
 
137
  dependencies=[],
138
  health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
139
  )
 
13
  """服务工厂类,封装所有服务的创建逻辑"""
14
 
15
  @staticmethod
16
+ def create_audio_capture(enable_echo_cancellation: bool = True) -> AudioCapture:
17
  """创建音频捕获服务"""
18
  return AudioCapture(
19
+ audio_frames_queue=audio_frames_queue,
20
+ enable_echo_cancellation=enable_echo_cancellation
21
  )
22
 
23
  @staticmethod
24
+ def create_speech_monitor(enable_vad: bool = False) -> SpeechStateMonitor:
25
  """创建语音监控服务"""
26
  return SpeechStateMonitor(
27
  audio_frame_queue=audio_frames_queue,
28
  user_voice_queue=user_voice_queue,
29
+ enable_vad=enable_vad
30
  )
31
 
32
  @staticmethod
 
92
  # ),
93
 
94
  # 语音状态监控服务
95
+ # ServiceDefinition(
96
+ # name="speech_monitor",
97
+ # factory=ServiceFactories.create_speech_monitor,
98
+ # dependencies=[],
99
+ # health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
100
+ # ),
101
 
102
  # ASR语音识别服务
103
  ServiceDefinition(
 
131
  ]
132
 
133
 
134
+ def get_audio_capture_service_definition(enable_echo_cancellation: bool = True) -> ServiceDefinition:
135
  """获取音频捕获服务定义"""
136
  return ServiceDefinition(
137
  name="audio_capture",
138
+ factory=lambda: ServiceFactories.create_audio_capture(enable_echo_cancellation),
139
+ dependencies=[],
140
+ health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
141
+ )
142
+
143
+
144
+ def get_speech_monitor_service_definition(enable_vad: bool = False) -> ServiceDefinition:
145
+ """获取语音监控服务定义"""
146
+ return ServiceDefinition(
147
+ name="speech_monitor",
148
+ factory=lambda: ServiceFactories.create_speech_monitor(enable_vad),
149
  dependencies=[],
150
  health_check=lambda service: hasattr(service, 'is_ready') and service.is_ready
151
  )
src/voice_dialogue/api/routes/system_routes.py CHANGED
@@ -5,9 +5,9 @@ from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
5
 
6
  from voice_dialogue.core.constants import session_manager
7
  from voice_dialogue.utils.logger import logger
8
- from ..core.service_factories import get_audio_capture_service_definition
9
  from ..schemas.system_schemas import (
10
- SystemStatusResponse, SystemResponse
11
  )
12
 
13
  router = APIRouter()
@@ -62,6 +62,7 @@ async def get_system_status(request: Request):
62
 
63
  @router.post("/start", response_model=SystemResponse, summary="启动系统")
64
  async def start_system(
 
65
  fastapi_request: Request,
66
  background_tasks: BackgroundTasks
67
  ):
@@ -82,7 +83,8 @@ async def start_system(
82
  # 在后台启动系统
83
  background_tasks.add_task(
84
  _start_system_background,
85
- fastapi_request
 
86
  )
87
 
88
  return SystemResponse(
@@ -135,6 +137,28 @@ async def stop_system(request: Request):
135
  except Exception as e:
136
  logger.error(f"停止音频捕获服务时发生错误: {e}", exc_info=True)
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  # 停止audio_player服务
139
  if service_manager.is_service_running("audio_player"):
140
  audio_player_service = service_manager.get_service("audio_player")
@@ -190,7 +214,7 @@ async def restart_system(
190
  raise HTTPException(status_code=500, detail=f"系统重启失败: {str(e)}")
191
 
192
 
193
- async def _start_system_background(request: Request):
194
  """
195
  后台启动系统的实际逻辑 - 创建并启动audio_capture服务
196
  """
@@ -229,12 +253,25 @@ async def _start_system_background(request: Request):
229
  else:
230
  logger.warning("未找到音频播放服务,系统将继续启动但可能无法播放音频")
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  # 检查audio_capture服务是否已存在
233
  if service_manager.is_service_running("audio_capture"):
234
  logger.info("音频捕获服务已在运行")
235
  else:
236
  # 创建audio_capture服务定义
237
- audio_capture_def = get_audio_capture_service_definition()
238
 
239
  # 启动audio_capture服务
240
  success = service_manager.start_service(audio_capture_def)
 
5
 
6
  from voice_dialogue.core.constants import session_manager
7
  from voice_dialogue.utils.logger import logger
8
+ from ..core.service_factories import get_audio_capture_service_definition, get_speech_monitor_service_definition
9
  from ..schemas.system_schemas import (
10
+ SystemStatusResponse, SystemResponse, SystemStartRequest
11
  )
12
 
13
  router = APIRouter()
 
62
 
63
  @router.post("/start", response_model=SystemResponse, summary="启动系统")
64
  async def start_system(
65
+ request: SystemStartRequest,
66
  fastapi_request: Request,
67
  background_tasks: BackgroundTasks
68
  ):
 
83
  # 在后台启动系统
84
  background_tasks.add_task(
85
  _start_system_background,
86
+ fastapi_request,
87
+ request.enable_echo_cancellation
88
  )
89
 
90
  return SystemResponse(
 
137
  except Exception as e:
138
  logger.error(f"停止音频捕获服务时发生错误: {e}", exc_info=True)
139
 
140
+ # 停止语音监控服务
141
+ if service_manager.is_service_running("speech_monitor"):
142
+ speech_monitor_service = service_manager.get_service("speech_monitor")
143
+ if speech_monitor_service:
144
+ try:
145
+ speech_monitor_service.exit()
146
+ logger.info("语音监控服务已停止")
147
+
148
+ # 等待服务停止
149
+ timeout = 5
150
+ start_time = time.time()
151
+ while speech_monitor_service.is_alive() and (time.time() - start_time) < timeout:
152
+ await asyncio.sleep(0.1)
153
+
154
+ # 从服务管理器中移除
155
+ if "speech_monitor" in service_manager.services:
156
+ del service_manager.services["speech_monitor"]
157
+
158
+ except Exception as e:
159
+ logger.error(f"停止语音监控服务时发生错误: {e}", exc_info=True)
160
+
161
+
162
  # 停止audio_player服务
163
  if service_manager.is_service_running("audio_player"):
164
  audio_player_service = service_manager.get_service("audio_player")
 
214
  raise HTTPException(status_code=500, detail=f"系统重启失败: {str(e)}")
215
 
216
 
217
+ async def _start_system_background(request: Request, enable_echo_cancellation: bool = True):
218
  """
219
  后台启动系统的实际逻辑 - 创建并启动audio_capture服务
220
  """
 
253
  else:
254
  logger.warning("未找到音频播放服务,系统将继续启动但可能无法播放音频")
255
 
256
+ if service_manager.is_service_running("speech_monitor"):
257
+ logger.info("语音监控服务已在运行")
258
+ else:
259
+ # 创建语音监控服务定义
260
+ enable_vad = not enable_echo_cancellation
261
+ speech_monitor_def = get_speech_monitor_service_definition(enable_vad)
262
+
263
+ # 启动语音监控服务
264
+ success = service_manager.start_service(speech_monitor_def)
265
+ if not success:
266
+ raise RuntimeError("语音监控服务启动失败")
267
+ logger.info("语音监控服务启动成功")
268
+
269
  # 检查audio_capture服务是否已存在
270
  if service_manager.is_service_running("audio_capture"):
271
  logger.info("音频捕获服务已在运行")
272
  else:
273
  # 创建audio_capture服务定义
274
+ audio_capture_def = get_audio_capture_service_definition(enable_echo_cancellation)
275
 
276
  # 启动audio_capture服务
277
  success = service_manager.start_service(audio_capture_def)
src/voice_dialogue/api/schemas/system_schemas.py CHANGED
@@ -15,6 +15,11 @@ class SystemStatusResponse(BaseModel):
15
  services_details: Optional[Dict[str, Any]] = Field(None, description="服务详细状态信息")
16
 
17
 
 
 
 
 
 
18
  class SystemResponse(BaseModel):
19
  """系统操作响应"""
20
  success: bool = Field(..., description="操作是否成功")
 
15
  services_details: Optional[Dict[str, Any]] = Field(None, description="服务详细状态信息")
16
 
17
 
18
+ class SystemStartRequest(BaseModel):
19
+ """系统启动请求"""
20
+ enable_echo_cancellation: bool = Field(default=True, description="是否启用回声消除")
21
+
22
+
23
  class SystemResponse(BaseModel):
24
  """系统操作响应"""
25
  success: bool = Field(..., description="操作是否成功")