liumaolin committed on
Commit
919ff3f
·
1 Parent(s): 619c761

Refactor directories across services: rename `audio_generator` to `generators`, `asr` to `recognizers`, and update all import paths for consistency and improved module organization.

Browse files
Files changed (36) hide show
  1. src/VoiceDialogue/api/core/config.py +1 -1
  2. src/VoiceDialogue/api/core/lifespan.py +1 -1
  3. src/VoiceDialogue/api/core/service_factories.py +2 -2
  4. src/VoiceDialogue/api/dependencies/model_deps.py +1 -1
  5. src/VoiceDialogue/api/routes/asr_routes.py +1 -1
  6. src/VoiceDialogue/api/routes/tts_routes.py +1 -1
  7. src/VoiceDialogue/main.py +7 -7
  8. src/VoiceDialogue/services/audio/__init__.py +3 -3
  9. src/VoiceDialogue/services/audio/{aec_audio_capture.py → capture.py} +0 -0
  10. src/VoiceDialogue/services/audio/{audio_answer.py → generator.py} +1 -1
  11. src/VoiceDialogue/services/audio/{audio_generator → generators}/__init__.py +0 -0
  12. src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/__init__.py +0 -0
  13. src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/kokoro.py +0 -0
  14. src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/moyoyo.py +1 -1
  15. src/VoiceDialogue/services/audio/{audio_generator → generators}/manager.py +0 -0
  16. src/VoiceDialogue/services/audio/{audio_generator → generators}/models/__init__.py +0 -0
  17. src/VoiceDialogue/services/audio/{audio_generator → generators}/models/base.py +0 -0
  18. src/VoiceDialogue/services/audio/{audio_generator → generators}/models/kokoro.py +0 -0
  19. src/VoiceDialogue/services/audio/{audio_generator → generators}/models/moyoyo.py +0 -0
  20. src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/__init__.py +0 -0
  21. src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/interface.py +0 -0
  22. src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/kokoro.py +3 -3
  23. src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/moyoyo.py +3 -3
  24. src/VoiceDialogue/services/audio/{audio_player.py → player.py} +0 -0
  25. src/VoiceDialogue/services/speech/__init__.py +2 -2
  26. src/VoiceDialogue/services/speech/{speech_monitor.py → monitor.py} +0 -0
  27. src/VoiceDialogue/services/speech/{asr_service.py → recognizer.py} +1 -1
  28. src/VoiceDialogue/services/speech/{asr → recognizers}/__init__.py +0 -0
  29. src/VoiceDialogue/services/speech/{asr → recognizers}/manager.py +0 -0
  30. src/VoiceDialogue/services/speech/{asr → recognizers}/models/__init__.py +0 -0
  31. src/VoiceDialogue/services/speech/{asr → recognizers}/models/base.py +0 -0
  32. src/VoiceDialogue/services/speech/{asr → recognizers}/models/funasr.py +3 -3
  33. src/VoiceDialogue/services/speech/{asr → recognizers}/models/whisper.py +3 -3
  34. src/VoiceDialogue/services/speech/{asr → recognizers}/utils.py +0 -0
  35. src/VoiceDialogue/services/text/{text_generator.py → generator.py} +1 -1
  36. src/VoiceDialogue/services/text/{langchain_llm.py → processor.py} +0 -0
src/VoiceDialogue/api/core/config.py CHANGED
@@ -17,7 +17,7 @@ class TTSConfigInitializer:
17
  }
18
 
19
  try:
20
- from services.audio.audio_generator import tts_config_registry
21
  config_count = len(tts_config_registry.get_all_configs())
22
 
23
  result.update({
 
17
  }
18
 
19
  try:
20
+ from services.audio.generators import tts_config_registry
21
  config_count = len(tts_config_registry.get_all_configs())
22
 
23
  result.update({
src/VoiceDialogue/api/core/lifespan.py CHANGED
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
4
 
5
  from fastapi import FastAPI
6
 
7
- from services.audio.audio_generator import tts_config_registry
8
  from utils import get_system_language
9
  from .config import TTSConfigInitializer
10
  from .service_factories import get_core_voice_service_definitions
 
4
 
5
  from fastapi import FastAPI
6
 
7
+ from services.audio.generators import tts_config_registry
8
  from utils import get_system_language
9
  from .config import TTSConfigInitializer
10
  from .service_factories import get_core_voice_service_definitions
src/VoiceDialogue/api/core/service_factories.py CHANGED
@@ -3,9 +3,9 @@ from core.constants import (
3
  audio_frames_queue, user_voice_queue
4
  )
5
  from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
6
- from services.audio.audio_generator import BaseTTSConfig, tts_config_registry
7
  from services.speech import SpeechStateMonitor, ASRWorker
8
- from services.text.text_generator import LLMResponseGenerator
9
  from .service_manager import ServiceDefinition
10
 
11
 
 
3
  audio_frames_queue, user_voice_queue
4
  )
5
  from services.audio import EchoCancellingAudioCapture, TTSAudioGenerator, AudioStreamPlayer
6
+ from services.audio.generators import BaseTTSConfig, tts_config_registry
7
  from services.speech import SpeechStateMonitor, ASRWorker
8
+ from services.text.generator import LLMResponseGenerator
9
  from .service_manager import ServiceDefinition
10
 
11
 
src/VoiceDialogue/api/dependencies/model_deps.py CHANGED
@@ -37,7 +37,7 @@ def get_language_model(model_name: Optional[str] = None):
37
  def get_voice_model(speaker_name: str = "沈逸"):
38
  """获取语音模型依赖"""
39
  try:
40
- from services.audio.audio_generator.voice_model import voice_model_registry
41
 
42
  speaker_mapping = {
43
  '罗翔': 0,
 
37
  def get_voice_model(speaker_name: str = "沈逸"):
38
  """获取语音模型依赖"""
39
  try:
40
+ from services.audio.generators.voice_model import voice_model_registry
41
 
42
  speaker_mapping = {
43
  '罗翔': 0,
src/VoiceDialogue/api/routes/asr_routes.py CHANGED
@@ -2,7 +2,7 @@ import logging
2
 
3
  from fastapi import APIRouter, HTTPException, Request, BackgroundTasks
4
 
5
- from services.speech.asr import asr_manager
6
  from ..core.service_factories import get_asr_worker_service_definition
7
  from ..schemas.asr_schemas import (
8
  SupportedLanguagesResponse, ASRInstanceRequest, ASRInstanceResponse
 
2
 
3
  from fastapi import APIRouter, HTTPException, Request, BackgroundTasks
4
 
5
+ from services.speech.recognizers import asr_manager
6
  from ..core.service_factories import get_asr_worker_service_definition
7
  from ..schemas.asr_schemas import (
8
  SupportedLanguagesResponse, ASRInstanceRequest, ASRInstanceResponse
src/VoiceDialogue/api/routes/tts_routes.py CHANGED
@@ -3,7 +3,7 @@ from typing import Optional
3
 
4
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
5
 
6
- from services.audio.audio_generator import tts_config_registry
7
  from ..core.service_factories import get_tts_audio_generator_service_definition
8
  from ..schemas.tts_schemas import (
9
  TTSModelInfo, TTSModelListResponse, TTSModelLoadRequest,
 
3
 
4
  from fastapi import APIRouter, HTTPException, BackgroundTasks, Request
5
 
6
+ from services.audio.generators import tts_config_registry
7
  from ..core.service_factories import get_tts_audio_generator_service_definition
8
  from ..schemas.tts_schemas import (
9
  TTSModelInfo, TTSModelListResponse, TTSModelLoadRequest,
src/VoiceDialogue/main.py CHANGED
@@ -12,13 +12,13 @@ from core.constants import (
12
  text_input_queue,
13
  audio_output_queue
14
  )
15
- from services.audio.aec_audio_capture import EchoCancellingAudioCapture
16
- from services.audio.audio_answer import TTSAudioGenerator
17
- from services.audio.audio_generator.models import tts_config_registry
18
- from services.audio.audio_player import AudioStreamPlayer
19
- from services.speech.asr_service import ASRWorker
20
- from services.speech.speech_monitor import SpeechStateMonitor
21
- from services.text.text_generator import LLMResponseGenerator
22
 
23
  HERE = Path(__file__).parent
24
  language: typing.Literal['zh', 'en'] = 'en'
 
12
  text_input_queue,
13
  audio_output_queue
14
  )
15
+ from services.audio.capture import EchoCancellingAudioCapture
16
+ from services.audio.generator import TTSAudioGenerator
17
+ from services.audio.generators.models import tts_config_registry
18
+ from services.audio.player import AudioStreamPlayer
19
+ from services.speech.monitor import SpeechStateMonitor
20
+ from services.speech.recognizer import ASRWorker
21
+ from services.text.generator import LLMResponseGenerator
22
 
23
  HERE = Path(__file__).parent
24
  language: typing.Literal['zh', 'en'] = 'en'
src/VoiceDialogue/services/audio/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
- from .aec_audio_capture import EchoCancellingAudioCapture
2
- from .audio_answer import TTSAudioGenerator
3
- from .audio_player import AudioStreamPlayer
4
 
5
  __all__ = (
6
  "EchoCancellingAudioCapture",
 
1
+ from .capture import EchoCancellingAudioCapture
2
+ from .generator import TTSAudioGenerator
3
+ from .player import AudioStreamPlayer
4
 
5
  __all__ = (
6
  "EchoCancellingAudioCapture",
src/VoiceDialogue/services/audio/{aec_audio_capture.py → capture.py} RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_answer.py → generator.py} RENAMED
@@ -5,7 +5,7 @@ from queue import Empty
5
  from core.base import BaseThread
6
  from core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
7
  from models.voice_task import VoiceTask
8
- from .audio_generator import tts_manager, BaseTTSConfig
9
 
10
 
11
  class TTSAudioGenerator(BaseThread):
 
5
  from core.base import BaseThread
6
  from core.constants import dropped_audio_cache, user_still_speaking_event, voice_state_manager
7
  from models.voice_task import VoiceTask
8
+ from .generators import tts_manager, BaseTTSConfig
9
 
10
 
11
  class TTSAudioGenerator(BaseThread):
src/VoiceDialogue/services/audio/{audio_generator → generators}/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/kokoro.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/configs/moyoyo.py RENAMED
@@ -1,4 +1,4 @@
1
- from services.audio.audio_generator.models.moyoyo import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
 
1
+ from services.audio.generators.models.moyoyo import MoYoYoTTSConfig
2
 
3
  # 基础预训练模型文件映射
4
  BASE_PRETRAINED_FILES = {
src/VoiceDialogue/services/audio/{audio_generator → generators}/manager.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/models/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/models/base.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/models/kokoro.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/models/moyoyo.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/interface.py RENAMED
File without changes
src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/kokoro.py RENAMED
@@ -3,9 +3,9 @@ from typing import Tuple, Optional
3
  import numpy as np
4
  from kokoro_onnx import Kokoro
5
 
6
- from services.audio.audio_generator.configs.kokoro import KokoroTTSConfig
7
- from services.audio.audio_generator.manager import tts_tables
8
- from services.audio.audio_generator.runtime.interface import TTSInterface
9
 
10
 
11
  @tts_tables.register("tts_classes", "kokoro")
 
3
  import numpy as np
4
  from kokoro_onnx import Kokoro
5
 
6
+ from services.audio.generators.configs.kokoro import KokoroTTSConfig
7
+ from services.audio.generators.manager import tts_tables
8
+ from services.audio.generators.runtime.interface import TTSInterface
9
 
10
 
11
  @tts_tables.register("tts_classes", "kokoro")
src/VoiceDialogue/services/audio/{audio_generator → generators}/runtime/moyoyo.py RENAMED
@@ -4,9 +4,9 @@ from typing import Tuple
4
  import numpy as np
5
 
6
  from config.paths import load_third_party
7
- from services.audio.audio_generator.manager import tts_tables
8
- from services.audio.audio_generator.models.moyoyo import MoYoYoTTSConfig
9
- from services.audio.audio_generator.runtime.interface import TTSInterface
10
 
11
  load_third_party()
12
 
 
4
  import numpy as np
5
 
6
  from config.paths import load_third_party
7
+ from services.audio.generators.manager import tts_tables
8
+ from services.audio.generators.models.moyoyo import MoYoYoTTSConfig
9
+ from services.audio.generators.runtime.interface import TTSInterface
10
 
11
  load_third_party()
12
 
src/VoiceDialogue/services/audio/{audio_player.py → player.py} RENAMED
File without changes
src/VoiceDialogue/services/speech/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .asr_service import ASRWorker
2
- from .speech_monitor import SpeechStateMonitor
3
 
4
  __all__ = ['ASRWorker', 'SpeechStateMonitor']
 
1
+ from .recognizer import ASRWorker
2
+ from .monitor import SpeechStateMonitor
3
 
4
  __all__ = ['ASRWorker', 'SpeechStateMonitor']
src/VoiceDialogue/services/speech/{speech_monitor.py → monitor.py} RENAMED
File without changes
src/VoiceDialogue/services/speech/{asr_service.py → recognizer.py} RENAMED
@@ -8,7 +8,7 @@ from core.base import BaseThread
8
  from core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache
9
  from models.voice_task import VoiceTask
10
  from utils.cache import LRUCacheDict
11
- from .asr import asr_manager
12
 
13
 
14
  class ASRWorker(BaseThread):
 
8
  from core.constants import user_still_speaking_event, voice_state_manager, dropped_audio_cache
9
  from models.voice_task import VoiceTask
10
  from utils.cache import LRUCacheDict
11
+ from .recognizers import asr_manager
12
 
13
 
14
  class ASRWorker(BaseThread):
src/VoiceDialogue/services/speech/{asr → recognizers}/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/speech/{asr → recognizers}/manager.py RENAMED
File without changes
src/VoiceDialogue/services/speech/{asr → recognizers}/models/__init__.py RENAMED
File without changes
src/VoiceDialogue/services/speech/{asr → recognizers}/models/base.py RENAMED
File without changes
src/VoiceDialogue/services/speech/{asr → recognizers}/models/funasr.py RENAMED
@@ -5,9 +5,9 @@ import numpy as np
5
  from funasr_onnx import SeacoParaformer, CT_Transformer
6
 
7
  from config import paths
8
- from services.speech.asr.manager import asr_tables
9
- from services.speech.asr.models.base import ASRInterface
10
- from services.speech.asr.utils import ensure_minimum_audio_duration
11
 
12
 
13
  @asr_tables.register('asr_classes', 'funasr')
 
5
  from funasr_onnx import SeacoParaformer, CT_Transformer
6
 
7
  from config import paths
8
+ from services.speech.recognizers.manager import asr_tables
9
+ from services.speech.recognizers.models.base import ASRInterface
10
+ from services.speech.recognizers.utils import ensure_minimum_audio_duration
11
 
12
 
13
  @asr_tables.register('asr_classes', 'funasr')
src/VoiceDialogue/services/speech/{asr → recognizers}/models/whisper.py RENAMED
@@ -4,9 +4,9 @@ import numpy as np
4
  from pywhispercpp.model import Model
5
 
6
  from config import paths
7
- from services.speech.asr.manager import asr_tables
8
- from services.speech.asr.models.base import ASRInterface
9
- from services.speech.asr.utils import ensure_minimum_audio_duration
10
 
11
 
12
  @asr_tables.register('asr_classes', 'whisper')
 
4
  from pywhispercpp.model import Model
5
 
6
  from config import paths
7
+ from services.speech.recognizers.manager import asr_tables
8
+ from services.speech.recognizers.models.base import ASRInterface
9
+ from services.speech.recognizers.utils import ensure_minimum_audio_duration
10
 
11
 
12
  @asr_tables.register('asr_classes', 'whisper')
src/VoiceDialogue/services/speech/{asr → recognizers}/utils.py RENAMED
File without changes
src/VoiceDialogue/services/text/{text_generator.py → generator.py} RENAMED
@@ -9,7 +9,7 @@ from config import paths
9
  from core.base import BaseThread
10
  from core.constants import chat_history_cache
11
  from models.voice_task import VoiceTask
12
- from services.text.langchain_llm import preprocess_sentence_text, \
13
  create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline
14
 
15
  CHINESE_SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
 
9
  from core.base import BaseThread
10
  from core.constants import chat_history_cache
11
  from models.voice_task import VoiceTask
12
+ from services.text.processor import preprocess_sentence_text, \
13
  create_langchain_chat_llamacpp_instance, create_langchain_pipeline, warmup_langchain_pipeline
14
 
15
  CHINESE_SYSTEM_PROMPT = ("你是善于模拟真实的思考过程的AI助手。"
src/VoiceDialogue/services/text/{langchain_llm.py → processor.py} RENAMED
File without changes