dvalle08 committed on
Commit
5e32359
·
1 Parent(s): 0bfc688

Refactor .gitignore and enhance Pocket TTS and Moonshine STT: Simplify .gitignore by removing unnecessary entries and adding environment-specific files. Update Pocket TTS to use a configurable sample rate and replace UUID with shortuuid for request IDs. Modify Moonshine STT to use NotGivenOr for language parameter and update stream class inheritance for better clarity.

Browse files
.gitignore CHANGED
@@ -1,191 +1,32 @@
1
- # Created by https://www.toptal.com/developers/gitignore/api/python,pythonvanilla
2
- # Edit at https://www.toptal.com/developers/gitignore?templates=python,pythonvanilla
 
 
3
 
4
- ### Python ###
5
- # Byte-compiled / optimized / DLL files
6
  __pycache__/
7
- *.py[cod]
8
- *$py.class
9
-
10
- # C extensions
11
- *.so
12
-
13
- # Distribution / packaging
14
- .Python
15
- build/
16
- develop-eggs/
17
- dist/
18
- downloads/
19
- eggs/
20
- .eggs/
21
- lib/
22
- lib64/
23
- parts/
24
- sdist/
25
- var/
26
- wheels/
27
- share/python-wheels/
28
  *.egg-info/
29
- .installed.cfg
30
- *.egg
31
- MANIFEST
32
-
33
- # PyInstaller
34
- # Usually these files are written by a python script from a template
35
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
- *.manifest
37
- *.spec
38
-
39
- # Installer logs
40
- pip-log.txt
41
- pip-delete-this-directory.txt
42
-
43
- # Translations
44
- *.mo
45
- *.pot
46
-
47
- # Django stuff:
48
- *.log
49
- local_settings.py
50
- db.sqlite3
51
- db.sqlite3-journal
52
-
53
- # Flask stuff:
54
- instance/
55
- .webassets-cache
56
-
57
- # Scrapy stuff:
58
- .scrapy
59
-
60
- # Sphinx documentation
61
- docs/_build/
62
-
63
- # PyBuilder
64
- .pybuilder/
65
- target/
66
-
67
- # Jupyter Notebook
68
- .ipynb_checkpoints
69
-
70
- # IPython
71
- profile_default/
72
- ipython_config.py
73
-
74
- # pyenv
75
- # For a library or package, you might want to ignore these files since the code is
76
- # intended to run in multiple environments; otherwise, check them in:
77
- # .python-version
78
-
79
- # pipenv
80
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
81
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
82
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
83
- # install all needed dependencies.
84
- #Pipfile.lock
85
-
86
- # poetry
87
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
88
- # This is especially recommended for binary packages to ensure reproducibility, and is more
89
- # commonly ignored for libraries.
90
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
91
- #poetry.lock
92
-
93
- # pdm
94
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
95
- #pdm.lock
96
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
97
- # in version control.
98
- # https://pdm.fming.dev/#use-with-ide
99
- .pdm.toml
100
-
101
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
102
- __pypackages__/
103
-
104
- # Celery stuff
105
- celerybeat-schedule
106
- celerybeat.pid
107
 
108
- # SageMath parsed files
109
- *.sage.py
 
 
 
110
 
111
- # Environments
112
- .env
113
- .venv
114
- env/
115
- venv/
116
- ENV/
117
- env.bak/
118
- venv.bak/
119
  dev/
120
- nvidia_services/cache/asr/
121
- nvidia_services/cache/tts/
122
- .claude/
123
- .cursor/
124
  .pytest_cache/
125
- # Spyder project settings
126
- .spyderproject
127
- .spyproject
128
-
129
- # Rope project settings
130
- .ropeproject
131
-
132
- # mkdocs documentation
133
- /site
134
-
135
- # mypy
136
- .mypy_cache/
137
- .dmypy.json
138
- dmypy.json
139
-
140
- # Pyre type checker
141
- .pyre/
142
-
143
- # pytype static type analyzer
144
- .pytype/
145
-
146
- # Cython debug symbols
147
- cython_debug/
148
-
149
- # PyCharm
150
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
151
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
152
- # and can be added to the global gitignore or merged into this file. For a more nuclear
153
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
154
- #.idea/
155
-
156
- ### Python Patch ###
157
- # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
158
- poetry.toml
159
-
160
- # ruff
161
- .ruff_cache/
162
-
163
- # LSP config files
164
- pyrightconfig.json
165
-
166
- ### PythonVanilla ###
167
- # Byte-compiled / optimized / DLL files
168
-
169
- # C extensions
170
-
171
- # Distribution / packaging
172
-
173
- # Installer logs
174
-
175
- # Translations
176
-
177
- # pyenv
178
- # For a library or package, you might want to ignore these files since the code is
179
- # intended to run in multiple environments; otherwise, check them in:
180
- # .python-version
181
-
182
- # pipenv
183
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
184
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
185
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
186
- # install all needed dependencies.
187
-
188
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow
189
 
 
 
 
 
 
 
190
 
191
- # End of https://www.toptal.com/developers/gitignore/api/python,pythonvanillanvidia_services/cache/
 
 
1
+ # Environment
2
+ .env
3
+ .venv/
4
+ .streamlit/
5
 
6
+ # Python
 
7
  __pycache__/
8
+ *.pyc
9
+ *.pyo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  *.egg-info/
11
+ dist/
12
+ build/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # IDE
15
+ .cursor/
16
+ .cursorignore
17
+ .claude/
18
+ CLAUDE.md
19
 
20
+ # Dev files
 
 
 
 
 
 
 
21
  dev/
 
 
 
 
22
  .pytest_cache/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Model weights (never commit)
25
+ *.onnx
26
+ *.pt
27
+ *.bin
28
+ *.safetensors
29
+ model_cache/
30
 
31
+ # OS
32
+ .DS_Store
src/agent/agent.py CHANGED
@@ -91,6 +91,7 @@ async def session_handler(ctx: agents.JobContext) -> None:
91
  voice=settings.voice.POCKET_TTS_VOICE,
92
  temperature=settings.voice.POCKET_TTS_TEMPERATURE,
93
  lsd_decode_steps=settings.voice.POCKET_TTS_LSD_DECODE_STEPS,
 
94
  metrics_callback=tts_metrics_callback,
95
  )
96
 
 
91
  voice=settings.voice.POCKET_TTS_VOICE,
92
  temperature=settings.voice.POCKET_TTS_TEMPERATURE,
93
  lsd_decode_steps=settings.voice.POCKET_TTS_LSD_DECODE_STEPS,
94
+ sample_rate=settings.voice.SAMPLE_RATE_OUTPUT,
95
  metrics_callback=tts_metrics_callback,
96
  )
97
 
src/plugins/moonshine_stt/stt.py CHANGED
@@ -10,8 +10,8 @@ import torch
10
  from transformers import AutoProcessor, MoonshineStreamingForConditionalGeneration
11
  from livekit import rtc
12
  from livekit.agents import stt
13
- from livekit.agents.types import APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS
14
- from livekit.agents.utils import AudioBuffer
15
 
16
 
17
  @dataclass
@@ -49,7 +49,7 @@ class MoonshineSTT(stt.STT):
49
  self,
50
  buffer: AudioBuffer,
51
  *,
52
- language: str | None = None,
53
  conn_options: APIConnectOptions,
54
  ) -> stt.SpeechEvent:
55
  config = self._sanitize_options(language=language)
@@ -85,7 +85,7 @@ class MoonshineSTT(stt.STT):
85
  *,
86
  language: str | None = None,
87
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
88
- ) -> stt.SpeechStream:
89
  config = self._sanitize_options(language=language)
90
  return MoonshineSTTStream(
91
  stt=self,
@@ -98,7 +98,7 @@ class MoonshineSTT(stt.STT):
98
  )
99
 
100
 
101
- class MoonshineSTTStream(stt.SpeechStream):
102
  def __init__(
103
  self,
104
  *,
@@ -148,8 +148,7 @@ class MoonshineSTTStream(stt.SpeechStream):
148
 
149
  async def _finalize_segment(self) -> None:
150
  # Generate a unique request ID for this segment
151
- import uuid
152
- request_id = str(uuid.uuid4())
153
 
154
  if len(self._buffer) == 0:
155
  # Don't emit metrics for empty segments - just return
 
10
  from transformers import AutoProcessor, MoonshineStreamingForConditionalGeneration
11
  from livekit import rtc
12
  from livekit.agents import stt
13
+ from livekit.agents.types import APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
14
+ from livekit.agents.utils import AudioBuffer, shortuuid
15
 
16
 
17
  @dataclass
 
49
  self,
50
  buffer: AudioBuffer,
51
  *,
52
+ language: NotGivenOr[str] = NOT_GIVEN,
53
  conn_options: APIConnectOptions,
54
  ) -> stt.SpeechEvent:
55
  config = self._sanitize_options(language=language)
 
85
  *,
86
  language: str | None = None,
87
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
88
+ ) -> stt.RecognizeStream:
89
  config = self._sanitize_options(language=language)
90
  return MoonshineSTTStream(
91
  stt=self,
 
98
  )
99
 
100
 
101
+ class MoonshineSTTStream(stt.RecognizeStream):
102
  def __init__(
103
  self,
104
  *,
 
148
 
149
  async def _finalize_segment(self) -> None:
150
  # Generate a unique request ID for this segment
151
+ request_id = shortuuid("STT_")
 
152
 
153
  if len(self._buffer) == 0:
154
  # Don't emit metrics for empty segments - just return
src/plugins/pocket_tts/tts.py CHANGED
@@ -4,7 +4,6 @@ from __future__ import annotations
4
  import asyncio
5
  import logging
6
  import time
7
- import uuid
8
  from typing import Callable
9
 
10
  import numpy as np
@@ -14,9 +13,9 @@ from scipy import signal
14
 
15
  from livekit.agents import tts
16
  from livekit.agents.types import APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS
 
17
 
18
- from src.core.logger import logger
19
- from src.core.settings import settings
20
 
21
  # Reduce verbosity of pocket_tts library to avoid console spam
22
  logging.getLogger("pocket_tts").setLevel(logging.WARNING)
@@ -34,6 +33,7 @@ class PocketTTS(tts.TTS):
34
  voice: str = "alba",
35
  temperature: float = 0.7,
36
  lsd_decode_steps: int = 1,
 
37
  metrics_callback: OptionalTTSMetricsCallback = None,
38
  ) -> None:
39
  """Initialize Pocket TTS plugin.
@@ -43,9 +43,10 @@ class PocketTTS(tts.TTS):
43
  or path to audio file for custom voice
44
  temperature: Sampling temperature (0.0-2.0)
45
  lsd_decode_steps: LSD decoding steps (higher = better quality, slower)
 
46
  """
47
  # Use the configured output sample rate (default 48000 Hz)
48
- self._output_sample_rate = settings.voice.SAMPLE_RATE_OUTPUT
49
  self._native_sample_rate = 24000 # Pocket TTS native rate
50
 
51
  super().__init__(
@@ -149,7 +150,7 @@ class PocketSynthesizeStream(tts.SynthesizeStream):
149
  Args:
150
  output_emitter: Audio emitter for pushing generated audio
151
  """
152
- request_id = str(uuid.uuid4())
153
 
154
  output_emitter.initialize(
155
  request_id=request_id,
@@ -165,7 +166,7 @@ class PocketSynthesizeStream(tts.SynthesizeStream):
165
  if isinstance(data, self._FlushSentinel):
166
  if text_buffer.strip():
167
  # Create a new segment for each text chunk
168
- segment_id = str(uuid.uuid4())
169
  output_emitter.start_segment(segment_id=segment_id)
170
  await self._synthesize_segment(text_buffer, output_emitter, segment_id)
171
  output_emitter.end_segment()
@@ -177,7 +178,7 @@ class PocketSynthesizeStream(tts.SynthesizeStream):
177
 
178
  # Process any remaining text
179
  if text_buffer.strip():
180
- segment_id = str(uuid.uuid4())
181
  output_emitter.start_segment(segment_id=segment_id)
182
  await self._synthesize_segment(text_buffer, output_emitter, segment_id)
183
  output_emitter.end_segment()
 
4
  import asyncio
5
  import logging
6
  import time
 
7
  from typing import Callable
8
 
9
  import numpy as np
 
13
 
14
  from livekit.agents import tts
15
  from livekit.agents.types import APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS
16
+ from livekit.agents.utils import shortuuid
17
 
18
+ logger = logging.getLogger(__name__)
 
19
 
20
  # Reduce verbosity of pocket_tts library to avoid console spam
21
  logging.getLogger("pocket_tts").setLevel(logging.WARNING)
 
33
  voice: str = "alba",
34
  temperature: float = 0.7,
35
  lsd_decode_steps: int = 1,
36
+ sample_rate: int = 48000,
37
  metrics_callback: OptionalTTSMetricsCallback = None,
38
  ) -> None:
39
  """Initialize Pocket TTS plugin.
 
43
  or path to audio file for custom voice
44
  temperature: Sampling temperature (0.0-2.0)
45
  lsd_decode_steps: LSD decoding steps (higher = better quality, slower)
46
+ sample_rate: Output sample rate in Hz (default 48000)
47
  """
48
  # Use the configured output sample rate (default 48000 Hz)
49
+ self._output_sample_rate = sample_rate
50
  self._native_sample_rate = 24000 # Pocket TTS native rate
51
 
52
  super().__init__(
 
150
  Args:
151
  output_emitter: Audio emitter for pushing generated audio
152
  """
153
+ request_id = shortuuid("TTS_")
154
 
155
  output_emitter.initialize(
156
  request_id=request_id,
 
166
  if isinstance(data, self._FlushSentinel):
167
  if text_buffer.strip():
168
  # Create a new segment for each text chunk
169
+ segment_id = shortuuid("SEG_")
170
  output_emitter.start_segment(segment_id=segment_id)
171
  await self._synthesize_segment(text_buffer, output_emitter, segment_id)
172
  output_emitter.end_segment()
 
178
 
179
  # Process any remaining text
180
  if text_buffer.strip():
181
+ segment_id = shortuuid("SEG_")
182
  output_emitter.start_segment(segment_id=segment_id)
183
  await self._synthesize_segment(text_buffer, output_emitter, segment_id)
184
  output_emitter.end_segment()