flrtemis commited on
Commit
d0f0efe
·
verified ·
1 Parent(s): 56e7e0e

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ audio/bria.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ audio/sample_fr_hibiki_crepes.mp3 filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/bug.yml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Bug Report
2
+ description: You found a bug.
3
+ labels: ["bug", "triage"]
4
+ body:
5
+ - type: markdown
6
+ attributes:
7
+ value: |
8
+ Please first check the [FAQ](https://github.com/kyutai-labs/delayed-streams-modeling/blob/main/FAQ.md).
9
+ - type: dropdown
10
+ id: backend
11
+ attributes:
12
+ label: Backend impacted
13
+ description: Which backend is concerned with your bug report?
14
+ options:
15
+ - The PyTorch implementation
16
+ - The MLX implementation
17
+ - The Rust implementation
18
+ - Other / All
19
+ default: 0
20
+ validations:
21
+ required: true
22
+ - type: dropdown
23
+ id: os
24
+ attributes:
25
+ label: Operating system
26
+ description: What is your operating system?
27
+ options:
28
+ - Linux
29
+ - Mac OS X
30
+ - Windows (unsupported)
31
+ default: 0
32
+ validations:
33
+ required: true
34
+ - type: dropdown
35
+ id: hardware
36
+ attributes:
37
+ label: Hardware
38
+ description: What hardware are you using?
39
+ options:
40
+ - CPU
41
+ - GPU with CUDA
42
+ - Metal with MLX
43
+ default: 0
44
+ validations:
45
+ required: true
46
+ - type: textarea
47
+ id: description
48
+ attributes:
49
+ label: Description
50
+ description: Provide a detailed description of your bug.
51
+ placeholder:
52
+ value:
53
+ validations:
54
+ required: true
55
+ - type: textarea
56
+ id: more_info
57
+ attributes:
58
+ label: Extra information
59
+ description: Please provide any other relevant information, such as log extracts, code etc.
60
+ placeholder:
61
+ value:
62
+ validations:
63
+ required: true
64
+ - type: textarea
65
+ id: env
66
+ attributes:
67
+ label: Environment
68
+ description: Please provide any other relevant information, such as log extracts, code etc.
69
+ placeholder:
70
+ value: |
71
+ Fill in the following information on your system.
72
+ - Operating system version:
73
+
74
+ If the backend impacted is PyTorch:
75
+ - Python version:
76
+ - PyTorch version:
77
+ - CUDA version (run `python -c 'import torch; print(torch.version.cuda)'`):
78
+ - GPU model and memory:
79
+
80
+ If the backend is MLX:
81
+ - Mac model:
82
+ validations:
83
+ required: true
.github/ISSUE_TEMPLATE/question.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Question
2
+ description: You have a question about the codebase, the paper, or the implementation.
3
+ labels: ["question", "triage"]
4
+ body:
5
+ - type: markdown
6
+ attributes:
7
+ value: |
8
+ Please first check the [FAQ](https://github.com/kyutai-labs/delayed-streams-modeling/blob/main/FAQ.md).
9
+ - type: checkboxes
10
+ id: terms
11
+ attributes:
12
+ label: Due diligence
13
+ description: Have you searched the existing issues / FAQ / Google / asked ChatGPT?
14
+ options:
15
+ - label: I have done my due diligence in trying to find the answer myself.
16
+ required: true
17
+
18
+ - type: dropdown
19
+ id: backend
20
+ attributes:
21
+ label: Topic
22
+ description: What is your question about?
23
+ options:
24
+ - The paper
25
+ - The PyTorch implementation
26
+ - The MLX implementation
27
+ - The Rust implementation
28
+ - Other / All
29
+ default: 0
30
+ validations:
31
+ required: true
32
+ - type: textarea
33
+ id: question
34
+ attributes:
35
+ label: Question
36
+ description: What is your question?
37
+ placeholder: Your question. Please make sure this is directly related to our codebase. We will not provide support for installing PyTorch, CUDA, Rust etc.
38
+ value:
39
+ validations:
40
+ required: true
.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ## Checklist
2
+
3
+ - [ ] Read CONTRIBUTING.md, and accept the CLA by including the provided snippet. We will not accept PR without this.
4
+ - [ ] Run pre-commit hook.
5
+ - [ ] If you changed Rust code, run `cargo check`, `cargo clippy`, `cargo test`.
6
+
7
+ ## PR Description
8
+
9
+ <!-- Description for the PR -->
.github/actions/moshi_build/action.yml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: moshi_build
2
+ description: 'Build env.'
3
+ runs:
4
+ using: "composite"
5
+ steps:
6
+ - uses: actions/setup-python@v2
7
+ with:
8
+ python-version: '3.10.14'
9
+ - uses: actions/cache@v3
10
+ id: cache
11
+ with:
12
+ path: env
13
+ key: env-${{ hashFiles('moshi/pyproject.toml') }}
14
+ - name: Install dependencies
15
+ if: steps.cache.outputs.cache-hit != 'true'
16
+ shell: bash
17
+ run: |
18
+ python3 -m venv env
19
+ . env/bin/activate
20
+ python -m pip install --upgrade pip
21
+ pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
22
+ pip install moshi==0.2.7
23
+ pip install pre-commit
24
+ - name: Setup env
25
+ shell: bash
26
+ run: |
27
+ source env/bin/activate
28
+ pre-commit install
29
+ - name: Install uv
30
+ uses: astral-sh/setup-uv@v6
31
+ with:
32
+ version: "0.8.13"
.github/workflows/precommit.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: precommit
2
+ on:
3
+ push:
4
+ branches: [ main ]
5
+ pull_request:
6
+ branches: [ main ]
7
+
8
+ jobs:
9
+ run_precommit:
10
+ name: Run precommit
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - uses: ./.github/actions/moshi_build
15
+ - run: |
16
+ source env/bin/activate
17
+ pre-commit run --all-files
.gitignore ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Abstra
171
+ # Abstra is an AI-powered process automation framework.
172
+ # Ignore directories containing user credentials, local state, and settings.
173
+ # Learn more at https://abstra.io/docs
174
+ .abstra/
175
+
176
+ # Visual Studio Code
177
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
178
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
179
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
180
+ # you could uncomment the following to ignore the enitre vscode folder
181
+ # .vscode/
182
+
183
+ # Ruff stuff:
184
+ .ruff_cache/
185
+
186
+ # PyPI configuration file
187
+ .pypirc
188
+
189
+ # Cursor
190
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
191
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
192
+ # refer to https://docs.cursor.com/context/ignore-files
193
+ .cursorignore
194
+ .cursorindexingignore
195
+ out*.wav
.pre-commit-config.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: local
3
+ hooks:
4
+ - id: ruff
5
+ name: ruff
6
+ language: system
7
+ entry: bash -c 'uvx ruff check'
8
+ pass_filenames: false
9
+ always_run: true
10
+ - id: ruff-format
11
+ name: ruff format
12
+ language: system
13
+ entry: bash -c 'uvx ruff format --check'
14
+ pass_filenames: false
15
+ always_run: true
16
+ # Get rid of Jupyter Notebook output because we don't want to keep it in Git
17
+ - repo: https://github.com/kynan/nbstripout
18
+ rev: 0.8.1
19
+ hooks:
20
+ - id: nbstripout
21
+ - repo: https://github.com/pre-commit/pre-commit-hooks
22
+ rev: v5.0.0
23
+ hooks:
24
+ - id: check-added-large-files
25
+ args: ["--maxkb=2048"]
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python.analysis.typeCheckingMode": "standard"
3
+ }
CONTRIBUTING.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to Delayed-Streams-Modeling
2
+
3
+ ## Pull Requests
4
+
5
+ Delayed-Streams-Modeling is the implementation of a research paper.
6
+ Therefore, we do not plan on accepting many pull requests for new features.
7
+ However, we certainly welcome them for bug fixes.
8
+
9
+ 1. Fork the repo and create your branch from `main`.
10
+ 2. If you have changed APIs, update the documentation accordingly.
11
+ 3. Ensure pre-commit hooks pass properly, in particular the linting and typing.
12
+ 4. When changing the Rust code, run `cargo check`, `cargo clippy`, `cargo test`.
13
+ 5. Accept the Contributor License Agreement (see after).
14
+
15
+ Note that in general, we will not accept refactoring of the code.
16
+
17
+
18
+ ## Contributor License Agreement ("CLA")
19
+
20
+ In order to accept your pull request, we need you to submit a Contributor License Agreement.
21
+
22
+ If you agree with the full CLA provided in the next paragraph, copy the following statement in your PR, changing your Github Handle:
23
+
24
+ > I, {your GitHub handle}, confirm that I have read and understood the terms of the CLA of Kyutai-labs, as outlined in the repository's CONTRIBUTING.md, and I agree to be bound by these terms.
25
+
26
+ The full CLA is provided as follows:
27
+
28
+ > I, {your GitHub handle}, hereby grant to Kyutai-labs a perpetual, worldwide, non-exclusive, royalty-free,
29
+ > irrevocable license to use, modify, distribute, and sublicense my Contributions.
30
+
31
+ > I understand and accept that Contributions are limited to modifications, improvements, or changes
32
+ > to the project’s source code submitted via pull requests. I accept that Kyutai-labs has full discretion to
33
+ > review, accept, reject, or request changes to any Contributions I submit, and that submitting
34
+ > a pull request does not guarantee its inclusion in the project.
35
+
36
+ > By submitting a Contribution, I grant Kyutai-labs a perpetual, worldwide license to use, modify,
37
+ > reproduce, distribute, and create derivative works based on my Contributions.
38
+ > I also agree to assign all patent rights for any inventions or improvements that arise from my Contributions,
39
+ > giving the Kyutai-labs full rights to file for and enforce patents.
40
+ > I understand that the Kyutai-labs may commercialize, relicense, or exploit the project and my Contributions without further notice or obligation to me.
41
+ > I confirm that my Contributions are original and that I have the legal right to grant this license.
42
+ > If my Contributions include third-party materials, I will ensure that I have the necessary permissions
43
+ > and will disclose this information. I accept that once my Contributions are integrated, they may be altered or removed at the Kyutai-labs’s discretion.
44
+
45
+ > I acknowledge that I am making these Contributions voluntarily and will not receive any compensation.
46
+ > Furthermore, I understand that all Contributions, including mine, are provided on an "as-is" basis, with no warranties.
47
+ > By submitting a pull request, I agree to be bound by these terms.
48
+
49
+ ## Issues
50
+
51
+ Please submit issues on our Github repository.
52
+
53
+ ## License
54
+
55
+ By contributing to Delayed-Streams-Modeling, you agree that your contributions
56
+ will be licensed under the LICENSE-* files in the root directory of this source
57
+ tree. In particular, the rust code is licensed under APACHE, and the python code
58
+ under MIT.
FAQ.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FAQ
2
+
3
+ Here is the answer to a number of frequently asked questions.
4
+
5
+ ### Torch compilation issues
6
+
7
+ With some PyTorch/triton versions, one might encounter compilation errors
8
+ like the following:
9
+ ```
10
+ Traceback (most recent call last):
11
+ ...
12
+ File "site-packages/torch/_inductor/runtime/triton_heuristics.py", line 1153, in make_launcher
13
+ "launch_enter_hook": binary.__class__.launch_enter_hook,
14
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15
+ torch._inductor.exc.InductorError: AttributeError: type object 'CompiledKernel' has no attribute 'launch_enter_hook'
16
+ ```
17
+
18
+ If that's the case, you can disable torch compilation by setting the following
19
+ environment variable.
20
+ ```bash
21
+ export NO_TORCH_COMPILE=1
22
+ ```
23
+
24
+ ### Issues installing the sentencepiece dependency
25
+
26
+ On some linux distributions (arch) or on macos, the local version of cmake can
27
+ be too recent for the sentencepiece dependency.
28
+
29
+ ```
30
+ CMake Error at CMakeLists.txt:15 (cmake_minimum_required):
31
+ Compatibility with CMake < 3.5 has been removed from CMake.
32
+ ```
33
+
34
+ You can either downgrade your cmake version, e.g. 3.31.0 on arch works or try
35
+ setting `CMAKE_POLICY_VERSION_MINIMUM=3.5`.
36
+
37
+ If you run into some errors when compiling the sentencepiece rust bindings,
38
+ these could also be due to gcc being too recent, e.g. gcc 15. You can get
39
+ around this by using gcc-13, e.g. by setting the following after installing
40
+ the proper gcc packages.
41
+ ```bash
42
+ export CMAKE_C_COMPILER=/usr/bin/gcc-13
43
+ export CMAKE_CXX_COMPILER=/usr/bin/g++-13
44
+ CC=gcc-13 CXX=g++-13 cargo build --release
45
+ ```
46
+
47
+ Alternatively you can set `CXXFLAGS="-include cstdint"`, see this
48
+ [issue](https://github.com/google/sentencepiece/issues/1108).
49
+
50
+ ### Will you release training code?
51
+
52
+ Some finetuning code can be found in the [kyutai-labs/moshi-finetune repo](https://github.com/kyutai-labs/moshi-finetune).
53
+ This code has not been adapted to the Speech-To-Text and Text-To-Speech models
54
+ yet, but it should be a good starting point.
55
+
56
+
LICENSE-APACHE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
LICENSE-MIT ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Permission is hereby granted, free of charge, to any
2
+ person obtaining a copy of this software and associated
3
+ documentation files (the "Software"), to deal in the
4
+ Software without restriction, including without
5
+ limitation the rights to use, copy, modify, merge,
6
+ publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software
8
+ is furnished to do so, subject to the following
9
+ conditions:
10
+
11
+ The above copyright notice and this permission notice
12
+ shall be included in all copies or substantial portions
13
+ of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
+ DEALINGS IN THE SOFTWARE.
README.md CHANGED
@@ -1,10 +1,345 @@
1
- ---
2
- title: Https Huggingface Co Spaces Ftrtemis Moshi
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Delayed Streams Modeling: Kyutai STT & TTS
2
+
3
+ This repo contains instructions and examples of how to run
4
+ [Kyutai Speech-To-Text](#kyutai-speech-to-text)
5
+ and [Kyutai Text-To-Speech](#kyutai-text-to-speech) models.
6
+ See also [Unmute](https://github.com/kyutai-labs/unmute), a voice AI system built using Kyutai STT and Kyutai TTS.
7
+
8
+ But wait, what is "Delayed Streams Modeling"? It is a technique for solving many streaming X-to-Y tasks (with X, Y in `{speech, text}`)
9
+ that formalizes the approach we had with Moshi and Hibiki. See our [pre-print about DSM](https://arxiv.org/abs/2509.08753).
10
+
11
+ ## Kyutai Speech-To-Text
12
+
13
+ <a href="https://huggingface.co/collections/kyutai/speech-to-text-685403682cf8a23ab9466886" target="_blank" style="margin: 2px;">
14
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-KyutaiSTT-blue" style="display: inline-block; vertical-align: middle;"/>
15
+ </a>
16
+ <a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/stt_pytorch.ipynb">
17
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
18
+ </a>
19
+
20
+ **More details can be found on the [project page](https://kyutai.org/next/stt).**
21
+
22
+ Kyutai STT models are optimized for real-time usage, can be batched for efficiency, and return word level timestamps.
23
+ We provide two models:
24
+ - `kyutai/stt-1b-en_fr`, an English and French model with ~1B parameters, a 0.5 second delay, and a [semantic VAD](https://kyutai.org/next/stt#semantic-vad).
25
+ - `kyutai/stt-2.6b-en`, an English-only model with ~2.6B parameters and a 2.5 second delay.
26
+
27
+ These speech-to-text models have several advantages:
28
+ - Streaming inference: the models can process audio in chunks, which allows
29
+ for real-time transcription, and is great for interactive applications.
30
+ - Easy batching for maximum efficiency: a H100 can process 400 streams in
31
+ real-time.
32
+ - They return word-level timestamps.
33
+ - The 1B model has a semantic Voice Activity Detection (VAD) component that
34
+ can be used to detect when the user is speaking. This is especially useful
35
+ for building voice agents.
36
+
37
+ ### Implementations overview
38
+
39
+ We provide different implementations of Kyutai STT for different use cases.
40
+ Here is how to choose which one to use:
41
+
42
+ - **PyTorch: for research and tinkering.**
43
+ If you want to call the model from Python for research or experimentation, use our PyTorch implementation.
44
+ - **Rust: for production.**
45
+ If you want to serve Kyutai STT in a production setting, use our Rust server.
46
+ Our robust Rust server provides streaming access to the model over websockets.
47
+ We use this server to run [Unmute](https://unmute.sh/); on a L40S GPU, we can serve 64 simultaneous connections at a real-time factor of 3x.
48
+ - **MLX: for on-device inference on iPhone and Mac.**
49
+ MLX is Apple's ML framework that allows you to use hardware acceleration on Apple silicon.
50
+ If you want to run the model on a Mac or an iPhone, choose the MLX implementation.
51
+
52
+ <details>
53
+ <summary>PyTorch implementation</summary>
54
+ <a href="https://huggingface.co/kyutai/stt-2.6b-en" target="_blank" style="margin: 2px;">
55
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue" style="display: inline-block; vertical-align: middle;"/>
56
+ </a>
57
+ <a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/stt_pytorch.ipynb">
58
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
59
+ </a>
60
+
61
+ For an example of how to use the model in a way where you can directly stream in PyTorch tensors,
62
+ [see our Colab notebook](https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/stt_pytorch.ipynb).
63
+
64
+ This requires the [moshi package](https://pypi.org/project/moshi/)
65
+ with version 0.2.6 or later, which can be installed via pip.
66
+
67
+ If you just want to run the model on a file, you can use `moshi.run_inference`.
68
+
69
+ ```bash
70
+ python -m moshi.run_inference --hf-repo kyutai/stt-2.6b-en audio/bria.mp3
71
+ ```
72
+
73
+ If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
74
+ and just prefix the command above with `uvx --with moshi`.
75
+
76
+ Additionally, we provide two scripts that highlight different usage scenarios. The first script illustrates how to extract word-level timestamps from the model's outputs:
77
+
78
+ ```bash
79
+ uv run \
80
+ scripts/stt_from_file_pytorch.py \
81
+ --hf-repo kyutai/stt-2.6b-en \
82
+ audio/bria.mp3
83
+ ```
84
+
85
+ The second script can be used to run a model on an existing Hugging Face dataset and calculate its performance metrics:
86
+ ```bash
87
+ uv run scripts/evaluate_on_dataset.py \
88
+ --dataset meanwhile \
89
+ --hf-repo kyutai/stt-2.6b-en
90
+ ```
91
+
92
+ Another example shows how one can provide a text-, audio-, or text-audio prompt to our STT model:
93
+ ```bash
94
+ uv run scripts/stt_from_file_pytorch_with_prompt.py \
95
+ --hf-repo kyutai/stt-2.6b-en \
96
+ --file audio/bria.mp3 \
97
+ --prompt_file ./audio/loona.mp3 \
98
+ --prompt_text "Loonah" \
99
+ --cut-prompt-transcript
100
+ ```
101
+ Produces the transcript of `bria.mp3` using the `Loonah` spelling for the name, instead of the `Luna` used without any prompt:
102
+ ```
103
+ In the heart of an ancient forest, where the trees whispered secrets of the past, there lived a peculiar rabbit named Loonah (...)
104
+ ```
105
+
106
+ Apart from nudging the model for a specific spelling of a word, other potential use-cases include speaker adaptation and steering the model towards a specific formatting style or even a language.
107
+ However, please bear in mind that this is an experimental feature and its behavior is very sensitive to the prompt provided.
108
+ </details>
109
+
110
+ <details>
111
+ <summary>Rust server</summary>
112
+
113
+ <a href="https://huggingface.co/kyutai/stt-2.6b-en-candle" target="_blank" style="margin: 2px;">
114
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue" style="display: inline-block; vertical-align: middle;"/>
115
+ </a>
116
+
117
+ The Rust implementation provides a server that can process multiple streaming
118
+ queries in parallel. Depending on the amount of memory on your GPU, you may
119
+ have to adjust the batch size from the config file. For a L40S GPU, a batch size
120
+ of 64 works well and requests can be processed at 3x real-time speed.
121
+
122
+ In order to run the server, install the [moshi-server
123
+ crate](https://crates.io/crates/moshi-server) via the following command. The
124
+ server code can be found in the
125
+ [kyutai-labs/moshi](https://github.com/kyutai-labs/moshi/tree/main/rust/moshi-server)
126
+ repository.
127
+ ```bash
128
+ cargo install --features cuda moshi-server
129
+ ```
130
+
131
+ Then the server can be started via the following command using the config file
132
+ from this repository.
133
+ For `kyutai/stt-1b-en_fr`, use `configs/config-stt-en_fr-hf.toml`,
134
+ and for `kyutai/stt-2.6b-en`, use `configs/config-stt-en-hf.toml`.
135
+
136
+ ```bash
137
+ moshi-server worker --config configs/config-stt-en_fr-hf.toml
138
+ ```
139
+
140
+ Once the server has started you can transcribe audio from your microphone with the following script.
141
+ ```bash
142
+ uv run scripts/stt_from_mic_rust_server.py
143
+ ```
144
+
145
+ We also provide a script for transcribing from an audio file.
146
+ ```bash
147
+ uv run scripts/stt_from_file_rust_server.py audio/bria.mp3
148
+ ```
149
+
150
+ The script limits the decoding speed to simulate real-time processing of the audio.
151
+ Faster processing can be triggered by setting
152
+ the real-time factor, e.g. `--rtf 1000` will process
153
+ the data as fast as possible.
154
+ </details>
155
+
156
+ <details>
157
+ <summary>Rust standalone</summary>
158
+ <a href="https://huggingface.co/kyutai/stt-2.6b-en-candle" target="_blank" style="margin: 2px;">
159
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue" style="display: inline-block; vertical-align: middle;"/>
160
+ </a>
161
+
162
+ A standalone Rust example script is provided in the `stt-rs` directory in this repo.
163
+ This can be used as follows:
164
+ ```bash
165
+ cd stt-rs
166
+ cargo run --features cuda -r -- ../audio/bria.mp3
167
+ ```
168
+ You can get the timestamps by adding the `--timestamps` flag, and see the output
169
+ of the semantic VAD by adding the `--vad` flag.
170
+ </details>
171
+
172
+ <details>
173
+ <summary>MLX implementation</summary>
174
+ <a href="https://huggingface.co/kyutai/stt-2.6b-en-mlx" target="_blank" style="margin: 2px;">
175
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue" style="display: inline-block; vertical-align: middle;"/>
176
+ </a>
177
+
178
+ [MLX](https://ml-explore.github.io/mlx/build/html/index.html) is Apple's ML framework that allows you to use
179
+ hardware acceleration on Apple silicon.
180
+
181
+ This requires the [moshi-mlx package](https://pypi.org/project/moshi-mlx/)
182
+ with version 0.2.6 or later, which can be installed via pip.
183
+
184
+ If you just want to run the model on a file, you can use `moshi_mlx.run_inference`:
185
+
186
+ ```bash
187
+ python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6b-en-mlx audio/bria.mp3 --temp 0
188
+ ```
189
+
190
+ If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
191
+ and just prefix the command above with `uvx --with moshi-mlx`.
192
+
193
+ If you want to transcribe audio from your microphone, use:
194
+
195
+ ```bash
196
+ python scripts/stt_from_mic_mlx.py
197
+ ```
198
+
199
+ The MLX models can also be used in swift using the [moshi-swift
200
+ codebase](https://github.com/kyutai-labs/moshi-swift), the 1b model has been
201
+ tested to work fine on an iPhone 16 Pro.
202
+ </details>
203
+
204
+ ## Kyutai Text-to-Speech
205
+
206
+ <a href="https://huggingface.co/collections/kyutai/text-to-speech-6866192e7e004ed04fd39e29" target="_blank" style="margin: 2px;">
207
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-KyutaiTTS-blue" style="display: inline-block; vertical-align: middle;"/>
208
+ </a>
209
+ <a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/tts_pytorch.ipynb">
210
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
211
+ </a>
212
+
213
+ **More details can be found on the [project page](https://kyutai.org/next/tts).**
214
+
215
+ We provide different implementations of Kyutai TTS for different use cases. Here is how to choose which one to use:
216
+
217
+ - PyTorch: for research and tinkering. If you want to call the model from Python for research or experimentation, use our PyTorch implementation.
218
+ - Rust: for production. If you want to serve Kyutai TTS in a production setting, use our Rust server. Our robust Rust server provides streaming access to the model over websockets. We use this server to run Unmute.
219
+ - MLX: for on-device inference on iPhone and Mac. MLX is Apple's ML framework that allows you to use hardware acceleration on Apple silicon. If you want to run the model on a Mac or an iPhone, choose the MLX implementation.
220
+
221
+ <details>
222
+ <summary>PyTorch implementation</summary>
223
+
224
+ <a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/tts_pytorch.ipynb">
225
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
226
+ </a>
227
+
228
+ Check out our [Colab notebook](https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/tts_pytorch.ipynb) or use the script:
229
+
230
+ ```bash
231
+ # From stdin, plays audio immediately
232
+ echo "Hey, how are you?" | python scripts/tts_pytorch.py - -
233
+
234
+ # From text file to audio file
235
+ python scripts/tts_pytorch.py text_to_say.txt audio_output.wav
236
+ ```
237
+
238
+ The `tts_pytorch.py` script waits for all the text to be available before
239
+ starting the audio generation. A fully streaming implementation is available in
240
+ the `tts_pytorch_streaming.py` script, which can be used as follows:
241
+
242
+ ```bash
243
+ echo "Hey, how are you?" | python scripts/tts_pytorch_streaming.py audio_output.wav
244
+ ```
245
+
246
+ This requires the [moshi package](https://pypi.org/project/moshi/), which can be installed via pip.
247
+ If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
248
+ and just prefix the command above with `uvx --with moshi`.
249
+ </details>
250
+
251
+ <details>
252
+ <summary>Rust server</summary>
253
+
254
+
255
+ The Rust implementation provides a server that can process multiple streaming
256
+ queries in parallel.
257
+
258
+ Installing the Rust server is a bit tricky because it uses our Python implementation under the hood,
259
+ which also requires installing the Python dependencies.
260
+ Use the [start_tts.sh](https://github.com/kyutai-labs/unmute/blob/main/dockerless/start_tts.sh) script to properly install the Rust server.
261
+ If you already installed the `moshi-server` crate before and it's not working, you might need to force a reinstall by running `cargo uninstall moshi-server` first.
262
+ Feel free to open an issue if the installation is still broken.
263
+
264
+ Once installed, the server can be started via the following command using the config file
265
+ from this repository.
266
+
267
+ ```bash
268
+ moshi-server worker --config configs/config-tts.toml
269
+ ```
270
+
271
+ Once the server has started you can connect to it using our script as follows:
272
+ ```bash
273
+ # From stdin, plays audio immediately
274
+ echo "Hey, how are you?" | python scripts/tts_rust_server.py - -
275
+
276
+ # From text file to audio file
277
+ python scripts/tts_rust_server.py text_to_say.txt audio_output.wav
278
+ ```
279
+
280
+ You can configure the server by modifying `configs/config-tts.toml`. See comments in that file to see what options are available.
281
+ </details>
282
+
283
+ <details>
284
+ <summary>MLX implementation</summary>
285
+
286
+ [MLX](https://ml-explore.github.io/mlx/build/html/index.html) is Apple's ML framework that allows you to use
287
+ hardware acceleration on Apple silicon.
288
+
289
+ Use our example script to run Kyutai TTS on MLX.
290
+ The script takes text from stdin or a file and can output to a file or stream the resulting audio.
291
+ When streaming the output, if the model is not fast enough to keep up with
292
+ real-time, you can use the `--quantize 8` or `--quantize 4` flags to quantize
293
+ the model resulting in faster inference.
294
+
295
+ ```bash
296
+ # From stdin, plays audio immediately
297
+ echo "Hey, how are you?" | python scripts/tts_mlx.py - - --quantize 8
298
+
299
+ # From text file to audio file
300
+ python scripts/tts_mlx.py text_to_say.txt audio_output.wav
301
+ ```
302
+
303
+ This requires the [moshi-mlx package](https://pypi.org/project/moshi-mlx/), which can be installed via pip.
304
+ If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
305
+ and just prefix the command above with `uvx --with moshi-mlx`.
306
+ </details>
307
+
308
+ ## FAQ
309
+
310
+ Check out the [Frequently Asked Questions](FAQ.md) section before opening an issue.
311
+
312
+ ## License
313
+
314
+ The present code is provided under the MIT license for the Python parts, and Apache license for the Rust backend.
315
+ The web client code is provided under the MIT license.
316
+ Note that parts of this code are based on [AudioCraft](https://github.com/facebookresearch/audiocraft), released under
317
+ the MIT license.
318
+
319
+ The weights for the speech-to-text models are released under the CC-BY 4.0 license.
320
+
321
+ ## Developing
322
+
323
+ Install the [pre-commit hooks](https://pre-commit.com/) by running:
324
+
325
+ ```bash
326
+ pip install pre-commit
327
+ pre-commit install
328
+ ```
329
+
330
+ If you're using `uv`, you can replace the two commands with `uvx pre-commit install`.
331
+
332
+ ## Citation
333
+
334
+ Please cite the following paper.
335
+ ```
336
+ @techreport{kyutai2025streaming,
337
+ title={Streaming Sequence-to-Sequence Learning with Delayed Streams Modeling},
338
+ author={Neil Zeghidour and Eugene Kharitonov and Manu Orsini and Václav Volhejn and Gabriel de Marmiesse and Edouard Grave and Patrick Pérez and Laurent Mazaré and Alexandre Défossez},
339
+ year={2025},
340
+ eprint={2509.08753},
341
+ archivePrefix={arXiv},
342
+ primaryClass={cs.CL},
343
+ url={https://arxiv.org/abs/2509.08753},
344
+ }
345
+ ```
audio/bria.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3fa2f70249b671746ad032a1ca701c8c91c22dfeda45b9c5ecb6d453275a85c
3
+ size 717635
audio/loona.mp3 ADDED
Binary file (9 kB). View file
 
audio/sample_fr_hibiki_crepes.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d47d0143a55847a27beb61a990cf91c07ac83548796b853b441d3041e635ee
3
+ size 759450
configs/config-stt-en-hf.toml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ static_dir = "./static/"
2
+ log_dir = "$HOME/tmp/tts-logs"
3
+ instance_name = "tts"
4
+ authorized_ids = ["public_token"]
5
+
6
+ [modules.asr]
7
+ path = "/api/asr-streaming"
8
+ type = "BatchedAsr"
9
+ lm_model_file = "hf://kyutai/stt-2.6b-en-candle/model.safetensors"
10
+ text_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/tokenizer_en_audio_4000.model"
11
+ audio_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
12
+ asr_delay_in_tokens = 32
13
+ batch_size = 16
14
+ conditioning_learnt_padding = true
15
+ temperature = 0
16
+
17
+ [modules.asr.model]
18
+ audio_vocab_size = 2049
19
+ text_in_vocab_size = 4001
20
+ text_out_vocab_size = 4000
21
+ audio_codebooks = 32
22
+
23
+ [modules.asr.model.transformer]
24
+ d_model = 2048
25
+ num_heads = 32
26
+ num_layers = 48
27
+ dim_feedforward = 8192
28
+ causal = true
29
+ norm_first = true
30
+ bias_ff = false
31
+ bias_attn = false
32
+ context = 375
33
+ max_period = 100000
34
+ use_conv_block = false
35
+ use_conv_bias = true
36
+ gating = "silu"
37
+ norm = "RmsNorm"
38
+ positional_embedding = "Rope"
39
+ conv_layout = false
40
+ conv_kernel_size = 3
41
+ kv_repeat = 1
42
+ max_seq_len = 40960
configs/config-stt-en_fr-hf.toml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ static_dir = "./static/"
2
+ log_dir = "$HOME/tmp/tts-logs"
3
+ instance_name = "tts"
4
+ authorized_ids = ["public_token"]
5
+
6
+ [modules.asr]
7
+ path = "/api/asr-streaming"
8
+ type = "BatchedAsr"
9
+ lm_model_file = "hf://kyutai/stt-1b-en_fr-candle/model.safetensors"
10
+ text_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/tokenizer_en_fr_audio_8000.model"
11
+ audio_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/mimi-pytorch-e351c8d8@125.safetensors"
12
+ asr_delay_in_tokens = 6
13
+ batch_size = 64
14
+ conditioning_learnt_padding = true
15
+ temperature = 0.0
16
+
17
+ [modules.asr.model]
18
+ audio_vocab_size = 2049
19
+ text_in_vocab_size = 8001
20
+ text_out_vocab_size = 8000
21
+ audio_codebooks = 32
22
+
23
+ [modules.asr.model.transformer]
24
+ d_model = 2048
25
+ num_heads = 16
26
+ num_layers = 16
27
+ dim_feedforward = 8192
28
+ causal = true
29
+ norm_first = true
30
+ bias_ff = false
31
+ bias_attn = false
32
+ context = 750
33
+ max_period = 100000
34
+ use_conv_block = false
35
+ use_conv_bias = true
36
+ gating = "silu"
37
+ norm = "RmsNorm"
38
+ positional_embedding = "Rope"
39
+ conv_layout = false
40
+ conv_kernel_size = 3
41
+ kv_repeat = 1
42
+ max_seq_len = 40960
43
+
44
+ [modules.asr.model.extra_heads]
45
+ num_heads = 4
46
+ dim = 6
configs/config-tts.toml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ static_dir = "./static/"
2
+ log_dir = "$HOME/tmp/tts-logs"
3
+ # Used to identify the server when logging.
4
+ instance_name = "tts"
5
+ # Simple security: require clients to provide an auth token when connecting.
6
+ # It can be set by setting auth_id to the query string, e.g.
7
+ # "localhost:8089/api/tts_streaming?auth_id=public_token"
8
+ # or by setting the kyutai-api-key HTTP header, see the tts_rust_server.py example.
9
+ authorized_ids = ["public_token"]
10
+
11
+ [modules.tts_py]
12
+ type = "Py"
13
+ # Under which path should the TTS be available? This is relevant because the server
14
+ # can run STT at the same time.
15
+ path = "/api/tts_streaming"
16
+ text_tokenizer_file = "hf://kyutai/tts-1.6b-en_fr/tokenizer_spm_8k_en_fr_audio.model"
17
+ # Batch size determines how many parallel connections can the server handle.
18
+ # Higher values mean slower inference. Adjust to your GPU memory capacity.
19
+ batch_size = 8
20
+ text_bos_token = 1
21
+
22
+ [modules.tts_py.py]
23
+ log_folder = "$HOME/tmp/moshi-server-logs"
24
+ # The folder to read voices from. Can be a local directory, or a Hugging Face repo
25
+ # using the "hf-snapshot://" prefix. We use a glob to only download the .safetensors files
26
+ # with voice embeddings since the repo also contains .wav files we don't need.
27
+ voice_folder = "hf-snapshot://kyutai/tts-voices/**/*.safetensors"
28
+ # This voice will be used if the user doesn't specify one, or selects a non-existent one.
29
+ # This usually means something is wrong, so here we set it to a strange voice to make it clear
30
+ # that something is off.
31
+ # Relative to the voice folder.
32
+ default_voice = "unmute-prod-website/default_voice.wav"
33
+
34
+ # Classifier-free guidance coefficient (see https://arxiv.org/abs/2207.12598).
35
+ # TLDR: A higher CFG value makes the model adhere to the voice more closely,
36
+ # but it can affect audio quality and make it more likely to make mistakes
37
+ # like inserting words that aren't in the script.
38
+ # Technical details:
39
+ # CFG has the disadvantage of increasing inference time, because you need to run the model
40
+ # twice for each step (once with the voice embedding, once without).
41
+ # The default model, "tts-1.6b-en_fr", is trained with CFG distillation, which means it learns
42
+ # to mimic CFG with different coefs during training, without actually using CFG at inference time.
43
+ # There is only a fixed set of CFG coefs the model was trained with, so using a different value
44
+ # will not work. The recommended value for this model is 2.0.
45
+ cfg_coef = 2.0
46
+
47
+ # Whether the unconditioned branch of the CFG should still have text conditioning or not.
48
+ # Typically, no need to touch this.
49
+ cfg_is_no_text = true
50
+
51
+ # Number of padding frames to force between words. Will make the model articulate
52
+ # a bit better with values such as 1.
53
+ padding_between = 1
54
+ # Number of quantization levels for the residual vector quantizer.
55
+ # Higher means better sounding audio but longer inference.
56
+ # The maximum is typically 32, reasonable values are 8-32.
57
+ n_q = 24
58
+ # Make the model speak faster or slower by changing how likely it is to sample the padding token.
59
+ # Should be between -2 and 2, with positive values leading to slower speech.
60
+ padding_bonus = 0
scripts/stt_evaluate_on_dataset.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "datasets",
5
+ # "jiwer==3.1.0",
6
+ # "julius",
7
+ # "librosa",
8
+ # "moshi",
9
+ # "openai-whisper",
10
+ # "soundfile",
11
+ # ]
12
+ # ///
13
+ """
14
+ Example implementation of the streaming STT example. Here we group
15
+ test utterances in batches (pre- and post-padded with silence) and
16
+ and then feed these batches into the streaming STT model frame-by-frame.
17
+ """
18
+
19
+ # The outputs I get on my H100 using this code with the 2.6B model,
20
+ # bsz 32:
21
+
22
+ # LibriVox === cer: 4.09% wer: 7.33% corpus_wer: 6.78% RTF = 52.72
23
+ # Ami === cer: 15.99% wer: 18.78% corpus_wer: 12.20% RTF = 28.37
24
+ # LibriSpeech other === cer: 2.31% wer: 5.24% corpus_wer: 4.33% RTF = 44.76
25
+ # LibriSpeech clean === cer: 0.67% wer: 1.95% corpus_wer: 1.69% RTF = 68.19
26
+ # Tedlium (short) === cer: 2.15% wer: 3.65% corpus_wer: 3.33% RTF = 67.44
27
+ # spgispeech === cer: 0.99% wer: 2.00% corpus_wer: 2.03% RTF = 78.64
28
+ # gigaspeech === cer: 6.80% wer: 11.31% corpus_wer: 9.81% RTF = 64.04
29
+ # earnings22 (short) === cer: 12.63% wer: 15.70% corpus_wer: 11.02% RTF = 50.13
30
+
31
+ # Meanwhile === cer: 2.02% wer: 5.50% corpus_wer: 5.60% RTF = 69.19
32
+ # Tedlium (long) == cer: 1.53% wer: 2.56% corpus_wer: 2.97% RTF = 33.92
33
+ # Rev16 === cer: 6.57% wer: 10.08% corpus_wer: 11.43% RTF = 40.34
34
+ # Earnings21 === cer: 5.73% wer: 9.84% corpus_wer: 10.38% RTF = 73.15
35
+
36
+ import argparse
37
+ import dataclasses
38
+ import time
39
+
40
+ import jiwer
41
+ import julius
42
+ import moshi.models
43
+ import torch
44
+ import tqdm
45
+ from datasets import Dataset, load_dataset
46
+ from whisper.normalizers import EnglishTextNormalizer
47
+
48
+ _NORMALIZER = EnglishTextNormalizer()
49
+
50
+
51
+ def get_text(sample):
52
+ possible_keys = [
53
+ "text",
54
+ "sentence",
55
+ "normalized_text",
56
+ "transcript",
57
+ "transcription",
58
+ ]
59
+ for key in possible_keys:
60
+ if key in sample:
61
+ return sample[key]
62
+ raise ValueError(
63
+ f"Expected transcript column of either {possible_keys}."
64
+ f"Got sample with keys: {', '.join(sample.keys())}. Ensure a text column name is present in the dataset."
65
+ )
66
+
67
+
68
+ # The two functions below are adapted from https://github.com/huggingface/open_asr_leaderboard/blob/main/normalizer/data_utils.py
69
+
70
+
71
+ def normalize(batch):
72
+ batch["original_text"] = get_text(batch)
73
+ batch["norm_text"] = _NORMALIZER(batch["original_text"])
74
+ return batch
75
+
76
+
77
+ def is_target_text_in_range(ref):
78
+ if ref.strip() == "ignore time segment in scoring":
79
+ return False
80
+ else:
81
+ return ref.strip() != ""
82
+
83
+
84
+ # End of the adapted part
85
+
86
+
87
+ class AsrMetrics:
88
+ def __init__(self):
89
+ self.cer_sum = 0.0
90
+ self.wer_sum = 0.0
91
+ self.errors_sum = 0.0
92
+ self.total_words_sum = 0.0
93
+ self.num_sequences = 0.0
94
+
95
+ def update(self, hyp: str, ref: str) -> None:
96
+ normalized_ref = _NORMALIZER(ref)
97
+ normalized_hyp = _NORMALIZER(hyp)
98
+
99
+ this_wer = jiwer.wer(normalized_ref, normalized_hyp)
100
+ this_cer = jiwer.cer(normalized_ref, normalized_hyp)
101
+ measures = jiwer.compute_measures(normalized_ref, normalized_hyp)
102
+
103
+ self.wer_sum += this_wer
104
+ self.cer_sum += this_cer
105
+ self.errors_sum += (
106
+ measures["substitutions"] + measures["deletions"] + measures["insertions"]
107
+ )
108
+ self.total_words_sum += (
109
+ measures["substitutions"] + measures["deletions"] + measures["hits"]
110
+ )
111
+ self.num_sequences += 1
112
+
113
+ def compute(self) -> dict:
114
+ assert self.num_sequences > 0, (
115
+ "Unable to compute with total number of comparisons <= 0"
116
+ ) # type: ignore
117
+ return {
118
+ "cer": (self.cer_sum / self.num_sequences),
119
+ "wer": (self.wer_sum / self.num_sequences),
120
+ "corpus_wer": (self.errors_sum / self.total_words_sum),
121
+ }
122
+
123
+ def __str__(self) -> str:
124
+ result = self.compute()
125
+ return " ".join(f"{k}: {100 * v:.2f}%" for k, v in result.items())
126
+
127
+
128
+ class Timer:
129
+ def __init__(self):
130
+ self.total = 0
131
+ self._start_time = None
132
+
133
+ def __enter__(self):
134
+ self._start_time = time.perf_counter()
135
+ return self
136
+
137
+ def __exit__(self, *_):
138
+ self.total += time.perf_counter() - self._start_time
139
+ self._start_time = None
140
+
141
+
142
+ @dataclasses.dataclass
143
+ class _DatasetInfo:
144
+ alias: str
145
+
146
+ name: str
147
+ config: str
148
+ split: str = "test"
149
+
150
+
151
+ _DATASETS = [
152
+ # Long-form datasets from distil-whisper
153
+ _DatasetInfo("rev16", "distil-whisper/rev16", "whisper_subset"),
154
+ _DatasetInfo("earnings21", "distil-whisper/earnings21", "full"),
155
+ _DatasetInfo("earnings22", "distil-whisper/earnings22", "full"),
156
+ _DatasetInfo("tedlium", "distil-whisper/tedlium-long-form", None),
157
+ _DatasetInfo("meanwhile", "distil-whisper/meanwhile", None),
158
+ # Short-form datasets from OpenASR leaderboard
159
+ _DatasetInfo("ami", "hf-audio/esb-datasets-test-only-sorted", "ami"),
160
+ _DatasetInfo(
161
+ "librispeech.clean",
162
+ "hf-audio/esb-datasets-test-only-sorted",
163
+ "librispeech",
164
+ split="test.clean",
165
+ ),
166
+ _DatasetInfo(
167
+ "librispeech.other",
168
+ "hf-audio/esb-datasets-test-only-sorted",
169
+ "librispeech",
170
+ split="test.other",
171
+ ),
172
+ _DatasetInfo("voxpopuli", "hf-audio/esb-datasets-test-only-sorted", "voxpopuli"),
173
+ _DatasetInfo("spgispeech", "hf-audio/esb-datasets-test-only-sorted", "spgispeech"),
174
+ _DatasetInfo("gigaspeech", "hf-audio/esb-datasets-test-only-sorted", "gigaspeech"),
175
+ _DatasetInfo("tedlium-short", "hf-audio/esb-datasets-test-only-sorted", "tedlium"),
176
+ _DatasetInfo(
177
+ "earnings22-short", "hf-audio/esb-datasets-test-only-sorted", "earnings22"
178
+ ),
179
+ ]
180
+ DATASET_MAP = {dataset.alias: dataset for dataset in _DATASETS}
181
+
182
+
183
+ def get_dataset(args) -> Dataset:
184
+ if args.dataset not in DATASET_MAP:
185
+ raise RuntimeError(f"Unknown dataset: {args.dataset}")
186
+
187
+ info = DATASET_MAP[args.dataset]
188
+
189
+ dataset = load_dataset(
190
+ info.name,
191
+ info.config,
192
+ split=info.split,
193
+ cache_dir=args.hf_cache_dir,
194
+ streaming=False,
195
+ token=True,
196
+ )
197
+ dataset = dataset.map(normalize)
198
+ dataset = dataset.filter(is_target_text_in_range, input_columns=["norm_text"])
199
+
200
+ return dataset
201
+
202
+
203
+ @torch.no_grad
204
+ def get_padded_batch(
205
+ audios: list[tuple[torch.Tensor, int]],
206
+ before_padding: float,
207
+ after_padding: float,
208
+ audio_encoder,
209
+ ):
210
+ sample_rate = audio_encoder.sample_rate
211
+
212
+ max_len = 0
213
+ batch = []
214
+ durations = []
215
+ for audio, sr in audios:
216
+ durations.append(audio.shape[-1] / sr)
217
+ audio = julius.resample_frac(audio, int(sr), int(sample_rate))
218
+ audio = torch.nn.functional.pad(
219
+ audio, (int(before_padding * sample_rate), int(after_padding * sample_rate))
220
+ )
221
+ max_len = max(max_len, audio.shape[-1])
222
+ batch.append(audio)
223
+
224
+ target = max_len
225
+ if target % audio_encoder.frame_size != 0:
226
+ target = target + (
227
+ audio_encoder.frame_size - max_len % audio_encoder.frame_size
228
+ )
229
+ padded_batch = torch.stack(
230
+ [
231
+ torch.nn.functional.pad(audio, (0, target - audio.shape[-1]))
232
+ for audio in batch
233
+ ]
234
+ )
235
+ return padded_batch
236
+
237
+
238
+ @torch.no_grad
239
+ def streaming_transcribe(
240
+ padded_batch: torch.Tensor,
241
+ mimi,
242
+ lm_gen,
243
+ ):
244
+ bsz = padded_batch.shape[0]
245
+
246
+ text_tokens_acc = []
247
+
248
+ with mimi.streaming(bsz), lm_gen.streaming(bsz):
249
+ for offset in range(0, padded_batch.shape[-1], mimi.frame_size):
250
+ audio_chunk = padded_batch[:, offset : offset + mimi.frame_size]
251
+ audio_chunk = audio_chunk[:, None, :]
252
+
253
+ audio_tokens = mimi.encode(audio_chunk)
254
+ text_tokens = lm_gen.step(audio_tokens)
255
+ if text_tokens is not None:
256
+ text_tokens_acc.append(text_tokens)
257
+
258
+ return torch.concat(text_tokens_acc, axis=-1)
259
+
260
+
261
+ def run_inference(
262
+ dataset,
263
+ mimi,
264
+ lm_gen,
265
+ tokenizer,
266
+ padding_token_id,
267
+ before_padding_sec,
268
+ after_padding_sec,
269
+ ):
270
+ metrics = AsrMetrics()
271
+ audio_time = 0.0
272
+ inference_timer = Timer()
273
+
274
+ for batch in tqdm.tqdm(dataset.iter(args.batch_size)):
275
+ audio_data = list(
276
+ zip(
277
+ [torch.tensor(x["array"]).float() for x in batch["audio"]],
278
+ [x["sampling_rate"] for x in batch["audio"]],
279
+ )
280
+ )
281
+
282
+ audio_time += sum(audio.shape[-1] / sr for (audio, sr) in audio_data)
283
+
284
+ gt_transcripts = batch["original_text"]
285
+
286
+ padded_batch = get_padded_batch(
287
+ audio_data,
288
+ before_padding=before_padding_sec,
289
+ after_padding=after_padding_sec,
290
+ audio_encoder=mimi,
291
+ )
292
+ padded_batch = padded_batch.cuda()
293
+
294
+ with inference_timer:
295
+ text_tokens = streaming_transcribe(
296
+ padded_batch,
297
+ mimi=mimi,
298
+ lm_gen=lm_gen,
299
+ )
300
+
301
+ for batch_index in range(text_tokens.shape[0]):
302
+ utterance_tokens = text_tokens[batch_index, ...]
303
+ utterance_tokens = utterance_tokens[utterance_tokens > padding_token_id]
304
+ text = tokenizer.decode(utterance_tokens.cpu().numpy().tolist())
305
+ metrics.update(hyp=text, ref=gt_transcripts[batch_index])
306
+
307
+ return metrics, inference_timer.total, audio_time
308
+
309
+
310
+ def main(args):
311
+ torch.set_float32_matmul_precision("high")
312
+
313
+ info = moshi.models.loaders.CheckpointInfo.from_hf_repo(
314
+ args.hf_repo,
315
+ moshi_weights=args.moshi_weight,
316
+ mimi_weights=args.mimi_weight,
317
+ tokenizer=args.tokenizer,
318
+ config_path=args.config_path,
319
+ )
320
+
321
+ mimi = info.get_mimi(device=args.device)
322
+ tokenizer = info.get_text_tokenizer()
323
+ lm = info.get_moshi(
324
+ device=args.device,
325
+ dtype=torch.bfloat16,
326
+ )
327
+ lm_gen = moshi.models.LMGen(lm, temp=0, temp_text=0.0)
328
+ dataset = get_dataset(args)
329
+
330
+ padding_token_id = info.raw_config.get("text_padding_token_id", 3)
331
+ # Putting in some conservative defaults
332
+ audio_silence_prefix_seconds = info.stt_config.get(
333
+ "audio_silence_prefix_seconds", 1.0
334
+ )
335
+ audio_delay_seconds = info.stt_config.get("audio_delay_seconds", 5.0)
336
+
337
+ wer_metric, inference_time, audio_time = run_inference(
338
+ dataset,
339
+ mimi,
340
+ lm_gen,
341
+ tokenizer,
342
+ padding_token_id,
343
+ audio_silence_prefix_seconds,
344
+ audio_delay_seconds + 0.5,
345
+ )
346
+
347
+ print(wer_metric, f"RTF = {audio_time / inference_time:.2f}")
348
+
349
+
350
+ if __name__ == "__main__":
351
+ parser = argparse.ArgumentParser(description="Example streaming STT inference.")
352
+ parser.add_argument(
353
+ "--dataset",
354
+ required=True,
355
+ choices=DATASET_MAP.keys(),
356
+ help="Dataset to run inference on.",
357
+ )
358
+
359
+ parser.add_argument(
360
+ "--hf-repo", type=str, help="HF repo to load the STT model from."
361
+ )
362
+ parser.add_argument("--tokenizer", type=str, help="Path to a local tokenizer file.")
363
+ parser.add_argument(
364
+ "--moshi-weight", type=str, help="Path to a local checkpoint file."
365
+ )
366
+ parser.add_argument(
367
+ "--mimi-weight", type=str, help="Path to a local checkpoint file for Mimi."
368
+ )
369
+ parser.add_argument(
370
+ "--config-path", type=str, help="Path to a local config file.", default=None
371
+ )
372
+ parser.add_argument(
373
+ "--batch-size",
374
+ type=int,
375
+ help="Batch size.",
376
+ default=32,
377
+ )
378
+ parser.add_argument(
379
+ "--device",
380
+ type=str,
381
+ default="cuda",
382
+ help="Device on which to run, defaults to 'cuda'.",
383
+ )
384
+ parser.add_argument("--hf-cache-dir", type=str, help="HuggingFace cache folder.")
385
+ args = parser.parse_args()
386
+
387
+ main(args)
scripts/stt_from_file_mlx.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "huggingface_hub",
5
+ # "moshi_mlx==0.2.12",
6
+ # "numpy",
7
+ # "sentencepiece",
8
+ # "sounddevice",
9
+ # "sphn",
10
+ # ]
11
+ # ///
12
+
13
+ import argparse
14
+ import json
15
+
16
+ import mlx.core as mx
17
+ import mlx.nn as nn
18
+ import sentencepiece
19
+ import sphn
20
+ from huggingface_hub import hf_hub_download
21
+ from moshi_mlx import models, utils
22
+
23
+ if __name__ == "__main__":
24
+ parser = argparse.ArgumentParser()
25
+ parser.add_argument("in_file", help="The file to transcribe.")
26
+ parser.add_argument("--max-steps", default=4096)
27
+ parser.add_argument("--hf-repo")
28
+ parser.add_argument(
29
+ "--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
30
+ )
31
+ args = parser.parse_args()
32
+
33
+ audio, _ = sphn.read(args.in_file, sample_rate=24000)
34
+ if args.hf_repo is None:
35
+ if args.vad:
36
+ args.hf_repo = "kyutai/stt-1b-en_fr-candle"
37
+ else:
38
+ args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
39
+ lm_config = hf_hub_download(args.hf_repo, "config.json")
40
+ with open(lm_config, "r") as fobj:
41
+ lm_config = json.load(fobj)
42
+ mimi_weights = hf_hub_download(args.hf_repo, lm_config["mimi_name"])
43
+ moshi_name = lm_config.get("moshi_name", "model.safetensors")
44
+ moshi_weights = hf_hub_download(args.hf_repo, moshi_name)
45
+ text_tokenizer = hf_hub_download(args.hf_repo, lm_config["tokenizer_name"])
46
+
47
+ lm_config = models.LmConfig.from_config_dict(lm_config)
48
+ model = models.Lm(lm_config)
49
+ model.set_dtype(mx.bfloat16)
50
+ if moshi_weights.endswith(".q4.safetensors"):
51
+ nn.quantize(model, bits=4, group_size=32)
52
+ elif moshi_weights.endswith(".q8.safetensors"):
53
+ nn.quantize(model, bits=8, group_size=64)
54
+
55
+ print(f"loading model weights from {moshi_weights}")
56
+ if args.hf_repo.endswith("-candle"):
57
+ model.load_pytorch_weights(moshi_weights, lm_config, strict=True)
58
+ else:
59
+ model.load_weights(moshi_weights, strict=True)
60
+
61
+ print(f"loading the text tokenizer from {text_tokenizer}")
62
+ text_tokenizer = sentencepiece.SentencePieceProcessor(text_tokenizer) # type: ignore
63
+
64
+ print(f"loading the audio tokenizer {mimi_weights}")
65
+ audio_tokenizer = models.mimi.Mimi(models.mimi_202407(32))
66
+ audio_tokenizer.load_pytorch_weights(str(mimi_weights), strict=True)
67
+ print("warming up the model")
68
+ model.warmup()
69
+ gen = models.LmGen(
70
+ model=model,
71
+ max_steps=args.max_steps,
72
+ text_sampler=utils.Sampler(top_k=25, temp=0),
73
+ audio_sampler=utils.Sampler(top_k=250, temp=0.8),
74
+ check=False,
75
+ )
76
+
77
+ print(f"starting inference {audio.shape}")
78
+ audio = mx.concat([mx.array(audio), mx.zeros((1, 48000))], axis=-1)
79
+ last_print_was_vad = False
80
+ for start_idx in range(0, audio.shape[-1] // 1920 * 1920, 1920):
81
+ block = audio[:, None, start_idx : start_idx + 1920]
82
+ other_audio_tokens = audio_tokenizer.encode_step(block).transpose(0, 2, 1)
83
+ if args.vad:
84
+ text_token, vad_heads = gen.step_with_extra_heads(other_audio_tokens[0])
85
+ if vad_heads:
86
+ pr_vad = vad_heads[2][0, 0, 0].item()
87
+ if pr_vad > 0.5 and not last_print_was_vad:
88
+ print(" [end of turn detected]")
89
+ last_print_was_vad = True
90
+ else:
91
+ text_token = gen.step(other_audio_tokens[0])
92
+ text_token = text_token[0].item()
93
+ audio_tokens = gen.last_audio_tokens()
94
+ _text = None
95
+ if text_token not in (0, 3):
96
+ _text = text_tokenizer.id_to_piece(text_token) # type: ignore
97
+ _text = _text.replace("▁", " ")
98
+ print(_text, end="", flush=True)
99
+ last_print_was_vad = False
100
+ print()
scripts/stt_from_file_pytorch.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "julius",
5
+ # "librosa",
6
+ # "soundfile",
7
+ # "moshi==0.2.11",
8
+ # ]
9
+ # ///
10
+
11
+ """An example script that illustrates how one can get per-word timestamps from
12
+ Kyutai STT models.
13
+ """
14
+
15
+ import argparse
16
+ import dataclasses
17
+ import itertools
18
+ import math
19
+
20
+ import julius
21
+ import moshi.models
22
+ import sphn
23
+ import time
24
+ import torch
25
+
26
+
27
+ @dataclasses.dataclass
28
+ class TimestampedText:
29
+ text: str
30
+ timestamp: tuple[float, float]
31
+
32
+ def __str__(self):
33
+ return f"{self.text} ({self.timestamp[0]:.2f}:{self.timestamp[1]:.2f})"
34
+
35
+
36
+ def tokens_to_timestamped_text(
37
+ text_tokens,
38
+ tokenizer,
39
+ frame_rate,
40
+ end_of_padding_id,
41
+ padding_token_id,
42
+ offset_seconds,
43
+ ) -> list[TimestampedText]:
44
+ text_tokens = text_tokens.cpu().view(-1)
45
+
46
+ # Normally `end_of_padding` tokens indicate word boundaries.
47
+ # Everything between them should be a single word;
48
+ # the time offset of the those tokens correspond to word start and
49
+ # end timestamps (minus silence prefix and audio delay).
50
+ #
51
+ # However, in rare cases some complexities could arise. Firstly,
52
+ # for words that are said quickly but are represented with
53
+ # multiple tokens, the boundary might be omitted. Secondly,
54
+ # for the very last word the end boundary might not happen.
55
+ # Below is a code snippet that handles those situations a bit
56
+ # more carefully.
57
+
58
+ sequence_timestamps = []
59
+
60
+ def _tstmp(start_position, end_position):
61
+ return (
62
+ max(0, start_position / frame_rate - offset_seconds),
63
+ max(0, end_position / frame_rate - offset_seconds),
64
+ )
65
+
66
+ def _decode(t):
67
+ t = t[t > padding_token_id]
68
+ return tokenizer.decode(t.numpy().tolist())
69
+
70
+ def _decode_segment(start, end):
71
+ nonlocal text_tokens
72
+ nonlocal sequence_timestamps
73
+
74
+ text = _decode(text_tokens[start:end])
75
+ words_inside_segment = text.split()
76
+
77
+ if len(words_inside_segment) == 0:
78
+ return
79
+ if len(words_inside_segment) == 1:
80
+ # Single word within the boundaries, the general case
81
+ sequence_timestamps.append(
82
+ TimestampedText(text=text, timestamp=_tstmp(start, end))
83
+ )
84
+ else:
85
+ # We're in a rare situation where multiple words are so close they are not separated by `end_of_padding`.
86
+ # We tokenize words one-by-one; each word is assigned with as many frames as much tokens it has.
87
+ for adjacent_word in words_inside_segment[:-1]:
88
+ n_tokens = len(tokenizer.encode(adjacent_word))
89
+ sequence_timestamps.append(
90
+ TimestampedText(
91
+ text=adjacent_word, timestamp=_tstmp(start, start + n_tokens)
92
+ )
93
+ )
94
+ start += n_tokens
95
+
96
+ # The last word takes everything until the boundary
97
+ adjacent_word = words_inside_segment[-1]
98
+ sequence_timestamps.append(
99
+ TimestampedText(text=adjacent_word, timestamp=_tstmp(start, end))
100
+ )
101
+
102
+ (segment_boundaries,) = torch.where(text_tokens == end_of_padding_id)
103
+
104
+ if not segment_boundaries.numel():
105
+ return []
106
+
107
+ for i in range(len(segment_boundaries) - 1):
108
+ segment_start = int(segment_boundaries[i]) + 1
109
+ segment_end = int(segment_boundaries[i + 1])
110
+
111
+ _decode_segment(segment_start, segment_end)
112
+
113
+ last_segment_start = segment_boundaries[-1] + 1
114
+
115
+ boundary_token = torch.tensor([tokenizer.eos_id()])
116
+ (end_of_last_segment,) = torch.where(
117
+ torch.isin(text_tokens[last_segment_start:], boundary_token)
118
+ )
119
+
120
+ if not end_of_last_segment.numel():
121
+ # upper-bound either end of the audio or 1 second duration, whicher is smaller
122
+ last_segment_end = min(text_tokens.shape[-1], last_segment_start + frame_rate)
123
+ else:
124
+ last_segment_end = last_segment_start + end_of_last_segment[0]
125
+ _decode_segment(last_segment_start, last_segment_end)
126
+
127
+ return sequence_timestamps
128
+
129
+
130
+ def main(args):
131
+ if args.vad and args.hf_repo is None:
132
+ args.hf_repo = "kyutai/stt-1b-en_fr-candle"
133
+
134
+ info = moshi.models.loaders.CheckpointInfo.from_hf_repo(
135
+ args.hf_repo,
136
+ moshi_weights=args.moshi_weight,
137
+ mimi_weights=args.mimi_weight,
138
+ tokenizer=args.tokenizer,
139
+ config_path=args.config_path,
140
+ )
141
+
142
+ mimi = info.get_mimi(device=args.device)
143
+ tokenizer = info.get_text_tokenizer()
144
+ lm = info.get_moshi(
145
+ device=args.device,
146
+ dtype=torch.bfloat16,
147
+ )
148
+ lm_gen = moshi.models.LMGen(lm, temp=0, temp_text=0.0)
149
+
150
+ audio_silence_prefix_seconds = info.stt_config.get(
151
+ "audio_silence_prefix_seconds", 1.0
152
+ )
153
+ audio_delay_seconds = info.stt_config.get("audio_delay_seconds", 5.0)
154
+ padding_token_id = info.raw_config.get("text_padding_token_id", 3)
155
+
156
+ audio, input_sample_rate = sphn.read(args.in_file)
157
+ audio = torch.from_numpy(audio).to(args.device)
158
+ audio = julius.resample_frac(audio, input_sample_rate, mimi.sample_rate)
159
+ if audio.shape[-1] % mimi.frame_size != 0:
160
+ to_pad = mimi.frame_size - audio.shape[-1] % mimi.frame_size
161
+ audio = torch.nn.functional.pad(audio, (0, to_pad))
162
+
163
+ text_tokens_accum = []
164
+
165
+ n_prefix_chunks = math.ceil(audio_silence_prefix_seconds * mimi.frame_rate)
166
+ n_suffix_chunks = math.ceil(audio_delay_seconds * mimi.frame_rate)
167
+ silence_chunk = torch.zeros(
168
+ (1, 1, mimi.frame_size), dtype=torch.float32, device=args.device
169
+ )
170
+
171
+ chunks = itertools.chain(
172
+ itertools.repeat(silence_chunk, n_prefix_chunks),
173
+ torch.split(audio[:, None], mimi.frame_size, dim=-1),
174
+ itertools.repeat(silence_chunk, n_suffix_chunks),
175
+ )
176
+
177
+ start_time = time.time()
178
+ nchunks = 0
179
+ last_print_was_vad = False
180
+ with mimi.streaming(1), lm_gen.streaming(1):
181
+ for audio_chunk in chunks:
182
+ nchunks += 1
183
+ audio_tokens = mimi.encode(audio_chunk)
184
+ if args.vad:
185
+ text_tokens, vad_heads = lm_gen.step_with_extra_heads(audio_tokens)
186
+ if vad_heads:
187
+ pr_vad = vad_heads[2][0, 0, 0].cpu().item()
188
+ if pr_vad > 0.5 and not last_print_was_vad:
189
+ print(" [end of turn detected]")
190
+ last_print_was_vad = True
191
+ else:
192
+ text_tokens = lm_gen.step(audio_tokens)
193
+ text_token = text_tokens[0, 0, 0].cpu().item()
194
+ if text_token not in (0, 3):
195
+ _text = tokenizer.id_to_piece(text_tokens[0, 0, 0].cpu().item()) # type: ignore
196
+ _text = _text.replace("▁", " ")
197
+ print(_text, end="", flush=True)
198
+ last_print_was_vad = False
199
+ text_tokens_accum.append(text_tokens)
200
+
201
+ utterance_tokens = torch.concat(text_tokens_accum, dim=-1)
202
+ dt = time.time() - start_time
203
+ print(
204
+ f"\nprocessed {nchunks} chunks in {dt:.2f} seconds, steps per second: {nchunks / dt:.2f}"
205
+ )
206
+ timed_text = tokens_to_timestamped_text(
207
+ utterance_tokens,
208
+ tokenizer,
209
+ mimi.frame_rate,
210
+ end_of_padding_id=0,
211
+ padding_token_id=padding_token_id,
212
+ offset_seconds=int(n_prefix_chunks / mimi.frame_rate) + audio_delay_seconds,
213
+ )
214
+
215
+ decoded = " ".join([str(t) for t in timed_text])
216
+ print(decoded)
217
+
218
+
219
+ if __name__ == "__main__":
220
+ parser = argparse.ArgumentParser(description="Example streaming STT w/ timestamps.")
221
+ parser.add_argument("in_file", help="The file to transcribe.")
222
+
223
+ parser.add_argument(
224
+ "--hf-repo", type=str, help="HF repo to load the STT model from. "
225
+ )
226
+ parser.add_argument("--tokenizer", type=str, help="Path to a local tokenizer file.")
227
+ parser.add_argument(
228
+ "--moshi-weight", type=str, help="Path to a local checkpoint file."
229
+ )
230
+ parser.add_argument(
231
+ "--mimi-weight", type=str, help="Path to a local checkpoint file for Mimi."
232
+ )
233
+ parser.add_argument(
234
+ "--config-path", type=str, help="Path to a local config file.", default=None
235
+ )
236
+ parser.add_argument(
237
+ "--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
238
+ )
239
+ parser.add_argument(
240
+ "--device",
241
+ type=str,
242
+ default="cuda",
243
+ help="Device on which to run, defaults to 'cuda'.",
244
+ )
245
+ args = parser.parse_args()
246
+
247
+ main(args)
scripts/stt_from_file_rust_server.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "msgpack",
5
+ # "numpy",
6
+ # "sphn",
7
+ # "websockets",
8
+ # ]
9
+ # ///
10
+ import argparse
11
+ import asyncio
12
+ import time
13
+
14
+ import msgpack
15
+ import numpy as np
16
+ import sphn
17
+ import websockets
18
+
19
+ SAMPLE_RATE = 24000
20
+ FRAME_SIZE = 1920 # Send data in chunks
21
+
22
+
23
+ def load_and_process_audio(file_path):
24
+ """Load an MP3 file, resample to 24kHz, convert to mono, and extract PCM float32 data."""
25
+ pcm_data, _ = sphn.read(file_path, sample_rate=SAMPLE_RATE)
26
+ return pcm_data[0]
27
+
28
+
29
+ async def receive_messages(websocket):
30
+ transcript = []
31
+
32
+ async for message in websocket:
33
+ data = msgpack.unpackb(message, raw=False)
34
+ if data["type"] == "Step":
35
+ # This message contains the signal from the semantic VAD, and tells us how
36
+ # much audio the server has already processed. We don't use either here.
37
+ continue
38
+ if data["type"] == "Word":
39
+ print(data["text"], end=" ", flush=True)
40
+ transcript.append(
41
+ {
42
+ "text": data["text"],
43
+ "timestamp": [data["start_time"], data["start_time"]],
44
+ }
45
+ )
46
+ if data["type"] == "EndWord":
47
+ if len(transcript) > 0:
48
+ transcript[-1]["timestamp"][1] = data["stop_time"]
49
+ if data["type"] == "Marker":
50
+ # Received marker, stopping stream
51
+ break
52
+
53
+ return transcript
54
+
55
+
56
+ async def send_messages(websocket, rtf: float):
57
+ audio_data = load_and_process_audio(args.in_file)
58
+
59
+ async def send_audio(audio: np.ndarray):
60
+ await websocket.send(
61
+ msgpack.packb(
62
+ {"type": "Audio", "pcm": [float(x) for x in audio]},
63
+ use_single_float=True,
64
+ )
65
+ )
66
+
67
+ # Start with a second of silence.
68
+ # This is needed for the 2.6B model for technical reasons.
69
+ await send_audio([0.0] * SAMPLE_RATE)
70
+
71
+ start_time = time.time()
72
+ for i in range(0, len(audio_data), FRAME_SIZE):
73
+ await send_audio(audio_data[i : i + FRAME_SIZE])
74
+
75
+ expected_send_time = start_time + (i + 1) / SAMPLE_RATE / rtf
76
+ current_time = time.time()
77
+ if current_time < expected_send_time:
78
+ await asyncio.sleep(expected_send_time - current_time)
79
+ else:
80
+ await asyncio.sleep(0.001)
81
+
82
+ for _ in range(5):
83
+ await send_audio([0.0] * SAMPLE_RATE)
84
+
85
+ # Send a marker to indicate the end of the stream.
86
+ await websocket.send(
87
+ msgpack.packb({"type": "Marker", "id": 0}, use_single_float=True)
88
+ )
89
+
90
+ # We'll get back the marker once the corresponding audio has been transcribed,
91
+ # accounting for the delay of the model. That's why we need to send some silence
92
+ # after the marker, because the model will not return the marker immediately.
93
+ for _ in range(35):
94
+ await send_audio([0.0] * SAMPLE_RATE)
95
+
96
+
97
+ async def stream_audio(url: str, api_key: str, rtf: float):
98
+ """Stream audio data to a WebSocket server."""
99
+ headers = {"kyutai-api-key": api_key}
100
+
101
+ # Instead of using the header, you can authenticate by adding `?auth_id={api_key}` to the URL
102
+ async with websockets.connect(url, additional_headers=headers) as websocket:
103
+ send_task = asyncio.create_task(send_messages(websocket, rtf))
104
+ receive_task = asyncio.create_task(receive_messages(websocket))
105
+ _, transcript = await asyncio.gather(send_task, receive_task)
106
+
107
+ return transcript
108
+
109
+
110
+ if __name__ == "__main__":
111
+ parser = argparse.ArgumentParser()
112
+ parser.add_argument("in_file")
113
+ parser.add_argument(
114
+ "--url",
115
+ help="The url of the server to which to send the audio",
116
+ default="ws://127.0.0.1:8080",
117
+ )
118
+ parser.add_argument("--api-key", default="public_token")
119
+ parser.add_argument(
120
+ "--rtf",
121
+ type=float,
122
+ default=1.01,
123
+ help="The real-time factor of how fast to feed in the audio.",
124
+ )
125
+ args = parser.parse_args()
126
+
127
+ url = f"{args.url}/api/asr-streaming"
128
+ transcript = asyncio.run(stream_audio(url, args.api_key, args.rtf))
129
+
130
+ print()
131
+ print()
132
+ for word in transcript:
133
+ print(
134
+ f"{word['timestamp'][0]:7.2f} -{word['timestamp'][1]:7.2f} {word['text']}"
135
+ )
scripts/stt_from_file_with_prompt_pytorch.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """An example script that illustrates how one can prompt Kyutai STT models."""
2
+
3
+ import argparse
4
+ import itertools
5
+ import math
6
+ from collections import deque
7
+
8
+ import julius
9
+ import moshi.models
10
+ import sphn
11
+ import torch
12
+ import tqdm
13
+
14
+
15
+ class PromptHook:
16
+ def __init__(self, tokenizer, prefix, padding_tokens=(0, 3)):
17
+ self.tokenizer = tokenizer
18
+ self.prefix_enforce = deque(self.tokenizer.encode(prefix))
19
+ self.padding_tokens = padding_tokens
20
+
21
+ def on_token(self, token):
22
+ if not self.prefix_enforce:
23
+ return
24
+
25
+ token = token.item()
26
+
27
+ if token in self.padding_tokens:
28
+ pass
29
+ elif token == self.prefix_enforce[0]:
30
+ self.prefix_enforce.popleft()
31
+ else:
32
+ assert False
33
+
34
+ def on_logits(self, logits):
35
+ if not self.prefix_enforce:
36
+ return
37
+
38
+ mask = torch.zeros_like(logits, dtype=torch.bool)
39
+ for t in self.padding_tokens:
40
+ mask[..., t] = True
41
+ mask[..., self.prefix_enforce[0]] = True
42
+
43
+ logits[:] = torch.where(mask, logits, float("-inf"))
44
+
45
+
46
+ def main(args):
47
+ info = moshi.models.loaders.CheckpointInfo.from_hf_repo(
48
+ args.hf_repo,
49
+ moshi_weights=args.moshi_weight,
50
+ mimi_weights=args.mimi_weight,
51
+ tokenizer=args.tokenizer,
52
+ config_path=args.config_path,
53
+ )
54
+
55
+ mimi = info.get_mimi(device=args.device)
56
+ tokenizer = info.get_text_tokenizer()
57
+ lm = info.get_moshi(
58
+ device=args.device,
59
+ dtype=torch.bfloat16,
60
+ )
61
+
62
+ if args.prompt_text:
63
+ prompt_hook = PromptHook(tokenizer, args.prompt_text)
64
+ lm_gen = moshi.models.LMGen(
65
+ lm,
66
+ temp=0,
67
+ temp_text=0.0,
68
+ on_text_hook=prompt_hook.on_token,
69
+ on_text_logits_hook=prompt_hook.on_logits,
70
+ )
71
+ else:
72
+ lm_gen = moshi.models.LMGen(lm, temp=0, temp_text=0.0)
73
+
74
+ audio_silence_prefix_seconds = info.stt_config.get(
75
+ "audio_silence_prefix_seconds", 1.0
76
+ )
77
+ audio_delay_seconds = info.stt_config.get("audio_delay_seconds", 5.0)
78
+ padding_token_id = info.raw_config.get("text_padding_token_id", 3)
79
+
80
+ def _load_and_process(path):
81
+ audio, input_sample_rate = sphn.read(path)
82
+ audio = torch.from_numpy(audio).to(args.device).mean(axis=0, keepdim=True)
83
+ audio = julius.resample_frac(audio, input_sample_rate, mimi.sample_rate)
84
+ if audio.shape[-1] % mimi.frame_size != 0:
85
+ to_pad = mimi.frame_size - audio.shape[-1] % mimi.frame_size
86
+ audio = torch.nn.functional.pad(audio, (0, to_pad))
87
+ return audio
88
+
89
+ n_prefix_chunks = math.ceil(audio_silence_prefix_seconds * mimi.frame_rate)
90
+ n_suffix_chunks = math.ceil(audio_delay_seconds * mimi.frame_rate)
91
+ silence_chunk = torch.zeros(
92
+ (1, 1, mimi.frame_size), dtype=torch.float32, device=args.device
93
+ )
94
+
95
+ audio = _load_and_process(args.file)
96
+ if args.prompt_file:
97
+ audio_prompt = _load_and_process(args.prompt_file)
98
+ else:
99
+ audio_prompt = None
100
+
101
+ chain = [itertools.repeat(silence_chunk, n_prefix_chunks)]
102
+
103
+ if audio_prompt is not None:
104
+ chain.append(torch.split(audio_prompt[:, None, :], mimi.frame_size, dim=-1))
105
+ # adding a bit (0.8s) of silence to separate prompt and the actual audio
106
+ chain.append(itertools.repeat(silence_chunk, 10))
107
+
108
+ chain += [
109
+ torch.split(audio[:, None, :], mimi.frame_size, dim=-1),
110
+ itertools.repeat(silence_chunk, n_suffix_chunks),
111
+ ]
112
+
113
+ chunks = itertools.chain(*chain)
114
+
115
+ text_tokens_accum = []
116
+ with mimi.streaming(1), lm_gen.streaming(1):
117
+ for audio_chunk in tqdm.tqdm(chunks):
118
+ audio_tokens = mimi.encode(audio_chunk)
119
+ text_tokens = lm_gen.step(audio_tokens)
120
+ if text_tokens is not None:
121
+ text_tokens_accum.append(text_tokens)
122
+
123
+ utterance_tokens = torch.concat(text_tokens_accum, dim=-1)
124
+ text_tokens = utterance_tokens.cpu().view(-1)
125
+
126
+ # if we have an audio prompt and we don't want to have it in the transcript,
127
+ # we should cut the corresponding number of frames from the output tokens.
128
+ # However, there is also some amount of padding that happens before it
129
+ # due to silence_prefix and audio_delay. Normally it is ignored in detokenization,
130
+ # but now we should account for it to find the position of the prompt transcript.
131
+ if args.cut_prompt_transcript and audio_prompt is not None:
132
+ prompt_frames = audio_prompt.shape[1] // mimi.frame_size
133
+ no_prompt_offset_seconds = audio_delay_seconds + audio_silence_prefix_seconds
134
+ no_prompt_offset = int(no_prompt_offset_seconds * mimi.frame_rate)
135
+ text_tokens = text_tokens[prompt_frames + no_prompt_offset :]
136
+
137
+ text = tokenizer.decode(
138
+ text_tokens[text_tokens > padding_token_id].numpy().tolist()
139
+ )
140
+
141
+ print(text)
142
+
143
+
144
+ if __name__ == "__main__":
145
+ parser = argparse.ArgumentParser(description="Example streaming STT w/ a prompt.")
146
+ parser.add_argument(
147
+ "--file",
148
+ required=True,
149
+ help="File to transcribe.",
150
+ )
151
+ parser.add_argument(
152
+ "--prompt_file",
153
+ required=False,
154
+ help="Audio of the prompt.",
155
+ )
156
+ parser.add_argument(
157
+ "--prompt_text",
158
+ required=False,
159
+ help="Text of the prompt.",
160
+ )
161
+ parser.add_argument(
162
+ "--cut-prompt-transcript",
163
+ action="store_true",
164
+ help="Cut the prompt from the output transcript",
165
+ )
166
+ parser.add_argument(
167
+ "--hf-repo", type=str, help="HF repo to load the STT model from. "
168
+ )
169
+ parser.add_argument("--tokenizer", type=str, help="Path to a local tokenizer file.")
170
+ parser.add_argument(
171
+ "--moshi-weight", type=str, help="Path to a local checkpoint file."
172
+ )
173
+ parser.add_argument(
174
+ "--mimi-weight", type=str, help="Path to a local checkpoint file for Mimi."
175
+ )
176
+ parser.add_argument(
177
+ "--config-path", type=str, help="Path to a local config file.", default=None
178
+ )
179
+ parser.add_argument(
180
+ "--device",
181
+ type=str,
182
+ default="cuda",
183
+ help="Device on which to run, defaults to 'cuda'.",
184
+ )
185
+ args = parser.parse_args()
186
+
187
+ main(args)
scripts/stt_from_mic_mlx.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "huggingface_hub",
5
+ # "moshi_mlx==0.2.12",
6
+ # "numpy",
7
+ # "rustymimi",
8
+ # "sentencepiece",
9
+ # "sounddevice",
10
+ # ]
11
+ # ///
12
+
13
+ import argparse
14
+ import json
15
+ import queue
16
+
17
+ import mlx.core as mx
18
+ import mlx.nn as nn
19
+ import rustymimi
20
+ import sentencepiece
21
+ import sounddevice as sd
22
+ from huggingface_hub import hf_hub_download
23
+ from moshi_mlx import models, utils
24
+
25
+ if __name__ == "__main__":
26
+ parser = argparse.ArgumentParser()
27
+ parser.add_argument("--max-steps", default=4096)
28
+ parser.add_argument("--hf-repo")
29
+ parser.add_argument(
30
+ "--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
31
+ )
32
+ args = parser.parse_args()
33
+
34
+ if args.hf_repo is None:
35
+ if args.vad:
36
+ args.hf_repo = "kyutai/stt-1b-en_fr-candle"
37
+ else:
38
+ args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
39
+ lm_config = hf_hub_download(args.hf_repo, "config.json")
40
+ with open(lm_config, "r") as fobj:
41
+ lm_config = json.load(fobj)
42
+ mimi_weights = hf_hub_download(args.hf_repo, lm_config["mimi_name"])
43
+ moshi_name = lm_config.get("moshi_name", "model.safetensors")
44
+ moshi_weights = hf_hub_download(args.hf_repo, moshi_name)
45
+ tokenizer = hf_hub_download(args.hf_repo, lm_config["tokenizer_name"])
46
+
47
+ lm_config = models.LmConfig.from_config_dict(lm_config)
48
+ model = models.Lm(lm_config)
49
+ model.set_dtype(mx.bfloat16)
50
+ if moshi_weights.endswith(".q4.safetensors"):
51
+ nn.quantize(model, bits=4, group_size=32)
52
+ elif moshi_weights.endswith(".q8.safetensors"):
53
+ nn.quantize(model, bits=8, group_size=64)
54
+
55
+ print(f"loading model weights from {moshi_weights}")
56
+ if args.hf_repo.endswith("-candle"):
57
+ model.load_pytorch_weights(moshi_weights, lm_config, strict=True)
58
+ else:
59
+ model.load_weights(moshi_weights, strict=True)
60
+
61
+ print(f"loading the text tokenizer from {tokenizer}")
62
+ text_tokenizer = sentencepiece.SentencePieceProcessor(tokenizer) # type: ignore
63
+
64
+ print(f"loading the audio tokenizer {mimi_weights}")
65
+ generated_codebooks = lm_config.generated_codebooks
66
+ other_codebooks = lm_config.other_codebooks
67
+ mimi_codebooks = max(generated_codebooks, other_codebooks)
68
+ audio_tokenizer = rustymimi.Tokenizer(mimi_weights, num_codebooks=mimi_codebooks) # type: ignore
69
+ print("warming up the model")
70
+ model.warmup()
71
+ gen = models.LmGen(
72
+ model=model,
73
+ max_steps=args.max_steps,
74
+ text_sampler=utils.Sampler(top_k=25, temp=0),
75
+ audio_sampler=utils.Sampler(top_k=250, temp=0.8),
76
+ check=False,
77
+ )
78
+
79
+ block_queue = queue.Queue()
80
+
81
+ def audio_callback(indata, _frames, _time, _status):
82
+ block_queue.put(indata.copy())
83
+
84
+ print("recording audio from microphone, speak to get your words transcribed")
85
+ last_print_was_vad = False
86
+ with sd.InputStream(
87
+ channels=1,
88
+ dtype="float32",
89
+ samplerate=24000,
90
+ blocksize=1920,
91
+ callback=audio_callback,
92
+ ):
93
+ while True:
94
+ block = block_queue.get()
95
+ block = block[None, :, 0]
96
+ other_audio_tokens = audio_tokenizer.encode_step(block[None, 0:1])
97
+ other_audio_tokens = mx.array(other_audio_tokens).transpose(0, 2, 1)[
98
+ :, :, :other_codebooks
99
+ ]
100
+ if args.vad:
101
+ text_token, vad_heads = gen.step_with_extra_heads(other_audio_tokens[0])
102
+ if vad_heads:
103
+ pr_vad = vad_heads[2][0, 0, 0].item()
104
+ if pr_vad > 0.5 and not last_print_was_vad:
105
+ print(" [end of turn detected]")
106
+ last_print_was_vad = True
107
+ else:
108
+ text_token = gen.step(other_audio_tokens[0])
109
+ text_token = text_token[0].item()
110
+ audio_tokens = gen.last_audio_tokens()
111
+ _text = None
112
+ if text_token not in (0, 3):
113
+ _text = text_tokenizer.id_to_piece(text_token) # type: ignore
114
+ _text = _text.replace("▁", " ")
115
+ print(_text, end="", flush=True)
116
+ last_print_was_vad = False
scripts/stt_from_mic_rust_server.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "msgpack",
5
+ # "numpy",
6
+ # "sounddevice",
7
+ # "websockets",
8
+ # ]
9
+ # ///
10
+ import argparse
11
+ import asyncio
12
+ import signal
13
+
14
+ import msgpack
15
+ import numpy as np
16
+ import sounddevice as sd
17
+ import websockets
18
+
19
+ SAMPLE_RATE = 24000
20
+
21
+ # The VAD has several prediction heads, each of which tries to determine whether there
22
+ # has been a pause of a given length. The lengths are 0.5, 1.0, 2.0, and 3.0 seconds.
23
+ # Lower indices predict pauses more aggressively. In Unmute, we use 2.0 seconds = index 2.
24
+ PAUSE_PREDICTION_HEAD_INDEX = 2
25
+
26
+
27
+ async def receive_messages(websocket, show_vad: bool = False):
28
+ """Receive and process messages from the WebSocket server."""
29
+ try:
30
+ speech_started = False
31
+ async for message in websocket:
32
+ data = msgpack.unpackb(message, raw=False)
33
+
34
+ # The Step message only gets sent if the model has semantic VAD available
35
+ if data["type"] == "Step" and show_vad:
36
+ pause_prediction = data["prs"][PAUSE_PREDICTION_HEAD_INDEX]
37
+ if pause_prediction > 0.5 and speech_started:
38
+ print("| ", end="", flush=True)
39
+ speech_started = False
40
+
41
+ elif data["type"] == "Word":
42
+ print(data["text"], end=" ", flush=True)
43
+ speech_started = True
44
+ except websockets.ConnectionClosed:
45
+ print("Connection closed while receiving messages.")
46
+
47
+
48
+ async def send_messages(websocket, audio_queue):
49
+ """Send audio data from microphone to WebSocket server."""
50
+ try:
51
+ # Start by draining the queue to avoid lags
52
+ while not audio_queue.empty():
53
+ await audio_queue.get()
54
+
55
+ print("Starting the transcription")
56
+
57
+ while True:
58
+ audio_data = await audio_queue.get()
59
+ chunk = {"type": "Audio", "pcm": [float(x) for x in audio_data]}
60
+ msg = msgpack.packb(chunk, use_bin_type=True, use_single_float=True)
61
+ await websocket.send(msg)
62
+
63
+ except websockets.ConnectionClosed:
64
+ print("Connection closed while sending messages.")
65
+
66
+
67
+ async def stream_audio(url: str, api_key: str, show_vad: bool):
68
+ """Stream audio data to a WebSocket server."""
69
+ print("Starting microphone recording...")
70
+ print("Press Ctrl+C to stop recording")
71
+ audio_queue = asyncio.Queue()
72
+
73
+ loop = asyncio.get_event_loop()
74
+
75
+ def audio_callback(indata, frames, time, status):
76
+ loop.call_soon_threadsafe(
77
+ audio_queue.put_nowait, indata[:, 0].astype(np.float32).copy()
78
+ )
79
+
80
+ # Start audio stream
81
+ with sd.InputStream(
82
+ samplerate=SAMPLE_RATE,
83
+ channels=1,
84
+ dtype="float32",
85
+ callback=audio_callback,
86
+ blocksize=1920, # 80ms blocks
87
+ ):
88
+ headers = {"kyutai-api-key": api_key}
89
+ # Instead of using the header, you can authenticate by adding `?auth_id={api_key}` to the URL
90
+ async with websockets.connect(url, additional_headers=headers) as websocket:
91
+ send_task = asyncio.create_task(send_messages(websocket, audio_queue))
92
+ receive_task = asyncio.create_task(
93
+ receive_messages(websocket, show_vad=show_vad)
94
+ )
95
+ await asyncio.gather(send_task, receive_task)
96
+
97
+
98
+ if __name__ == "__main__":
99
+ parser = argparse.ArgumentParser(description="Real-time microphone transcription")
100
+ parser.add_argument(
101
+ "--url",
102
+ help="The URL of the server to which to send the audio",
103
+ default="ws://127.0.0.1:8080",
104
+ )
105
+ parser.add_argument("--api-key", default="public_token")
106
+ parser.add_argument(
107
+ "--list-devices", action="store_true", help="List available audio devices"
108
+ )
109
+ parser.add_argument(
110
+ "--device", type=int, help="Input device ID (use --list-devices to see options)"
111
+ )
112
+ parser.add_argument(
113
+ "--show-vad",
114
+ action="store_true",
115
+ help="Visualize the predictions of the semantic voice activity detector with a '|' symbol",
116
+ )
117
+
118
+ args = parser.parse_args()
119
+
120
+ def handle_sigint(signum, frame):
121
+ print("Interrupted by user") # Don't complain about KeyboardInterrupt
122
+ exit(0)
123
+
124
+ signal.signal(signal.SIGINT, handle_sigint)
125
+
126
+ if args.list_devices:
127
+ print("Available audio devices:")
128
+ print(sd.query_devices())
129
+ exit(0)
130
+
131
+ if args.device is not None:
132
+ sd.default.device[0] = args.device # Set input device
133
+
134
+ url = f"{args.url}/api/asr-streaming"
135
+ asyncio.run(stream_audio(url, args.api_key, args.show_vad))
scripts/tts_mlx.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "huggingface_hub",
5
+ # "moshi_mlx==0.2.12",
6
+ # "numpy",
7
+ # "sounddevice",
8
+ # ]
9
+ # ///
10
+
11
+ import argparse
12
+ import json
13
+ import queue
14
+ import sys
15
+ import time
16
+
17
+ import mlx.core as mx
18
+ import mlx.nn as nn
19
+ import numpy as np
20
+ import sentencepiece
21
+ import sounddevice as sd
22
+ import sphn
23
+ from moshi_mlx import models
24
+ from moshi_mlx.client_utils import make_log
25
+ from moshi_mlx.models.tts import (
26
+ DEFAULT_DSM_TTS_REPO,
27
+ DEFAULT_DSM_TTS_VOICE_REPO,
28
+ TTSModel,
29
+ )
30
+ from moshi_mlx.utils.loaders import hf_get
31
+
32
+
33
+ def log(level: str, msg: str):
34
+ print(make_log(level, msg))
35
+
36
+
37
+ def main():
38
+ parser = argparse.ArgumentParser(
39
+ description="Run Kyutai TTS using the MLX implementation"
40
+ )
41
+ parser.add_argument("inp", type=str, help="Input file, use - for stdin")
42
+ parser.add_argument(
43
+ "out", type=str, help="Output file to generate, use - for playing the audio"
44
+ )
45
+ parser.add_argument(
46
+ "--hf-repo",
47
+ type=str,
48
+ default=DEFAULT_DSM_TTS_REPO,
49
+ help="HF repo in which to look for the pretrained models.",
50
+ )
51
+ parser.add_argument(
52
+ "--voice-repo",
53
+ default=DEFAULT_DSM_TTS_VOICE_REPO,
54
+ help="HF repo in which to look for pre-computed voice embeddings.",
55
+ )
56
+ parser.add_argument(
57
+ "--voice", default="expresso/ex03-ex01_happy_001_channel1_334s.wav"
58
+ )
59
+ parser.add_argument(
60
+ "--quantize",
61
+ type=int,
62
+ help="The quantization to be applied, e.g. 8 for 8 bits.",
63
+ )
64
+ args = parser.parse_args()
65
+
66
+ mx.random.seed(299792458)
67
+
68
+ log("info", "retrieving checkpoints")
69
+
70
+ raw_config = hf_get("config.json", args.hf_repo)
71
+ with open(hf_get(raw_config), "r") as fobj:
72
+ raw_config = json.load(fobj)
73
+
74
+ mimi_weights = hf_get(raw_config["mimi_name"], args.hf_repo)
75
+ moshi_name = raw_config.get("moshi_name", "model.safetensors")
76
+ moshi_weights = hf_get(moshi_name, args.hf_repo)
77
+ tokenizer = hf_get(raw_config["tokenizer_name"], args.hf_repo)
78
+ lm_config = models.LmConfig.from_config_dict(raw_config)
79
+ # There is a bug in moshi_mlx <= 0.3.0 handling of the ring kv cache.
80
+ # The following line gets around it for now.
81
+ lm_config.transformer.max_seq_len = lm_config.transformer.context
82
+ model = models.Lm(lm_config)
83
+ model.set_dtype(mx.bfloat16)
84
+
85
+ log("info", f"loading model weights from {moshi_weights}")
86
+ model.load_pytorch_weights(str(moshi_weights), lm_config, strict=True)
87
+
88
+ if args.quantize is not None:
89
+ log("info", f"quantizing model to {args.quantize} bits")
90
+ nn.quantize(model.depformer, bits=args.quantize)
91
+ for layer in model.transformer.layers:
92
+ nn.quantize(layer.self_attn, bits=args.quantize)
93
+ nn.quantize(layer.gating, bits=args.quantize)
94
+
95
+ log("info", f"loading the text tokenizer from {tokenizer}")
96
+ text_tokenizer = sentencepiece.SentencePieceProcessor(str(tokenizer)) # type: ignore
97
+
98
+ log("info", f"loading the audio tokenizer {mimi_weights}")
99
+ generated_codebooks = lm_config.generated_codebooks
100
+ audio_tokenizer = models.mimi.Mimi(models.mimi_202407(generated_codebooks))
101
+ audio_tokenizer.load_pytorch_weights(str(mimi_weights), strict=True)
102
+
103
+ cfg_coef_conditioning = None
104
+ tts_model = TTSModel(
105
+ model,
106
+ audio_tokenizer,
107
+ text_tokenizer,
108
+ voice_repo=args.voice_repo,
109
+ temp=0.6,
110
+ cfg_coef=1,
111
+ max_padding=8,
112
+ initial_padding=2,
113
+ final_padding=2,
114
+ padding_bonus=0,
115
+ raw_config=raw_config,
116
+ )
117
+ if tts_model.valid_cfg_conditionings:
118
+ # Model was trained with CFG distillation.
119
+ cfg_coef_conditioning = tts_model.cfg_coef
120
+ tts_model.cfg_coef = 1.0
121
+ cfg_is_no_text = False
122
+ cfg_is_no_prefix = False
123
+ else:
124
+ cfg_is_no_text = True
125
+ cfg_is_no_prefix = True
126
+ mimi = tts_model.mimi
127
+
128
+ log("info", f"reading input from {args.inp}")
129
+ if args.inp == "-":
130
+ if sys.stdin.isatty(): # Interactive
131
+ print("Enter text to synthesize (Ctrl+D to end input):")
132
+ text_to_tts = sys.stdin.read().strip()
133
+ else:
134
+ with open(args.inp, "r", encoding="utf-8") as fobj:
135
+ text_to_tts = fobj.read().strip()
136
+
137
+ all_entries = [tts_model.prepare_script([text_to_tts])]
138
+ if tts_model.multi_speaker:
139
+ voices = [tts_model.get_voice_path(args.voice)]
140
+ else:
141
+ voices = []
142
+ all_attributes = [
143
+ tts_model.make_condition_attributes(voices, cfg_coef_conditioning)
144
+ ]
145
+
146
+ wav_frames = queue.Queue()
147
+ _frames_cnt = 0
148
+
149
+ def _on_frame(frame):
150
+ nonlocal _frames_cnt
151
+ if (frame == -1).any():
152
+ return
153
+ _pcm = tts_model.mimi.decode_step(frame[:, :, None])
154
+ _pcm = np.array(mx.clip(_pcm[0, 0], -1, 1))
155
+ wav_frames.put_nowait(_pcm)
156
+ _frames_cnt += 1
157
+ print(f"generated {_frames_cnt / 12.5:.2f}s", end="\r", flush=True)
158
+
159
+ def run():
160
+ log("info", "starting the inference loop")
161
+ begin = time.time()
162
+ result = tts_model.generate(
163
+ all_entries,
164
+ all_attributes,
165
+ cfg_is_no_prefix=cfg_is_no_prefix,
166
+ cfg_is_no_text=cfg_is_no_text,
167
+ on_frame=_on_frame,
168
+ )
169
+ frames = mx.concat(result.frames, axis=-1)
170
+ total_duration = frames.shape[0] * frames.shape[-1] / mimi.frame_rate
171
+ time_taken = time.time() - begin
172
+ total_speed = total_duration / time_taken
173
+ log("info", f"[LM] took {time_taken:.2f}s, total speed {total_speed:.2f}x")
174
+ return result
175
+
176
+ if args.out == "-":
177
+
178
+ def audio_callback(outdata, _a, _b, _c):
179
+ try:
180
+ pcm_data = wav_frames.get(block=False)
181
+ outdata[:, 0] = pcm_data
182
+ except queue.Empty:
183
+ outdata[:] = 0
184
+
185
+ with sd.OutputStream(
186
+ samplerate=mimi.sample_rate,
187
+ blocksize=1920,
188
+ channels=1,
189
+ callback=audio_callback,
190
+ ):
191
+ run()
192
+ time.sleep(3)
193
+ while True:
194
+ if wav_frames.qsize() == 0:
195
+ break
196
+ time.sleep(1)
197
+ else:
198
+ run()
199
+ frames = []
200
+ while True:
201
+ try:
202
+ frames.append(wav_frames.get_nowait())
203
+ except queue.Empty:
204
+ break
205
+ wav = np.concat(frames, -1)
206
+ sphn.write_wav(args.out, wav, mimi.sample_rate)
207
+
208
+
209
+ if __name__ == "__main__":
210
+ main()
scripts/tts_mlx_streaming.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "huggingface_hub",
5
+ # "moshi_mlx==0.2.12",
6
+ # "numpy",
7
+ # "sounddevice",
8
+ # ]
9
+ # ///
10
+
11
+ import argparse
12
+ from dataclasses import dataclass
13
+ import json
14
+ import queue
15
+ import sys
16
+ import time
17
+
18
+ import mlx.core as mx
19
+ import mlx.nn as nn
20
+ import numpy as np
21
+ import sentencepiece
22
+ import sounddevice as sd
23
+ import sphn
24
+ import typing as tp
25
+ from moshi_mlx import models
26
+ from moshi_mlx.models.generate import LmGen
27
+ from moshi_mlx.client_utils import make_log
28
+ from moshi_mlx.modules.conditioner import (
29
+ ConditionAttributes,
30
+ ConditionTensor,
31
+ dropout_all_conditions,
32
+ )
33
+ from moshi_mlx.utils.sampling import Sampler
34
+ from moshi_mlx.models.tts import (
35
+ Entry,
36
+ DEFAULT_DSM_TTS_REPO,
37
+ DEFAULT_DSM_TTS_VOICE_REPO,
38
+ TTSModel,
39
+ script_to_entries,
40
+ )
41
+ from moshi_mlx.utils.loaders import hf_get
42
+
43
+
44
+ def prepare_script(model: TTSModel, script: str, first_turn: bool) -> list[Entry]:
45
+ multi_speaker = first_turn and model.multi_speaker
46
+ return script_to_entries(
47
+ model.tokenizer,
48
+ model.machine.token_ids,
49
+ model.mimi.frame_rate,
50
+ [script],
51
+ multi_speaker=multi_speaker,
52
+ padding_between=1,
53
+ )
54
+
55
+
56
+ def _make_null(
57
+ all_attributes: tp.Sequence[ConditionAttributes],
58
+ ) -> list[ConditionAttributes]:
59
+ # When using CFG, returns the null conditions.
60
+ return dropout_all_conditions(all_attributes)
61
+
62
+
63
+ @dataclass
64
+ class TTSGen:
65
+ tts_model: TTSModel
66
+ attributes: tp.Sequence[ConditionAttributes]
67
+ on_frame: tp.Optional[tp.Callable[[mx.array], None]] = None
68
+
69
+ def __post_init__(self):
70
+ tts_model = self.tts_model
71
+ attributes = self.attributes
72
+ self.offset = 0
73
+ self.state = self.tts_model.machine.new_state([])
74
+
75
+ if tts_model.cfg_coef != 1.0:
76
+ if tts_model.valid_cfg_conditionings:
77
+ raise ValueError(
78
+ "This model does not support direct CFG, but was trained with "
79
+ "CFG distillation. Pass instead `cfg_coef` to `make_condition_attributes`."
80
+ )
81
+ nulled = _make_null(attributes)
82
+ attributes = list(attributes) + nulled
83
+
84
+ assert tts_model.lm.condition_provider is not None
85
+ self.ct = None
86
+ self.cross_attention_src = None
87
+ for _attr in attributes:
88
+ for _key, _value in _attr.text.items():
89
+ _ct = tts_model.lm.condition_provider.condition_tensor(_key, _value)
90
+ if self.ct is None:
91
+ self.ct = _ct
92
+ else:
93
+ self.ct = ConditionTensor(self.ct.tensor + _ct.tensor)
94
+ for _key, _value in _attr.tensor.items():
95
+ _conditioner = tts_model.lm.condition_provider.conditioners[_key]
96
+ _ca_src = _conditioner.condition(_value)
97
+ if self.cross_attention_src is None:
98
+ self.cross_attention_src = _ca_src
99
+ else:
100
+ raise ValueError("multiple cross-attention conditioners")
101
+
102
+ def _on_audio_hook(audio_tokens):
103
+ delays = tts_model.lm.delays
104
+ for q in range(audio_tokens.shape[0]):
105
+ delay = delays[q]
106
+ if self.offset < delay + tts_model.delay_steps:
107
+ audio_tokens[q] = tts_model.machine.token_ids.zero
108
+
109
+ def _on_text_hook(text_tokens):
110
+ tokens = text_tokens.tolist()
111
+ out_tokens = []
112
+ for token in tokens:
113
+ out_token, _ = tts_model.machine.process(self.offset, self.state, token)
114
+ out_tokens.append(out_token)
115
+ text_tokens[:] = mx.array(out_tokens, dtype=mx.int64)
116
+
117
+ self.lm_gen = LmGen(
118
+ tts_model.lm,
119
+ max_steps=tts_model.max_gen_length,
120
+ text_sampler=Sampler(temp=tts_model.temp),
121
+ audio_sampler=Sampler(temp=tts_model.temp),
122
+ cfg_coef=tts_model.cfg_coef,
123
+ on_text_hook=_on_text_hook,
124
+ on_audio_hook=_on_audio_hook,
125
+ # TODO(laurent):
126
+ # cfg_is_masked_until=cfg_is_masked_until,
127
+ # cfg_is_no_text=cfg_is_no_text,
128
+ )
129
+
130
+ def process_last(self):
131
+ while len(self.state.entries) > 0 or self.state.end_step is not None:
132
+ self._step()
133
+ additional_steps = (
134
+ self.tts_model.delay_steps + max(self.tts_model.lm.delays) + 8
135
+ )
136
+ for _ in range(additional_steps):
137
+ self._step()
138
+
139
+ def process(self):
140
+ while len(self.state.entries) > self.tts_model.machine.second_stream_ahead:
141
+ self._step()
142
+
143
+ def _step(self):
144
+ missing = self.tts_model.lm.n_q - self.tts_model.lm.dep_q
145
+ missing = self.tts_model.lm.n_q - self.tts_model.lm.dep_q
146
+ input_tokens = (
147
+ mx.ones((1, missing), dtype=mx.int64)
148
+ * self.tts_model.machine.token_ids.zero
149
+ )
150
+ self.lm_gen.step(
151
+ input_tokens, ct=self.ct, cross_attention_src=self.cross_attention_src
152
+ )
153
+ frame = self.lm_gen.last_audio_tokens()
154
+ self.offset += 1
155
+ if frame is not None:
156
+ if self.on_frame is not None:
157
+ self.on_frame(frame)
158
+
159
+ def append_entry(self, entry):
160
+ self.state.entries.append(entry)
161
+
162
+
163
+ def log(level: str, msg: str):
164
+ print(make_log(level, msg))
165
+
166
+
167
+ def main():
168
+ parser = argparse.ArgumentParser(
169
+ description="Run Kyutai TTS using the MLX implementation"
170
+ )
171
+ parser.add_argument(
172
+ "out", type=str, help="Output file to generate, use - for playing the audio"
173
+ )
174
+ parser.add_argument(
175
+ "--hf-repo",
176
+ type=str,
177
+ default=DEFAULT_DSM_TTS_REPO,
178
+ help="HF repo in which to look for the pretrained models.",
179
+ )
180
+ parser.add_argument(
181
+ "--voice-repo",
182
+ default=DEFAULT_DSM_TTS_VOICE_REPO,
183
+ help="HF repo in which to look for pre-computed voice embeddings.",
184
+ )
185
+ parser.add_argument(
186
+ "--voice", default="expresso/ex03-ex01_happy_001_channel1_334s.wav"
187
+ )
188
+ parser.add_argument(
189
+ "--quantize",
190
+ type=int,
191
+ help="The quantization to be applied, e.g. 8 for 8 bits.",
192
+ )
193
+ args = parser.parse_args()
194
+
195
+ mx.random.seed(299792458)
196
+
197
+ log("info", "retrieving checkpoints")
198
+
199
+ raw_config = hf_get("config.json", args.hf_repo)
200
+ with open(hf_get(raw_config), "r") as fobj:
201
+ raw_config = json.load(fobj)
202
+
203
+ mimi_weights = hf_get(raw_config["mimi_name"], args.hf_repo)
204
+ moshi_name = raw_config.get("moshi_name", "model.safetensors")
205
+ moshi_weights = hf_get(moshi_name, args.hf_repo)
206
+ tokenizer = hf_get(raw_config["tokenizer_name"], args.hf_repo)
207
+ lm_config = models.LmConfig.from_config_dict(raw_config)
208
+ # There is a bug in moshi_mlx <= 0.3.0 handling of the ring kv cache.
209
+ # The following line gets around it for now.
210
+ lm_config.transformer.max_seq_len = lm_config.transformer.context
211
+ model = models.Lm(lm_config)
212
+ model.set_dtype(mx.bfloat16)
213
+
214
+ log("info", f"loading model weights from {moshi_weights}")
215
+ model.load_pytorch_weights(str(moshi_weights), lm_config, strict=True)
216
+
217
+ if args.quantize is not None:
218
+ log("info", f"quantizing model to {args.quantize} bits")
219
+ nn.quantize(model.depformer, bits=args.quantize)
220
+ for layer in model.transformer.layers:
221
+ nn.quantize(layer.self_attn, bits=args.quantize)
222
+ nn.quantize(layer.gating, bits=args.quantize)
223
+
224
+ log("info", f"loading the text tokenizer from {tokenizer}")
225
+ text_tokenizer = sentencepiece.SentencePieceProcessor(str(tokenizer)) # type: ignore
226
+
227
+ log("info", f"loading the audio tokenizer {mimi_weights}")
228
+ generated_codebooks = lm_config.generated_codebooks
229
+ audio_tokenizer = models.mimi.Mimi(models.mimi_202407(generated_codebooks))
230
+ audio_tokenizer.load_pytorch_weights(str(mimi_weights), strict=True)
231
+
232
+ cfg_coef_conditioning = None
233
+ tts_model = TTSModel(
234
+ model,
235
+ audio_tokenizer,
236
+ text_tokenizer,
237
+ voice_repo=args.voice_repo,
238
+ temp=0.6,
239
+ cfg_coef=1,
240
+ max_padding=8,
241
+ initial_padding=2,
242
+ final_padding=2,
243
+ padding_bonus=0,
244
+ raw_config=raw_config,
245
+ )
246
+ if tts_model.valid_cfg_conditionings:
247
+ # Model was trained with CFG distillation.
248
+ cfg_coef_conditioning = tts_model.cfg_coef
249
+ tts_model.cfg_coef = 1.0
250
+ mimi = tts_model.mimi
251
+
252
+ log("info", "reading input from stdin")
253
+
254
+ if tts_model.multi_speaker:
255
+ voices = [tts_model.get_voice_path(args.voice)]
256
+ else:
257
+ voices = []
258
+ all_attributes = [
259
+ tts_model.make_condition_attributes(voices, cfg_coef_conditioning)
260
+ ]
261
+
262
+ wav_frames = queue.Queue()
263
+
264
+ def _on_frame(frame):
265
+ if (frame == -1).any():
266
+ return
267
+ _pcm = tts_model.mimi.decode_step(frame[:, :, None])
268
+ _pcm = np.array(mx.clip(_pcm[0, 0], -1, 1))
269
+ wav_frames.put_nowait(_pcm)
270
+
271
+ gen = TTSGen(tts_model, all_attributes, on_frame=_on_frame)
272
+
273
+ def run():
274
+ log("info", "starting the inference loop")
275
+ first_turn = True
276
+ for line in sys.stdin:
277
+ entries = prepare_script(tts_model, line.strip(), first_turn=first_turn)
278
+ first_turn = False
279
+ for entry in entries:
280
+ gen.append_entry(entry)
281
+ gen.process()
282
+ gen.process_last()
283
+
284
+ if args.out == "-":
285
+
286
+ def audio_callback(outdata, _a, _b, _c):
287
+ try:
288
+ pcm_data = wav_frames.get(block=False)
289
+ outdata[:, 0] = pcm_data
290
+ except queue.Empty:
291
+ outdata[:] = 0
292
+
293
+ with sd.OutputStream(
294
+ samplerate=mimi.sample_rate,
295
+ blocksize=1920,
296
+ channels=1,
297
+ callback=audio_callback,
298
+ ):
299
+ run()
300
+ while True:
301
+ if wav_frames.qsize() == 0:
302
+ break
303
+ time.sleep(1)
304
+ else:
305
+ run()
306
+ frames = []
307
+ while True:
308
+ try:
309
+ frames.append(wav_frames.get_nowait())
310
+ except queue.Empty:
311
+ break
312
+ wav = np.concat(frames, -1)
313
+ sphn.write_wav(args.out, wav, mimi.sample_rate)
314
+
315
+
316
+ if __name__ == "__main__":
317
+ main()
scripts/tts_pytorch.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "moshi==0.2.11",
5
+ # "torch",
6
+ # "sphn",
7
+ # "sounddevice",
8
+ # ]
9
+ # ///
10
+ import argparse
11
+ import sys
12
+
13
+ import numpy as np
14
+ import queue
15
+ import sphn
16
+ import time
17
+ import torch
18
+ from moshi.models.loaders import CheckpointInfo
19
+ from moshi.models.tts import DEFAULT_DSM_TTS_REPO, DEFAULT_DSM_TTS_VOICE_REPO, TTSModel
20
+
21
+
22
+ def main():
23
+ parser = argparse.ArgumentParser(
24
+ description="Run Kyutai TTS using the PyTorch implementation"
25
+ )
26
+ parser.add_argument("inp", type=str, help="Input file, use - for stdin.")
27
+ parser.add_argument(
28
+ "out", type=str, help="Output file to generate, use - for playing the audio"
29
+ )
30
+ parser.add_argument(
31
+ "--hf-repo",
32
+ type=str,
33
+ default=DEFAULT_DSM_TTS_REPO,
34
+ help="HF repo in which to look for the pretrained models.",
35
+ )
36
+ parser.add_argument(
37
+ "--voice-repo",
38
+ default=DEFAULT_DSM_TTS_VOICE_REPO,
39
+ help="HF repo in which to look for pre-computed voice embeddings.",
40
+ )
41
+ parser.add_argument(
42
+ "--voice",
43
+ default="expresso/ex03-ex01_happy_001_channel1_334s.wav",
44
+ help="The voice to use, relative to the voice repo root. "
45
+ f"See {DEFAULT_DSM_TTS_VOICE_REPO}",
46
+ )
47
+ parser.add_argument(
48
+ "--device",
49
+ type=str,
50
+ default="cuda",
51
+ help="Device on which to run, defaults to 'cuda'.",
52
+ )
53
+ args = parser.parse_args()
54
+
55
+ print("Loading model...")
56
+ checkpoint_info = CheckpointInfo.from_hf_repo(args.hf_repo)
57
+ tts_model = TTSModel.from_checkpoint_info(
58
+ checkpoint_info, n_q=32, temp=0.6, device=args.device
59
+ )
60
+
61
+ if args.inp == "-":
62
+ if sys.stdin.isatty(): # Interactive
63
+ print("Enter text to synthesize (Ctrl+D to end input):")
64
+ text = sys.stdin.read().strip()
65
+ else:
66
+ with open(args.inp, "r", encoding="utf-8") as fobj:
67
+ text = fobj.read().strip()
68
+
69
+ # If you want to make a dialog, you can pass more than one turn [text_speaker_1, text_speaker_2, text_2_speaker_1, ...]
70
+ entries = tts_model.prepare_script([text], padding_between=1)
71
+ if args.voice.endswith(".safetensors"):
72
+ voice_path = args.voice
73
+ else:
74
+ voice_path = tts_model.get_voice_path(args.voice)
75
+ # CFG coef goes here because the model was trained with CFG distillation,
76
+ # so it's not _actually_ doing CFG at inference time.
77
+ # Also, if you are generating a dialog, you should have two voices in the list.
78
+ condition_attributes = tts_model.make_condition_attributes(
79
+ [voice_path], cfg_coef=2.0
80
+ )
81
+ _frames_cnt = 0
82
+
83
+ if args.out == "-":
84
+ # Stream the audio to the speakers using sounddevice.
85
+ import sounddevice as sd
86
+
87
+ pcms = queue.Queue()
88
+
89
+ def _on_frame(frame):
90
+ nonlocal _frames_cnt
91
+ if (frame != -1).all():
92
+ pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()
93
+ pcms.put_nowait(np.clip(pcm[0, 0], -1, 1))
94
+ _frames_cnt += 1
95
+ print(f"generated {_frames_cnt / 12.5:.2f}s", end="\r", flush=True)
96
+
97
+ def audio_callback(outdata, _a, _b, _c):
98
+ try:
99
+ pcm_data = pcms.get(block=False)
100
+ outdata[:, 0] = pcm_data
101
+ except queue.Empty:
102
+ outdata[:] = 0
103
+
104
+ with sd.OutputStream(
105
+ samplerate=tts_model.mimi.sample_rate,
106
+ blocksize=1920,
107
+ channels=1,
108
+ callback=audio_callback,
109
+ ):
110
+ with tts_model.mimi.streaming(1):
111
+ tts_model.generate(
112
+ [entries], [condition_attributes], on_frame=_on_frame
113
+ )
114
+ time.sleep(3)
115
+ while True:
116
+ if pcms.qsize() == 0:
117
+ break
118
+ time.sleep(1)
119
+ else:
120
+
121
+ def _on_frame(frame):
122
+ nonlocal _frames_cnt
123
+ if (frame != -1).all():
124
+ _frames_cnt += 1
125
+ print(f"generated {_frames_cnt / 12.5:.2f}s", end="\r", flush=True)
126
+
127
+ start_time = time.time()
128
+ result = tts_model.generate(
129
+ [entries], [condition_attributes], on_frame=_on_frame
130
+ )
131
+ print(f"\nTotal time: {time.time() - start_time:.2f}s")
132
+ with tts_model.mimi.streaming(1), torch.no_grad():
133
+ pcms = []
134
+ for frame in result.frames[tts_model.delay_steps :]:
135
+ pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()
136
+ pcms.append(np.clip(pcm[0, 0], -1, 1))
137
+ pcm = np.concatenate(pcms, axis=-1)
138
+ sphn.write_wav(args.out, pcm, tts_model.mimi.sample_rate)
139
+
140
+
141
+ if __name__ == "__main__":
142
+ main()
scripts/tts_pytorch_streaming.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "moshi==0.2.11",
5
+ # "torch",
6
+ # "sphn",
7
+ # "sounddevice",
8
+ # ]
9
+ # ///
10
+ import argparse
11
+ from dataclasses import dataclass
12
+ import sys
13
+
14
+ import numpy as np
15
+ import queue
16
+ import sphn
17
+ import time
18
+ import torch
19
+ import typing as tp
20
+ from moshi.models.loaders import CheckpointInfo
21
+ from moshi.conditioners import dropout_all_conditions
22
+ from moshi.models.lm import LMGen
23
+ from moshi.models.tts import (
24
+ Entry,
25
+ DEFAULT_DSM_TTS_REPO,
26
+ DEFAULT_DSM_TTS_VOICE_REPO,
27
+ TTSModel,
28
+ ConditionAttributes,
29
+ script_to_entries,
30
+ )
31
+
32
+
33
+ def prepare_script(model: TTSModel, script: str, first_turn: bool) -> list[Entry]:
34
+ multi_speaker = first_turn and model.multi_speaker
35
+ return script_to_entries(
36
+ model.tokenizer,
37
+ model.machine.token_ids,
38
+ model.mimi.frame_rate,
39
+ [script],
40
+ multi_speaker=multi_speaker,
41
+ padding_between=1,
42
+ )
43
+
44
+
45
+ def _make_null(
46
+ all_attributes: tp.Sequence[ConditionAttributes],
47
+ ) -> list[ConditionAttributes]:
48
+ # When using CFG, returns the null conditions.
49
+ return dropout_all_conditions(all_attributes)
50
+
51
+
52
+ @dataclass
53
+ class TTSGen:
54
+ tts_model: TTSModel
55
+ attributes: tp.Sequence[ConditionAttributes]
56
+ on_frame: tp.Optional[tp.Callable[[torch.Tensor], None]] = None
57
+
58
+ def __post_init__(self):
59
+ tts_model = self.tts_model
60
+ attributes = self.attributes
61
+ self.offset = 0
62
+ self.state = self.tts_model.machine.new_state([])
63
+ if tts_model.cfg_coef != 1.0:
64
+ if tts_model.valid_cfg_conditionings:
65
+ raise ValueError(
66
+ "This model does not support direct CFG, but was trained with "
67
+ "CFG distillation. Pass instead `cfg_coef` to `make_condition_attributes`."
68
+ )
69
+ nulled = _make_null(attributes)
70
+ attributes = list(attributes) + nulled
71
+
72
+ assert tts_model.lm.condition_provider is not None
73
+ prepared = tts_model.lm.condition_provider.prepare(attributes)
74
+ condition_tensors = tts_model.lm.condition_provider(prepared)
75
+
76
+ def _on_text_logits_hook(text_logits):
77
+ if tts_model.padding_bonus:
78
+ text_logits[..., tts_model.machine.token_ids.pad] += (
79
+ tts_model.padding_bonus
80
+ )
81
+ return text_logits
82
+
83
+ def _on_audio_hook(audio_tokens):
84
+ audio_offset = tts_model.lm.audio_offset
85
+ delays = tts_model.lm.delays
86
+ for q in range(audio_tokens.shape[1]):
87
+ delay = delays[q + audio_offset]
88
+ if self.offset < delay + tts_model.delay_steps:
89
+ audio_tokens[:, q] = tts_model.machine.token_ids.zero
90
+
91
+ def _on_text_hook(text_tokens):
92
+ tokens = text_tokens.tolist()
93
+ out_tokens = []
94
+ for token in tokens:
95
+ out_token, _ = tts_model.machine.process(self.offset, self.state, token)
96
+ out_tokens.append(out_token)
97
+ text_tokens[:] = torch.tensor(
98
+ out_tokens, dtype=torch.long, device=text_tokens.device
99
+ )
100
+
101
+ tts_model.lm.dep_q = tts_model.n_q
102
+ self.lm_gen = LMGen(
103
+ tts_model.lm,
104
+ temp=tts_model.temp,
105
+ temp_text=tts_model.temp,
106
+ cfg_coef=tts_model.cfg_coef,
107
+ condition_tensors=condition_tensors,
108
+ on_text_logits_hook=_on_text_logits_hook,
109
+ on_text_hook=_on_text_hook,
110
+ on_audio_hook=_on_audio_hook,
111
+ cfg_is_masked_until=None,
112
+ cfg_is_no_text=True,
113
+ )
114
+ self.lm_gen.streaming_forever(1)
115
+
116
+ def process_last(self):
117
+ while len(self.state.entries) > 0 or self.state.end_step is not None:
118
+ self._step()
119
+ additional_steps = (
120
+ self.tts_model.delay_steps + max(self.tts_model.lm.delays) + 8
121
+ )
122
+ for _ in range(additional_steps):
123
+ self._step()
124
+
125
+ def process(self):
126
+ while len(self.state.entries) > self.tts_model.machine.second_stream_ahead:
127
+ self._step()
128
+
129
+ def _step(self):
130
+ missing = self.tts_model.lm.n_q - self.tts_model.lm.dep_q
131
+ input_tokens = torch.full(
132
+ (1, missing, 1),
133
+ self.tts_model.machine.token_ids.zero,
134
+ dtype=torch.long,
135
+ device=self.tts_model.lm.device,
136
+ )
137
+ frame = self.lm_gen.step(input_tokens)
138
+ self.offset += 1
139
+ if frame is not None:
140
+ if self.on_frame is not None:
141
+ self.on_frame(frame)
142
+
143
    def append_entry(self, entry):
        # Queue a script entry; it is consumed later by process()/_step().
        self.state.entries.append(entry)
145
+
146
+
147
@torch.no_grad()
def main():
    """CLI entry point for the PyTorch Kyutai TTS implementation.

    Reads text lines from stdin and either streams the synthesized audio to
    the speakers (``out == "-"``) or writes the concatenated PCM to a wav
    file.
    """
    parser = argparse.ArgumentParser(
        description="Run Kyutai TTS using the PyTorch implementation"
    )
    parser.add_argument(
        "out", type=str, help="Output file to generate, use - for playing the audio"
    )
    parser.add_argument(
        "--hf-repo",
        type=str,
        default=DEFAULT_DSM_TTS_REPO,
        help="HF repo in which to look for the pretrained models.",
    )
    parser.add_argument(
        "--voice-repo",
        default=DEFAULT_DSM_TTS_VOICE_REPO,
        help="HF repo in which to look for pre-computed voice embeddings.",
    )
    parser.add_argument(
        "--voice",
        default="expresso/ex03-ex01_happy_001_channel1_334s.wav",
        help="The voice to use, relative to the voice repo root. "
        f"See {DEFAULT_DSM_TTS_VOICE_REPO}",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="Device on which to run, defaults to 'cuda'.",
    )
    args = parser.parse_args()

    print("Loading model...")
    checkpoint_info = CheckpointInfo.from_hf_repo(args.hf_repo)
    tts_model = TTSModel.from_checkpoint_info(
        checkpoint_info, n_q=32, temp=0.6, device=args.device
    )

    if args.voice.endswith(".safetensors"):
        voice_path = args.voice
    else:
        voice_path = tts_model.get_voice_path(args.voice)
    # CFG coef goes here because the model was trained with CFG distillation,
    # so it's not _actually_ doing CFG at inference time.
    # Also, if you are generating a dialog, you should have two voices in the list.
    condition_attributes = tts_model.make_condition_attributes(
        [voice_path], cfg_coef=2.0
    )

    if sys.stdin.isatty():  # Interactive
        print("Enter text to synthesize (Ctrl+D to end input):")

    if args.out == "-":
        # Stream the audio to the speakers using sounddevice.
        import sounddevice as sd

        pcms = queue.Queue()

        def _on_frame(frame):
            # A frame containing -1 carries no valid audio tokens yet; skip it.
            if (frame != -1).all():
                pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()
                pcms.put_nowait(np.clip(pcm[0, 0], -1, 1))

        def audio_callback(outdata, _a, _b, _c):
            # Runs on the audio thread: play the next chunk, or silence when
            # no audio is buffered yet.
            try:
                pcm_data = pcms.get(block=False)
                outdata[:, 0] = pcm_data
            except queue.Empty:
                outdata[:] = 0

        gen = TTSGen(tts_model, [condition_attributes], on_frame=_on_frame)

        # Bug fix: the original used `with A and B:`, which evaluates
        # `A and B` to a single object (B) and never enters A's context, so
        # the sounddevice stream was never started. Use the two-manager form.
        with sd.OutputStream(
            samplerate=tts_model.mimi.sample_rate,
            blocksize=1920,
            channels=1,
            callback=audio_callback,
        ), tts_model.mimi.streaming(1):
            first_turn = True
            for line in sys.stdin:
                entries = prepare_script(tts_model, line.strip(), first_turn=first_turn)
                first_turn = False
                for entry in entries:
                    gen.append_entry(entry)
                    gen.process()
            gen.process_last()
            # Wait until everything queued has been played back.
            while True:
                if pcms.qsize() == 0:
                    break
                time.sleep(1)
    else:
        pcms = []

        def _on_frame(frame: torch.Tensor):
            if (frame != -1).all():
                pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()
                # Bug fix: np.clip requires explicit bounds (a_min, a_max);
                # clamp to the valid [-1, 1] PCM range like the streaming
                # branch does. The original call raised a TypeError.
                pcms.append(np.clip(pcm[0, 0], -1, 1))

        gen = TTSGen(tts_model, [condition_attributes], on_frame=_on_frame)
        with tts_model.mimi.streaming(1):
            first_turn = True
            for line in sys.stdin:
                entries = prepare_script(tts_model, line.strip(), first_turn=first_turn)
                first_turn = False
                for entry in entries:
                    gen.append_entry(entry)
                    gen.process()
            gen.process_last()
        pcm = np.concatenate(pcms, axis=-1)
        sphn.write_wav(args.out, pcm, tts_model.mimi.sample_rate)


if __name__ == "__main__":
    main()
scripts/tts_rust_server.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "msgpack",
5
+ # "numpy",
6
+ # "sphn",
7
+ # "websockets",
8
+ # "sounddevice",
9
+ # "tqdm",
10
+ # ]
11
+ # ///
12
+ import argparse
13
+ import asyncio
14
+ import sys
15
+ from urllib.parse import urlencode
16
+
17
+ import msgpack
18
+ import numpy as np
19
+ import sphn
20
+ import tqdm
21
+ import websockets
22
+
23
+ SAMPLE_RATE = 24000
24
+
25
+ TTS_TEXT = "Hello, this is a test of the moshi text to speech system, this should result in some nicely sounding generated voice."
26
+ DEFAULT_DSM_TTS_VOICE_REPO = "kyutai/tts-voices"
27
+ AUTH_TOKEN = "public_token"
28
+
29
+
30
async def receive_messages(websocket: websockets.ClientConnection, output_queue):
    """Receive msgpack frames from the TTS server.

    Decoded PCM chunks (float32 numpy arrays) are pushed onto
    ``output_queue``; a ``None`` sentinel is enqueued once the server closes
    the stream. A tqdm bar reports whole seconds of audio received.
    """
    with tqdm.tqdm(desc="Receiving audio", unit=" seconds generated") as pbar:
        total_samples = 0
        reported_seconds = 0

        async for raw in websocket:
            msg = msgpack.unpackb(raw)
            if msg["type"] != "Audio":
                continue

            pcm = np.array(msg["pcm"]).astype(np.float32)
            await output_queue.put(pcm)

            total_samples += len(msg["pcm"])
            seconds = total_samples // SAMPLE_RATE
            if seconds > reported_seconds:
                pbar.update(seconds - reported_seconds)
                reported_seconds = seconds

    print("End of audio.")
    await output_queue.put(None)  # Signal end of audio
50
+
51
+
52
async def output_audio(out: str, output_queue: asyncio.Queue[np.ndarray | None]):
    """Consume PCM chunks from ``output_queue``.

    Plays them live via sounddevice when ``out == "-"``, otherwise collects
    them and writes a single wav file. A ``None`` item on the queue marks the
    end of the stream.
    """
    if out == "-":
        # This will fail with "OSError: PortAudio library not found" on servers with no
        # audio output, so only import if the user requests it.
        import sounddevice as sd

        should_exit = False

        def audio_callback(outdata, _a, _b, _c):
            # Runs on the PortAudio callback thread: feed the next chunk, or
            # silence when nothing is buffered yet.
            # NOTE(review): asyncio.Queue is accessed here from a
            # non-event-loop thread; get_nowait seems to work in practice but
            # is not documented as thread-safe — confirm if glitches appear.
            nonlocal should_exit

            try:
                pcm_data = output_queue.get_nowait()
                if pcm_data is not None:
                    outdata[:, 0] = pcm_data
                else:
                    # End-of-stream sentinel: tell the polling loop to stop.
                    should_exit = True
                    outdata[:] = 0
            except asyncio.QueueEmpty:
                outdata[:] = 0

        with sd.OutputStream(
            samplerate=SAMPLE_RATE,
            blocksize=1920,
            channels=1,
            callback=audio_callback,
        ):
            # Poll until the callback has consumed the None sentinel.
            while True:
                if should_exit:
                    break
                await asyncio.sleep(1)
    else:
        # File output: gather every chunk, then concatenate and write once.
        frames = []
        while True:
            item = await output_queue.get()
            if item is None:
                break
            frames.append(item)

        sphn.write_wav(out, np.concat(frames, -1), SAMPLE_RATE)
        print(f"Saved audio to {out}")
93
+
94
+
95
async def read_lines_from_stdin():
    """Asynchronously yield stdin lines (trailing whitespace stripped)
    until EOF."""
    stream_reader = asyncio.StreamReader()
    await asyncio.get_running_loop().connect_read_pipe(
        lambda: asyncio.StreamReaderProtocol(stream_reader), sys.stdin
    )
    # readline() returns b"" at EOF, which ends the loop.
    while raw := await stream_reader.readline():
        yield raw.decode().rstrip()
105
+
106
+
107
async def read_lines_from_file(path: str):
    """Asynchronously yield the lines of ``path`` (newlines preserved).

    The file is read on a worker thread; lines are handed to the event loop
    through an asyncio.Queue, with ``None`` as the end-of-file sentinel.
    """
    line_queue = asyncio.Queue()
    loop = asyncio.get_running_loop()

    def _fill_queue():
        # Runs off-loop: schedule each put() back onto the event loop.
        with open(path, "r", encoding="utf-8") as fh:
            for raw_line in fh:
                asyncio.run_coroutine_threadsafe(line_queue.put(raw_line), loop)
        asyncio.run_coroutine_threadsafe(line_queue.put(None), loop)

    await asyncio.to_thread(_fill_queue)
    while (item := await line_queue.get()) is not None:
        yield item
123
+
124
+
125
async def get_lines(source: str):
    """Yield input lines from ``source``: stdin when "-", otherwise a file."""
    line_iter = (
        read_lines_from_stdin() if source == "-" else read_lines_from_file(source)
    )
    async for line in line_iter:
        yield line
132
+
133
+
134
async def websocket_client():
    """CLI entry point: stream text to the TTS server over a websocket.

    Sends each input word as a msgpack ``Text`` message followed by ``Eos``,
    while concurrently receiving audio and playing it back or saving it.
    """
    parser = argparse.ArgumentParser(description="Use the TTS streaming API")
    parser.add_argument("inp", type=str, help="Input file, use - for stdin.")
    parser.add_argument(
        "out", type=str, help="Output file to generate, use - for playing the audio"
    )
    parser.add_argument(
        "--voice",
        default="expresso/ex03-ex01_happy_001_channel1_334s.wav",
        help="The voice to use, relative to the voice repo root. "
        f"See {DEFAULT_DSM_TTS_VOICE_REPO}",
    )
    parser.add_argument(
        "--url",
        help="The URL of the server to which to send the audio",
        default="ws://127.0.0.1:8080",
    )
    parser.add_argument("--api-key", default="public_token")
    args = parser.parse_args()

    params = {"voice": args.voice, "format": "PcmMessagePack"}
    uri = f"{args.url}/api/tts_streaming?{urlencode(params)}"
    print(uri)

    if args.inp == "-":
        if sys.stdin.isatty():  # Interactive
            print("Enter text to synthesize (Ctrl+D to end input):")
    headers = {"kyutai-api-key": args.api_key}

    # For clients that don't support the `additional_headers` parameter when connecting
    # (notably: JS libraries like react-use-websocket),
    # you can also provide the API key in the query string with the "auth_id" key,
    # i.e. adding "&auth_id=public_token" at the end of `uri`
    async with websockets.connect(uri, additional_headers=headers) as websocket:
        print("connected")

        async def send_loop():
            # Send each word as its own Text message, then an Eos marker.
            print("go send")
            async for line in get_lines(args.inp):
                for word in line.split():
                    await websocket.send(msgpack.packb({"type": "Text", "text": word}))
            await websocket.send(msgpack.packb({"type": "Eos"}))

        # Run sender, receiver and audio output concurrently; receive_messages
        # enqueues a None sentinel so output_audio knows when to stop.
        output_queue = asyncio.Queue()
        receive_task = asyncio.create_task(receive_messages(websocket, output_queue))
        output_audio_task = asyncio.create_task(output_audio(args.out, output_queue))
        send_task = asyncio.create_task(send_loop())
        await asyncio.gather(receive_task, output_audio_task, send_task)


if __name__ == "__main__":
    asyncio.run(websocket_client())
stt-rs/Cargo.lock ADDED
@@ -0,0 +1,3746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "addr2line"
7
+ version = "0.24.2"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
10
+ dependencies = [
11
+ "gimli",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "adler2"
16
+ version = "2.0.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
19
+
20
+ [[package]]
21
+ name = "aho-corasick"
22
+ version = "1.1.3"
23
+ source = "registry+https://github.com/rust-lang/crates.io-index"
24
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
25
+ dependencies = [
26
+ "memchr",
27
+ ]
28
+
29
+ [[package]]
30
+ name = "anstream"
31
+ version = "0.6.19"
32
+ source = "registry+https://github.com/rust-lang/crates.io-index"
33
+ checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
34
+ dependencies = [
35
+ "anstyle",
36
+ "anstyle-parse",
37
+ "anstyle-query",
38
+ "anstyle-wincon",
39
+ "colorchoice",
40
+ "is_terminal_polyfill",
41
+ "utf8parse",
42
+ ]
43
+
44
+ [[package]]
45
+ name = "anstyle"
46
+ version = "1.0.11"
47
+ source = "registry+https://github.com/rust-lang/crates.io-index"
48
+ checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
49
+
50
+ [[package]]
51
+ name = "anstyle-parse"
52
+ version = "0.2.7"
53
+ source = "registry+https://github.com/rust-lang/crates.io-index"
54
+ checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
55
+ dependencies = [
56
+ "utf8parse",
57
+ ]
58
+
59
+ [[package]]
60
+ name = "anstyle-query"
61
+ version = "1.1.3"
62
+ source = "registry+https://github.com/rust-lang/crates.io-index"
63
+ checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
64
+ dependencies = [
65
+ "windows-sys 0.59.0",
66
+ ]
67
+
68
+ [[package]]
69
+ name = "anstyle-wincon"
70
+ version = "3.0.9"
71
+ source = "registry+https://github.com/rust-lang/crates.io-index"
72
+ checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
73
+ dependencies = [
74
+ "anstyle",
75
+ "once_cell_polyfill",
76
+ "windows-sys 0.59.0",
77
+ ]
78
+
79
+ [[package]]
80
+ name = "anyhow"
81
+ version = "1.0.98"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+ checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
84
+
85
+ [[package]]
86
+ name = "arbitrary"
87
+ version = "1.4.1"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
90
+ dependencies = [
91
+ "derive_arbitrary",
92
+ ]
93
+
94
+ [[package]]
95
+ name = "arrayvec"
96
+ version = "0.7.6"
97
+ source = "registry+https://github.com/rust-lang/crates.io-index"
98
+ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
99
+
100
+ [[package]]
101
+ name = "atomic-waker"
102
+ version = "1.1.2"
103
+ source = "registry+https://github.com/rust-lang/crates.io-index"
104
+ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
105
+
106
+ [[package]]
107
+ name = "audiopus_sys"
108
+ version = "0.2.2"
109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
110
+ checksum = "62314a1546a2064e033665d658e88c620a62904be945f8147e6b16c3db9f8651"
111
+ dependencies = [
112
+ "cmake",
113
+ "log",
114
+ "pkg-config",
115
+ ]
116
+
117
+ [[package]]
118
+ name = "autocfg"
119
+ version = "1.5.0"
120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
121
+ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
122
+
123
+ [[package]]
124
+ name = "backtrace"
125
+ version = "0.3.75"
126
+ source = "registry+https://github.com/rust-lang/crates.io-index"
127
+ checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
128
+ dependencies = [
129
+ "addr2line",
130
+ "cfg-if",
131
+ "libc",
132
+ "miniz_oxide",
133
+ "object",
134
+ "rustc-demangle",
135
+ "windows-targets 0.52.6",
136
+ ]
137
+
138
+ [[package]]
139
+ name = "base64"
140
+ version = "0.22.1"
141
+ source = "registry+https://github.com/rust-lang/crates.io-index"
142
+ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
143
+
144
+ [[package]]
145
+ name = "bindgen_cuda"
146
+ version = "0.1.5"
147
+ source = "registry+https://github.com/rust-lang/crates.io-index"
148
+ checksum = "1f8489af5b7d17a81bffe37e0f4d6e1e4de87c87329d05447f22c35d95a1227d"
149
+ dependencies = [
150
+ "glob",
151
+ "num_cpus",
152
+ "rayon",
153
+ ]
154
+
155
+ [[package]]
156
+ name = "bit-set"
157
+ version = "0.5.3"
158
+ source = "registry+https://github.com/rust-lang/crates.io-index"
159
+ checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
160
+ dependencies = [
161
+ "bit-vec",
162
+ ]
163
+
164
+ [[package]]
165
+ name = "bit-vec"
166
+ version = "0.6.3"
167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
168
+ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
169
+
170
+ [[package]]
171
+ name = "bitflags"
172
+ version = "1.3.2"
173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
174
+ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
175
+
176
+ [[package]]
177
+ name = "bitflags"
178
+ version = "2.9.1"
179
+ source = "registry+https://github.com/rust-lang/crates.io-index"
180
+ checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
181
+
182
+ [[package]]
183
+ name = "block"
184
+ version = "0.1.6"
185
+ source = "registry+https://github.com/rust-lang/crates.io-index"
186
+ checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
187
+
188
+ [[package]]
189
+ name = "bumpalo"
190
+ version = "3.18.1"
191
+ source = "registry+https://github.com/rust-lang/crates.io-index"
192
+ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee"
193
+
194
+ [[package]]
195
+ name = "bytemuck"
196
+ version = "1.23.1"
197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
198
+ checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
199
+ dependencies = [
200
+ "bytemuck_derive",
201
+ ]
202
+
203
+ [[package]]
204
+ name = "bytemuck_derive"
205
+ version = "1.9.3"
206
+ source = "registry+https://github.com/rust-lang/crates.io-index"
207
+ checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
208
+ dependencies = [
209
+ "proc-macro2",
210
+ "quote",
211
+ "syn 2.0.103",
212
+ ]
213
+
214
+ [[package]]
215
+ name = "byteorder"
216
+ version = "1.5.0"
217
+ source = "registry+https://github.com/rust-lang/crates.io-index"
218
+ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
219
+
220
+ [[package]]
221
+ name = "bytes"
222
+ version = "1.10.1"
223
+ source = "registry+https://github.com/rust-lang/crates.io-index"
224
+ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
225
+
226
+ [[package]]
227
+ name = "candle-core"
228
+ version = "0.9.1"
229
+ source = "registry+https://github.com/rust-lang/crates.io-index"
230
+ checksum = "a9f51e2ecf6efe9737af8f993433c839f956d2b6ed4fd2dd4a7c6d8b0fa667ff"
231
+ dependencies = [
232
+ "byteorder",
233
+ "candle-kernels",
234
+ "candle-metal-kernels",
235
+ "cudarc",
236
+ "gemm 0.17.1",
237
+ "half",
238
+ "memmap2",
239
+ "metal 0.27.0",
240
+ "num-traits",
241
+ "num_cpus",
242
+ "rand",
243
+ "rand_distr",
244
+ "rayon",
245
+ "safetensors",
246
+ "thiserror 1.0.69",
247
+ "ug",
248
+ "ug-cuda",
249
+ "ug-metal",
250
+ "yoke 0.7.5",
251
+ "zip",
252
+ ]
253
+
254
+ [[package]]
255
+ name = "candle-kernels"
256
+ version = "0.9.1"
257
+ source = "registry+https://github.com/rust-lang/crates.io-index"
258
+ checksum = "9fcd989c2143aa754370b5bfee309e35fbd259e83d9ecf7a73d23d8508430775"
259
+ dependencies = [
260
+ "bindgen_cuda",
261
+ ]
262
+
263
+ [[package]]
264
+ name = "candle-metal-kernels"
265
+ version = "0.9.1"
266
+ source = "registry+https://github.com/rust-lang/crates.io-index"
267
+ checksum = "9a323ee9c813707f73b6e59300661b354a70410f31fe4135170c4eda8a061534"
268
+ dependencies = [
269
+ "half",
270
+ "metal 0.27.0",
271
+ "once_cell",
272
+ "thiserror 1.0.69",
273
+ "tracing",
274
+ ]
275
+
276
+ [[package]]
277
+ name = "candle-nn"
278
+ version = "0.9.1"
279
+ source = "registry+https://github.com/rust-lang/crates.io-index"
280
+ checksum = "c1980d53280c8f9e2c6cbe1785855d7ff8010208b46e21252b978badf13ad69d"
281
+ dependencies = [
282
+ "candle-core",
283
+ "candle-metal-kernels",
284
+ "half",
285
+ "metal 0.27.0",
286
+ "num-traits",
287
+ "rayon",
288
+ "safetensors",
289
+ "serde",
290
+ "thiserror 1.0.69",
291
+ ]
292
+
293
+ [[package]]
294
+ name = "candle-transformers"
295
+ version = "0.9.1"
296
+ source = "registry+https://github.com/rust-lang/crates.io-index"
297
+ checksum = "186cb80045dbe47e0b387ea6d3e906f02fb3056297080d9922984c90e90a72b0"
298
+ dependencies = [
299
+ "byteorder",
300
+ "candle-core",
301
+ "candle-nn",
302
+ "fancy-regex",
303
+ "num-traits",
304
+ "rand",
305
+ "rayon",
306
+ "serde",
307
+ "serde_json",
308
+ "serde_plain",
309
+ "tracing",
310
+ ]
311
+
312
+ [[package]]
313
+ name = "cc"
314
+ version = "1.2.27"
315
+ source = "registry+https://github.com/rust-lang/crates.io-index"
316
+ checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc"
317
+ dependencies = [
318
+ "shlex",
319
+ ]
320
+
321
+ [[package]]
322
+ name = "cfg-if"
323
+ version = "1.0.1"
324
+ source = "registry+https://github.com/rust-lang/crates.io-index"
325
+ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
326
+
327
+ [[package]]
328
+ name = "clap"
329
+ version = "4.5.40"
330
+ source = "registry+https://github.com/rust-lang/crates.io-index"
331
+ checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
332
+ dependencies = [
333
+ "clap_builder",
334
+ "clap_derive",
335
+ ]
336
+
337
+ [[package]]
338
+ name = "clap_builder"
339
+ version = "4.5.40"
340
+ source = "registry+https://github.com/rust-lang/crates.io-index"
341
+ checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
342
+ dependencies = [
343
+ "anstream",
344
+ "anstyle",
345
+ "clap_lex",
346
+ "strsim",
347
+ ]
348
+
349
+ [[package]]
350
+ name = "clap_derive"
351
+ version = "4.5.40"
352
+ source = "registry+https://github.com/rust-lang/crates.io-index"
353
+ checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
354
+ dependencies = [
355
+ "heck",
356
+ "proc-macro2",
357
+ "quote",
358
+ "syn 2.0.103",
359
+ ]
360
+
361
+ [[package]]
362
+ name = "clap_lex"
363
+ version = "0.7.5"
364
+ source = "registry+https://github.com/rust-lang/crates.io-index"
365
+ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
366
+
367
+ [[package]]
368
+ name = "cmake"
369
+ version = "0.1.54"
370
+ source = "registry+https://github.com/rust-lang/crates.io-index"
371
+ checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
372
+ dependencies = [
373
+ "cc",
374
+ ]
375
+
376
+ [[package]]
377
+ name = "colorchoice"
378
+ version = "1.0.4"
379
+ source = "registry+https://github.com/rust-lang/crates.io-index"
380
+ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
381
+
382
+ [[package]]
383
+ name = "console"
384
+ version = "0.15.11"
385
+ source = "registry+https://github.com/rust-lang/crates.io-index"
386
+ checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
387
+ dependencies = [
388
+ "encode_unicode",
389
+ "libc",
390
+ "once_cell",
391
+ "unicode-width",
392
+ "windows-sys 0.59.0",
393
+ ]
394
+
395
+ [[package]]
396
+ name = "core-foundation"
397
+ version = "0.9.4"
398
+ source = "registry+https://github.com/rust-lang/crates.io-index"
399
+ checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
400
+ dependencies = [
401
+ "core-foundation-sys",
402
+ "libc",
403
+ ]
404
+
405
+ [[package]]
406
+ name = "core-foundation-sys"
407
+ version = "0.8.7"
408
+ source = "registry+https://github.com/rust-lang/crates.io-index"
409
+ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
410
+
411
+ [[package]]
412
+ name = "core-graphics-types"
413
+ version = "0.1.3"
414
+ source = "registry+https://github.com/rust-lang/crates.io-index"
415
+ checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf"
416
+ dependencies = [
417
+ "bitflags 1.3.2",
418
+ "core-foundation",
419
+ "libc",
420
+ ]
421
+
422
+ [[package]]
423
+ name = "crc32fast"
424
+ version = "1.4.2"
425
+ source = "registry+https://github.com/rust-lang/crates.io-index"
426
+ checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
427
+ dependencies = [
428
+ "cfg-if",
429
+ ]
430
+
431
+ [[package]]
432
+ name = "crossbeam-deque"
433
+ version = "0.8.6"
434
+ source = "registry+https://github.com/rust-lang/crates.io-index"
435
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
436
+ dependencies = [
437
+ "crossbeam-epoch",
438
+ "crossbeam-utils",
439
+ ]
440
+
441
+ [[package]]
442
+ name = "crossbeam-epoch"
443
+ version = "0.9.18"
444
+ source = "registry+https://github.com/rust-lang/crates.io-index"
445
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
446
+ dependencies = [
447
+ "crossbeam-utils",
448
+ ]
449
+
450
+ [[package]]
451
+ name = "crossbeam-utils"
452
+ version = "0.8.21"
453
+ source = "registry+https://github.com/rust-lang/crates.io-index"
454
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
455
+
456
+ [[package]]
457
+ name = "crunchy"
458
+ version = "0.2.3"
459
+ source = "registry+https://github.com/rust-lang/crates.io-index"
460
+ checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
461
+
462
+ [[package]]
463
+ name = "cudarc"
464
+ version = "0.16.4"
465
+ source = "registry+https://github.com/rust-lang/crates.io-index"
466
+ checksum = "f9574894139a982bf26fbb44473a9d416c015e779c51ef0fbc0789f1a1c17b25"
467
+ dependencies = [
468
+ "half",
469
+ "libloading",
470
+ ]
471
+
472
+ [[package]]
473
+ name = "derive_arbitrary"
474
+ version = "1.4.1"
475
+ source = "registry+https://github.com/rust-lang/crates.io-index"
476
+ checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
477
+ dependencies = [
478
+ "proc-macro2",
479
+ "quote",
480
+ "syn 2.0.103",
481
+ ]
482
+
483
+ [[package]]
484
+ name = "dirs"
485
+ version = "6.0.0"
486
+ source = "registry+https://github.com/rust-lang/crates.io-index"
487
+ checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
488
+ dependencies = [
489
+ "dirs-sys",
490
+ ]
491
+
492
+ [[package]]
493
+ name = "dirs-sys"
494
+ version = "0.5.0"
495
+ source = "registry+https://github.com/rust-lang/crates.io-index"
496
+ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
497
+ dependencies = [
498
+ "libc",
499
+ "option-ext",
500
+ "redox_users",
501
+ "windows-sys 0.60.2",
502
+ ]
503
+
504
+ [[package]]
505
+ name = "displaydoc"
506
+ version = "0.2.5"
507
+ source = "registry+https://github.com/rust-lang/crates.io-index"
508
+ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
509
+ dependencies = [
510
+ "proc-macro2",
511
+ "quote",
512
+ "syn 2.0.103",
513
+ ]
514
+
515
+ [[package]]
516
+ name = "dyn-stack"
517
+ version = "0.10.0"
518
+ source = "registry+https://github.com/rust-lang/crates.io-index"
519
+ checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b"
520
+ dependencies = [
521
+ "bytemuck",
522
+ "reborrow",
523
+ ]
524
+
525
+ [[package]]
526
+ name = "dyn-stack"
527
+ version = "0.13.0"
528
+ source = "registry+https://github.com/rust-lang/crates.io-index"
529
+ checksum = "490bd48eb68fffcfed519b4edbfd82c69cbe741d175b84f0e0cbe8c57cbe0bdd"
530
+ dependencies = [
531
+ "bytemuck",
532
+ ]
533
+
534
+ [[package]]
535
+ name = "either"
536
+ version = "1.15.0"
537
+ source = "registry+https://github.com/rust-lang/crates.io-index"
538
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
539
+
540
+ [[package]]
541
+ name = "encode_unicode"
542
+ version = "1.0.0"
543
+ source = "registry+https://github.com/rust-lang/crates.io-index"
544
+ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
545
+
546
+ [[package]]
547
+ name = "encoding_rs"
548
+ version = "0.8.35"
549
+ source = "registry+https://github.com/rust-lang/crates.io-index"
550
+ checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
551
+ dependencies = [
552
+ "cfg-if",
553
+ ]
554
+
555
+ [[package]]
556
+ name = "enum-as-inner"
557
+ version = "0.6.1"
558
+ source = "registry+https://github.com/rust-lang/crates.io-index"
559
+ checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
560
+ dependencies = [
561
+ "heck",
562
+ "proc-macro2",
563
+ "quote",
564
+ "syn 2.0.103",
565
+ ]
566
+
567
+ [[package]]
568
+ name = "equivalent"
569
+ version = "1.0.2"
570
+ source = "registry+https://github.com/rust-lang/crates.io-index"
571
+ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
572
+
573
+ [[package]]
574
+ name = "errno"
575
+ version = "0.3.12"
576
+ source = "registry+https://github.com/rust-lang/crates.io-index"
577
+ checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
578
+ dependencies = [
579
+ "libc",
580
+ "windows-sys 0.59.0",
581
+ ]
582
+
583
+ [[package]]
584
+ name = "extended"
585
+ version = "0.1.0"
586
+ source = "registry+https://github.com/rust-lang/crates.io-index"
587
+ checksum = "af9673d8203fcb076b19dfd17e38b3d4ae9f44959416ea532ce72415a6020365"
588
+
589
+ [[package]]
590
+ name = "fancy-regex"
591
+ version = "0.13.0"
592
+ source = "registry+https://github.com/rust-lang/crates.io-index"
593
+ checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
594
+ dependencies = [
595
+ "bit-set",
596
+ "regex-automata",
597
+ "regex-syntax",
598
+ ]
599
+
600
+ [[package]]
601
+ name = "fastrand"
602
+ version = "2.3.0"
603
+ source = "registry+https://github.com/rust-lang/crates.io-index"
604
+ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
605
+
606
+ [[package]]
607
+ name = "flate2"
608
+ version = "1.1.2"
609
+ source = "registry+https://github.com/rust-lang/crates.io-index"
610
+ checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
611
+ dependencies = [
612
+ "crc32fast",
613
+ "miniz_oxide",
614
+ ]
615
+
616
+ [[package]]
617
+ name = "fnv"
618
+ version = "1.0.7"
619
+ source = "registry+https://github.com/rust-lang/crates.io-index"
620
+ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
621
+
622
+ [[package]]
623
+ name = "foreign-types"
624
+ version = "0.3.2"
625
+ source = "registry+https://github.com/rust-lang/crates.io-index"
626
+ checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
627
+ dependencies = [
628
+ "foreign-types-shared 0.1.1",
629
+ ]
630
+
631
+ [[package]]
632
+ name = "foreign-types"
633
+ version = "0.5.0"
634
+ source = "registry+https://github.com/rust-lang/crates.io-index"
635
+ checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965"
636
+ dependencies = [
637
+ "foreign-types-macros",
638
+ "foreign-types-shared 0.3.1",
639
+ ]
640
+
641
+ [[package]]
642
+ name = "foreign-types-macros"
643
+ version = "0.2.3"
644
+ source = "registry+https://github.com/rust-lang/crates.io-index"
645
+ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
646
+ dependencies = [
647
+ "proc-macro2",
648
+ "quote",
649
+ "syn 2.0.103",
650
+ ]
651
+
652
+ [[package]]
653
+ name = "foreign-types-shared"
654
+ version = "0.1.1"
655
+ source = "registry+https://github.com/rust-lang/crates.io-index"
656
+ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
657
+
658
+ [[package]]
659
+ name = "foreign-types-shared"
660
+ version = "0.3.1"
661
+ source = "registry+https://github.com/rust-lang/crates.io-index"
662
+ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
663
+
664
+ [[package]]
665
+ name = "form_urlencoded"
666
+ version = "1.2.1"
667
+ source = "registry+https://github.com/rust-lang/crates.io-index"
668
+ checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
669
+ dependencies = [
670
+ "percent-encoding",
671
+ ]
672
+
673
+ [[package]]
674
+ name = "futures"
675
+ version = "0.3.31"
676
+ source = "registry+https://github.com/rust-lang/crates.io-index"
677
+ checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
678
+ dependencies = [
679
+ "futures-channel",
680
+ "futures-core",
681
+ "futures-executor",
682
+ "futures-io",
683
+ "futures-sink",
684
+ "futures-task",
685
+ "futures-util",
686
+ ]
687
+
688
+ [[package]]
689
+ name = "futures-channel"
690
+ version = "0.3.31"
691
+ source = "registry+https://github.com/rust-lang/crates.io-index"
692
+ checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
693
+ dependencies = [
694
+ "futures-core",
695
+ "futures-sink",
696
+ ]
697
+
698
+ [[package]]
699
+ name = "futures-core"
700
+ version = "0.3.31"
701
+ source = "registry+https://github.com/rust-lang/crates.io-index"
702
+ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
703
+
704
+ [[package]]
705
+ name = "futures-executor"
706
+ version = "0.3.31"
707
+ source = "registry+https://github.com/rust-lang/crates.io-index"
708
+ checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
709
+ dependencies = [
710
+ "futures-core",
711
+ "futures-task",
712
+ "futures-util",
713
+ ]
714
+
715
+ [[package]]
716
+ name = "futures-io"
717
+ version = "0.3.31"
718
+ source = "registry+https://github.com/rust-lang/crates.io-index"
719
+ checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
720
+
721
+ [[package]]
722
+ name = "futures-macro"
723
+ version = "0.3.31"
724
+ source = "registry+https://github.com/rust-lang/crates.io-index"
725
+ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
726
+ dependencies = [
727
+ "proc-macro2",
728
+ "quote",
729
+ "syn 2.0.103",
730
+ ]
731
+
732
+ [[package]]
733
+ name = "futures-sink"
734
+ version = "0.3.31"
735
+ source = "registry+https://github.com/rust-lang/crates.io-index"
736
+ checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
737
+
738
+ [[package]]
739
+ name = "futures-task"
740
+ version = "0.3.31"
741
+ source = "registry+https://github.com/rust-lang/crates.io-index"
742
+ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
743
+
744
+ [[package]]
745
+ name = "futures-util"
746
+ version = "0.3.31"
747
+ source = "registry+https://github.com/rust-lang/crates.io-index"
748
+ checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
749
+ dependencies = [
750
+ "futures-channel",
751
+ "futures-core",
752
+ "futures-io",
753
+ "futures-macro",
754
+ "futures-sink",
755
+ "futures-task",
756
+ "memchr",
757
+ "pin-project-lite",
758
+ "pin-utils",
759
+ "slab",
760
+ ]
761
+
762
+ [[package]]
763
+ name = "gemm"
764
+ version = "0.17.1"
765
+ source = "registry+https://github.com/rust-lang/crates.io-index"
766
+ checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32"
767
+ dependencies = [
768
+ "dyn-stack 0.10.0",
769
+ "gemm-c32 0.17.1",
770
+ "gemm-c64 0.17.1",
771
+ "gemm-common 0.17.1",
772
+ "gemm-f16 0.17.1",
773
+ "gemm-f32 0.17.1",
774
+ "gemm-f64 0.17.1",
775
+ "num-complex",
776
+ "num-traits",
777
+ "paste",
778
+ "raw-cpuid 10.7.0",
779
+ "seq-macro",
780
+ ]
781
+
782
+ [[package]]
783
+ name = "gemm"
784
+ version = "0.18.2"
785
+ source = "registry+https://github.com/rust-lang/crates.io-index"
786
+ checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451"
787
+ dependencies = [
788
+ "dyn-stack 0.13.0",
789
+ "gemm-c32 0.18.2",
790
+ "gemm-c64 0.18.2",
791
+ "gemm-common 0.18.2",
792
+ "gemm-f16 0.18.2",
793
+ "gemm-f32 0.18.2",
794
+ "gemm-f64 0.18.2",
795
+ "num-complex",
796
+ "num-traits",
797
+ "paste",
798
+ "raw-cpuid 11.5.0",
799
+ "seq-macro",
800
+ ]
801
+
802
+ [[package]]
803
+ name = "gemm-c32"
804
+ version = "0.17.1"
805
+ source = "registry+https://github.com/rust-lang/crates.io-index"
806
+ checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0"
807
+ dependencies = [
808
+ "dyn-stack 0.10.0",
809
+ "gemm-common 0.17.1",
810
+ "num-complex",
811
+ "num-traits",
812
+ "paste",
813
+ "raw-cpuid 10.7.0",
814
+ "seq-macro",
815
+ ]
816
+
817
+ [[package]]
818
+ name = "gemm-c32"
819
+ version = "0.18.2"
820
+ source = "registry+https://github.com/rust-lang/crates.io-index"
821
+ checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847"
822
+ dependencies = [
823
+ "dyn-stack 0.13.0",
824
+ "gemm-common 0.18.2",
825
+ "num-complex",
826
+ "num-traits",
827
+ "paste",
828
+ "raw-cpuid 11.5.0",
829
+ "seq-macro",
830
+ ]
831
+
832
+ [[package]]
833
+ name = "gemm-c64"
834
+ version = "0.17.1"
835
+ source = "registry+https://github.com/rust-lang/crates.io-index"
836
+ checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a"
837
+ dependencies = [
838
+ "dyn-stack 0.10.0",
839
+ "gemm-common 0.17.1",
840
+ "num-complex",
841
+ "num-traits",
842
+ "paste",
843
+ "raw-cpuid 10.7.0",
844
+ "seq-macro",
845
+ ]
846
+
847
+ [[package]]
848
+ name = "gemm-c64"
849
+ version = "0.18.2"
850
+ source = "registry+https://github.com/rust-lang/crates.io-index"
851
+ checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf"
852
+ dependencies = [
853
+ "dyn-stack 0.13.0",
854
+ "gemm-common 0.18.2",
855
+ "num-complex",
856
+ "num-traits",
857
+ "paste",
858
+ "raw-cpuid 11.5.0",
859
+ "seq-macro",
860
+ ]
861
+
862
+ [[package]]
863
+ name = "gemm-common"
864
+ version = "0.17.1"
865
+ source = "registry+https://github.com/rust-lang/crates.io-index"
866
+ checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8"
867
+ dependencies = [
868
+ "bytemuck",
869
+ "dyn-stack 0.10.0",
870
+ "half",
871
+ "num-complex",
872
+ "num-traits",
873
+ "once_cell",
874
+ "paste",
875
+ "pulp 0.18.22",
876
+ "raw-cpuid 10.7.0",
877
+ "rayon",
878
+ "seq-macro",
879
+ "sysctl 0.5.5",
880
+ ]
881
+
882
+ [[package]]
883
+ name = "gemm-common"
884
+ version = "0.18.2"
885
+ source = "registry+https://github.com/rust-lang/crates.io-index"
886
+ checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3"
887
+ dependencies = [
888
+ "bytemuck",
889
+ "dyn-stack 0.13.0",
890
+ "half",
891
+ "libm",
892
+ "num-complex",
893
+ "num-traits",
894
+ "once_cell",
895
+ "paste",
896
+ "pulp 0.21.5",
897
+ "raw-cpuid 11.5.0",
898
+ "rayon",
899
+ "seq-macro",
900
+ "sysctl 0.6.0",
901
+ ]
902
+
903
+ [[package]]
904
+ name = "gemm-f16"
905
+ version = "0.17.1"
906
+ source = "registry+https://github.com/rust-lang/crates.io-index"
907
+ checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4"
908
+ dependencies = [
909
+ "dyn-stack 0.10.0",
910
+ "gemm-common 0.17.1",
911
+ "gemm-f32 0.17.1",
912
+ "half",
913
+ "num-complex",
914
+ "num-traits",
915
+ "paste",
916
+ "raw-cpuid 10.7.0",
917
+ "rayon",
918
+ "seq-macro",
919
+ ]
920
+
921
+ [[package]]
922
+ name = "gemm-f16"
923
+ version = "0.18.2"
924
+ source = "registry+https://github.com/rust-lang/crates.io-index"
925
+ checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109"
926
+ dependencies = [
927
+ "dyn-stack 0.13.0",
928
+ "gemm-common 0.18.2",
929
+ "gemm-f32 0.18.2",
930
+ "half",
931
+ "num-complex",
932
+ "num-traits",
933
+ "paste",
934
+ "raw-cpuid 11.5.0",
935
+ "rayon",
936
+ "seq-macro",
937
+ ]
938
+
939
+ [[package]]
940
+ name = "gemm-f32"
941
+ version = "0.17.1"
942
+ source = "registry+https://github.com/rust-lang/crates.io-index"
943
+ checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113"
944
+ dependencies = [
945
+ "dyn-stack 0.10.0",
946
+ "gemm-common 0.17.1",
947
+ "num-complex",
948
+ "num-traits",
949
+ "paste",
950
+ "raw-cpuid 10.7.0",
951
+ "seq-macro",
952
+ ]
953
+
954
+ [[package]]
955
+ name = "gemm-f32"
956
+ version = "0.18.2"
957
+ source = "registry+https://github.com/rust-lang/crates.io-index"
958
+ checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864"
959
+ dependencies = [
960
+ "dyn-stack 0.13.0",
961
+ "gemm-common 0.18.2",
962
+ "num-complex",
963
+ "num-traits",
964
+ "paste",
965
+ "raw-cpuid 11.5.0",
966
+ "seq-macro",
967
+ ]
968
+
969
+ [[package]]
970
+ name = "gemm-f64"
971
+ version = "0.17.1"
972
+ source = "registry+https://github.com/rust-lang/crates.io-index"
973
+ checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0"
974
+ dependencies = [
975
+ "dyn-stack 0.10.0",
976
+ "gemm-common 0.17.1",
977
+ "num-complex",
978
+ "num-traits",
979
+ "paste",
980
+ "raw-cpuid 10.7.0",
981
+ "seq-macro",
982
+ ]
983
+
984
+ [[package]]
985
+ name = "gemm-f64"
986
+ version = "0.18.2"
987
+ source = "registry+https://github.com/rust-lang/crates.io-index"
988
+ checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd"
989
+ dependencies = [
990
+ "dyn-stack 0.13.0",
991
+ "gemm-common 0.18.2",
992
+ "num-complex",
993
+ "num-traits",
994
+ "paste",
995
+ "raw-cpuid 11.5.0",
996
+ "seq-macro",
997
+ ]
998
+
999
+ [[package]]
1000
+ name = "getrandom"
1001
+ version = "0.2.16"
1002
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1003
+ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
1004
+ dependencies = [
1005
+ "cfg-if",
1006
+ "libc",
1007
+ "wasi 0.11.1+wasi-snapshot-preview1",
1008
+ ]
1009
+
1010
+ [[package]]
1011
+ name = "getrandom"
1012
+ version = "0.3.3"
1013
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1014
+ checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
1015
+ dependencies = [
1016
+ "cfg-if",
1017
+ "libc",
1018
+ "r-efi",
1019
+ "wasi 0.14.2+wasi-0.2.4",
1020
+ ]
1021
+
1022
+ [[package]]
1023
+ name = "gimli"
1024
+ version = "0.31.1"
1025
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1026
+ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
1027
+
1028
+ [[package]]
1029
+ name = "glob"
1030
+ version = "0.3.2"
1031
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1032
+ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
1033
+
1034
+ [[package]]
1035
+ name = "h2"
1036
+ version = "0.4.10"
1037
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1038
+ checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5"
1039
+ dependencies = [
1040
+ "atomic-waker",
1041
+ "bytes",
1042
+ "fnv",
1043
+ "futures-core",
1044
+ "futures-sink",
1045
+ "http",
1046
+ "indexmap",
1047
+ "slab",
1048
+ "tokio",
1049
+ "tokio-util 0.7.15",
1050
+ "tracing",
1051
+ ]
1052
+
1053
+ [[package]]
1054
+ name = "half"
1055
+ version = "2.6.0"
1056
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1057
+ checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
1058
+ dependencies = [
1059
+ "bytemuck",
1060
+ "cfg-if",
1061
+ "crunchy",
1062
+ "num-traits",
1063
+ "rand",
1064
+ "rand_distr",
1065
+ ]
1066
+
1067
+ [[package]]
1068
+ name = "hashbrown"
1069
+ version = "0.15.4"
1070
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1071
+ checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
1072
+
1073
+ [[package]]
1074
+ name = "heck"
1075
+ version = "0.5.0"
1076
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1077
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
1078
+
1079
+ [[package]]
1080
+ name = "hermit-abi"
1081
+ version = "0.5.2"
1082
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1083
+ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
1084
+
1085
+ [[package]]
1086
+ name = "hf-hub"
1087
+ version = "0.4.3"
1088
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1089
+ checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
1090
+ dependencies = [
1091
+ "dirs",
1092
+ "futures",
1093
+ "http",
1094
+ "indicatif",
1095
+ "libc",
1096
+ "log",
1097
+ "native-tls",
1098
+ "num_cpus",
1099
+ "rand",
1100
+ "reqwest",
1101
+ "serde",
1102
+ "serde_json",
1103
+ "thiserror 2.0.12",
1104
+ "tokio",
1105
+ "ureq",
1106
+ "windows-sys 0.60.2",
1107
+ ]
1108
+
1109
+ [[package]]
1110
+ name = "http"
1111
+ version = "1.3.1"
1112
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1113
+ checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
1114
+ dependencies = [
1115
+ "bytes",
1116
+ "fnv",
1117
+ "itoa",
1118
+ ]
1119
+
1120
+ [[package]]
1121
+ name = "http-body"
1122
+ version = "1.0.1"
1123
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1124
+ checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1125
+ dependencies = [
1126
+ "bytes",
1127
+ "http",
1128
+ ]
1129
+
1130
+ [[package]]
1131
+ name = "http-body-util"
1132
+ version = "0.1.3"
1133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1134
+ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
1135
+ dependencies = [
1136
+ "bytes",
1137
+ "futures-core",
1138
+ "http",
1139
+ "http-body",
1140
+ "pin-project-lite",
1141
+ ]
1142
+
1143
+ [[package]]
1144
+ name = "httparse"
1145
+ version = "1.10.1"
1146
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1147
+ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
1148
+
1149
+ [[package]]
1150
+ name = "hyper"
1151
+ version = "1.6.0"
1152
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1153
+ checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
1154
+ dependencies = [
1155
+ "bytes",
1156
+ "futures-channel",
1157
+ "futures-util",
1158
+ "h2",
1159
+ "http",
1160
+ "http-body",
1161
+ "httparse",
1162
+ "itoa",
1163
+ "pin-project-lite",
1164
+ "smallvec",
1165
+ "tokio",
1166
+ "want",
1167
+ ]
1168
+
1169
+ [[package]]
1170
+ name = "hyper-rustls"
1171
+ version = "0.27.7"
1172
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1173
+ checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1174
+ dependencies = [
1175
+ "http",
1176
+ "hyper",
1177
+ "hyper-util",
1178
+ "rustls",
1179
+ "rustls-pki-types",
1180
+ "tokio",
1181
+ "tokio-rustls",
1182
+ "tower-service",
1183
+ ]
1184
+
1185
+ [[package]]
1186
+ name = "hyper-tls"
1187
+ version = "0.6.0"
1188
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1189
+ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
1190
+ dependencies = [
1191
+ "bytes",
1192
+ "http-body-util",
1193
+ "hyper",
1194
+ "hyper-util",
1195
+ "native-tls",
1196
+ "tokio",
1197
+ "tokio-native-tls",
1198
+ "tower-service",
1199
+ ]
1200
+
1201
+ [[package]]
1202
+ name = "hyper-util"
1203
+ version = "0.1.14"
1204
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1205
+ checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb"
1206
+ dependencies = [
1207
+ "base64",
1208
+ "bytes",
1209
+ "futures-channel",
1210
+ "futures-core",
1211
+ "futures-util",
1212
+ "http",
1213
+ "http-body",
1214
+ "hyper",
1215
+ "ipnet",
1216
+ "libc",
1217
+ "percent-encoding",
1218
+ "pin-project-lite",
1219
+ "socket2",
1220
+ "system-configuration",
1221
+ "tokio",
1222
+ "tower-service",
1223
+ "tracing",
1224
+ "windows-registry",
1225
+ ]
1226
+
1227
+ [[package]]
1228
+ name = "icu_collections"
1229
+ version = "2.0.0"
1230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1231
+ checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
1232
+ dependencies = [
1233
+ "displaydoc",
1234
+ "potential_utf",
1235
+ "yoke 0.8.0",
1236
+ "zerofrom",
1237
+ "zerovec",
1238
+ ]
1239
+
1240
+ [[package]]
1241
+ name = "icu_locale_core"
1242
+ version = "2.0.0"
1243
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1244
+ checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
1245
+ dependencies = [
1246
+ "displaydoc",
1247
+ "litemap",
1248
+ "tinystr",
1249
+ "writeable",
1250
+ "zerovec",
1251
+ ]
1252
+
1253
+ [[package]]
1254
+ name = "icu_normalizer"
1255
+ version = "2.0.0"
1256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1257
+ checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
1258
+ dependencies = [
1259
+ "displaydoc",
1260
+ "icu_collections",
1261
+ "icu_normalizer_data",
1262
+ "icu_properties",
1263
+ "icu_provider",
1264
+ "smallvec",
1265
+ "zerovec",
1266
+ ]
1267
+
1268
+ [[package]]
1269
+ name = "icu_normalizer_data"
1270
+ version = "2.0.0"
1271
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1272
+ checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
1273
+
1274
+ [[package]]
1275
+ name = "icu_properties"
1276
+ version = "2.0.1"
1277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1278
+ checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
1279
+ dependencies = [
1280
+ "displaydoc",
1281
+ "icu_collections",
1282
+ "icu_locale_core",
1283
+ "icu_properties_data",
1284
+ "icu_provider",
1285
+ "potential_utf",
1286
+ "zerotrie",
1287
+ "zerovec",
1288
+ ]
1289
+
1290
+ [[package]]
1291
+ name = "icu_properties_data"
1292
+ version = "2.0.1"
1293
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1294
+ checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
1295
+
1296
+ [[package]]
1297
+ name = "icu_provider"
1298
+ version = "2.0.0"
1299
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1300
+ checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
1301
+ dependencies = [
1302
+ "displaydoc",
1303
+ "icu_locale_core",
1304
+ "stable_deref_trait",
1305
+ "tinystr",
1306
+ "writeable",
1307
+ "yoke 0.8.0",
1308
+ "zerofrom",
1309
+ "zerotrie",
1310
+ "zerovec",
1311
+ ]
1312
+
1313
+ [[package]]
1314
+ name = "idna"
1315
+ version = "1.0.3"
1316
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1317
+ checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
1318
+ dependencies = [
1319
+ "idna_adapter",
1320
+ "smallvec",
1321
+ "utf8_iter",
1322
+ ]
1323
+
1324
+ [[package]]
1325
+ name = "idna_adapter"
1326
+ version = "1.2.1"
1327
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1328
+ checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
1329
+ dependencies = [
1330
+ "icu_normalizer",
1331
+ "icu_properties",
1332
+ ]
1333
+
1334
+ [[package]]
1335
+ name = "indexmap"
1336
+ version = "2.9.0"
1337
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1338
+ checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
1339
+ dependencies = [
1340
+ "equivalent",
1341
+ "hashbrown",
1342
+ ]
1343
+
1344
+ [[package]]
1345
+ name = "indicatif"
1346
+ version = "0.17.11"
1347
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1348
+ checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
1349
+ dependencies = [
1350
+ "console",
1351
+ "number_prefix",
1352
+ "portable-atomic",
1353
+ "unicode-width",
1354
+ "web-time",
1355
+ ]
1356
+
1357
+ [[package]]
1358
+ name = "ipnet"
1359
+ version = "2.11.0"
1360
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1361
+ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
1362
+
1363
+ [[package]]
1364
+ name = "iri-string"
1365
+ version = "0.7.8"
1366
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1367
+ checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"
1368
+ dependencies = [
1369
+ "memchr",
1370
+ "serde",
1371
+ ]
1372
+
1373
+ [[package]]
1374
+ name = "is_terminal_polyfill"
1375
+ version = "1.70.1"
1376
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1377
+ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
1378
+
1379
+ [[package]]
1380
+ name = "itertools"
1381
+ version = "0.10.5"
1382
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1383
+ checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
1384
+ dependencies = [
1385
+ "either",
1386
+ ]
1387
+
1388
+ [[package]]
1389
+ name = "itoa"
1390
+ version = "1.0.15"
1391
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1392
+ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
1393
+
1394
+ [[package]]
1395
+ name = "js-sys"
1396
+ version = "0.3.77"
1397
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1398
+ checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
1399
+ dependencies = [
1400
+ "once_cell",
1401
+ "wasm-bindgen",
1402
+ ]
1403
+
1404
+ [[package]]
1405
+ name = "kaudio"
1406
+ version = "0.2.1"
1407
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1408
+ checksum = "03fa91d027e02814ae876667542dd1b7cf91cb12f511ef29ea00022d7463699e"
1409
+ dependencies = [
1410
+ "byteorder",
1411
+ "futures-util",
1412
+ "ogg",
1413
+ "opus",
1414
+ "regex",
1415
+ "rubato",
1416
+ "serde",
1417
+ "serde_json",
1418
+ "symphonia",
1419
+ "thiserror 2.0.12",
1420
+ "tokio",
1421
+ ]
1422
+
1423
+ [[package]]
1424
+ name = "kyutai-stt-rs"
1425
+ version = "0.1.0"
1426
+ dependencies = [
1427
+ "anyhow",
1428
+ "candle-core",
1429
+ "candle-nn",
1430
+ "candle-transformers",
1431
+ "clap",
1432
+ "hf-hub",
1433
+ "kaudio",
1434
+ "moshi",
1435
+ "sentencepiece",
1436
+ "serde",
1437
+ "serde_json",
1438
+ ]
1439
+
1440
+ [[package]]
1441
+ name = "lazy_static"
1442
+ version = "1.5.0"
1443
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1444
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
1445
+
1446
+ [[package]]
1447
+ name = "libc"
1448
+ version = "0.2.174"
1449
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1450
+ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
1451
+
1452
+ [[package]]
1453
+ name = "libloading"
1454
+ version = "0.8.8"
1455
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1456
+ checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
1457
+ dependencies = [
1458
+ "cfg-if",
1459
+ "windows-targets 0.53.2",
1460
+ ]
1461
+
1462
+ [[package]]
1463
+ name = "libm"
1464
+ version = "0.2.15"
1465
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1466
+ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
1467
+
1468
+ [[package]]
1469
+ name = "libredox"
1470
+ version = "0.1.3"
1471
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1472
+ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
1473
+ dependencies = [
1474
+ "bitflags 2.9.1",
1475
+ "libc",
1476
+ ]
1477
+
1478
+ [[package]]
1479
+ name = "linux-raw-sys"
1480
+ version = "0.9.4"
1481
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1482
+ checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
1483
+
1484
+ [[package]]
1485
+ name = "litemap"
1486
+ version = "0.8.0"
1487
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1488
+ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
1489
+
1490
+ [[package]]
1491
+ name = "lock_api"
1492
+ version = "0.4.13"
1493
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1494
+ checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
1495
+ dependencies = [
1496
+ "autocfg",
1497
+ "scopeguard",
1498
+ ]
1499
+
1500
+ [[package]]
1501
+ name = "log"
1502
+ version = "0.4.27"
1503
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1504
+ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
1505
+
1506
+ [[package]]
1507
+ name = "malloc_buf"
1508
+ version = "0.0.6"
1509
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1510
+ checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb"
1511
+ dependencies = [
1512
+ "libc",
1513
+ ]
1514
+
1515
+ [[package]]
1516
+ name = "memchr"
1517
+ version = "2.7.5"
1518
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1519
+ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
1520
+
1521
+ [[package]]
1522
+ name = "memmap2"
1523
+ version = "0.9.5"
1524
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1525
+ checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
1526
+ dependencies = [
1527
+ "libc",
1528
+ "stable_deref_trait",
1529
+ ]
1530
+
1531
+ [[package]]
1532
+ name = "metal"
1533
+ version = "0.27.0"
1534
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1535
+ checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25"
1536
+ dependencies = [
1537
+ "bitflags 2.9.1",
1538
+ "block",
1539
+ "core-graphics-types",
1540
+ "foreign-types 0.5.0",
1541
+ "log",
1542
+ "objc",
1543
+ "paste",
1544
+ ]
1545
+
1546
+ [[package]]
1547
+ name = "metal"
1548
+ version = "0.29.0"
1549
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1550
+ checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
1551
+ dependencies = [
1552
+ "bitflags 2.9.1",
1553
+ "block",
1554
+ "core-graphics-types",
1555
+ "foreign-types 0.5.0",
1556
+ "log",
1557
+ "objc",
1558
+ "paste",
1559
+ ]
1560
+
1561
+ [[package]]
1562
+ name = "mime"
1563
+ version = "0.3.17"
1564
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1565
+ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
1566
+
1567
+ [[package]]
1568
+ name = "miniz_oxide"
1569
+ version = "0.8.9"
1570
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1571
+ checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
1572
+ dependencies = [
1573
+ "adler2",
1574
+ ]
1575
+
1576
+ [[package]]
1577
+ name = "mio"
1578
+ version = "1.0.4"
1579
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1580
+ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
1581
+ dependencies = [
1582
+ "libc",
1583
+ "wasi 0.11.1+wasi-snapshot-preview1",
1584
+ "windows-sys 0.59.0",
1585
+ ]
1586
+
1587
+ [[package]]
1588
+ name = "moshi"
1589
+ version = "0.6.1"
1590
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1591
+ checksum = "f72457c4b5dfbd77f67af691b470ed92ff2a71908d610498b19c82e638cd8ae2"
1592
+ dependencies = [
1593
+ "candle-core",
1594
+ "candle-nn",
1595
+ "candle-transformers",
1596
+ "rayon",
1597
+ "serde",
1598
+ "tracing",
1599
+ ]
1600
+
1601
+ [[package]]
1602
+ name = "native-tls"
1603
+ version = "0.2.14"
1604
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1605
+ checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
1606
+ dependencies = [
1607
+ "libc",
1608
+ "log",
1609
+ "openssl",
1610
+ "openssl-probe",
1611
+ "openssl-sys",
1612
+ "schannel",
1613
+ "security-framework",
1614
+ "security-framework-sys",
1615
+ "tempfile",
1616
+ ]
1617
+
1618
+ [[package]]
1619
+ name = "num"
1620
+ version = "0.4.3"
1621
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1622
+ checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
1623
+ dependencies = [
1624
+ "num-bigint",
1625
+ "num-complex",
1626
+ "num-integer",
1627
+ "num-iter",
1628
+ "num-rational",
1629
+ "num-traits",
1630
+ ]
1631
+
1632
+ [[package]]
1633
+ name = "num-bigint"
1634
+ version = "0.4.6"
1635
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1636
+ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
1637
+ dependencies = [
1638
+ "num-integer",
1639
+ "num-traits",
1640
+ ]
1641
+
1642
+ [[package]]
1643
+ name = "num-complex"
1644
+ version = "0.4.6"
1645
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1646
+ checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
1647
+ dependencies = [
1648
+ "bytemuck",
1649
+ "num-traits",
1650
+ ]
1651
+
1652
+ [[package]]
1653
+ name = "num-derive"
1654
+ version = "0.4.2"
1655
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1656
+ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
1657
+ dependencies = [
1658
+ "proc-macro2",
1659
+ "quote",
1660
+ "syn 2.0.103",
1661
+ ]
1662
+
1663
+ [[package]]
1664
+ name = "num-integer"
1665
+ version = "0.1.46"
1666
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1667
+ checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
1668
+ dependencies = [
1669
+ "num-traits",
1670
+ ]
1671
+
1672
+ [[package]]
1673
+ name = "num-iter"
1674
+ version = "0.1.45"
1675
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1676
+ checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
1677
+ dependencies = [
1678
+ "autocfg",
1679
+ "num-integer",
1680
+ "num-traits",
1681
+ ]
1682
+
1683
+ [[package]]
1684
+ name = "num-rational"
1685
+ version = "0.4.2"
1686
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1687
+ checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
1688
+ dependencies = [
1689
+ "num-bigint",
1690
+ "num-integer",
1691
+ "num-traits",
1692
+ ]
1693
+
1694
+ [[package]]
1695
+ name = "num-traits"
1696
+ version = "0.2.19"
1697
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1698
+ checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
1699
+ dependencies = [
1700
+ "autocfg",
1701
+ "libm",
1702
+ ]
1703
+
1704
+ [[package]]
1705
+ name = "num_cpus"
1706
+ version = "1.17.0"
1707
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1708
+ checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
1709
+ dependencies = [
1710
+ "hermit-abi",
1711
+ "libc",
1712
+ ]
1713
+
1714
+ [[package]]
1715
+ name = "num_enum"
1716
+ version = "0.7.3"
1717
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1718
+ checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179"
1719
+ dependencies = [
1720
+ "num_enum_derive",
1721
+ ]
1722
+
1723
+ [[package]]
1724
+ name = "num_enum_derive"
1725
+ version = "0.7.3"
1726
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1727
+ checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
1728
+ dependencies = [
1729
+ "proc-macro-crate",
1730
+ "proc-macro2",
1731
+ "quote",
1732
+ "syn 2.0.103",
1733
+ ]
1734
+
1735
+ [[package]]
1736
+ name = "number_prefix"
1737
+ version = "0.4.0"
1738
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1739
+ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
1740
+
1741
+ [[package]]
1742
+ name = "objc"
1743
+ version = "0.2.7"
1744
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1745
+ checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1"
1746
+ dependencies = [
1747
+ "malloc_buf",
1748
+ "objc_exception",
1749
+ ]
1750
+
1751
+ [[package]]
1752
+ name = "objc_exception"
1753
+ version = "0.1.2"
1754
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1755
+ checksum = "ad970fb455818ad6cba4c122ad012fae53ae8b4795f86378bce65e4f6bab2ca4"
1756
+ dependencies = [
1757
+ "cc",
1758
+ ]
1759
+
1760
+ [[package]]
1761
+ name = "object"
1762
+ version = "0.36.7"
1763
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1764
+ checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
1765
+ dependencies = [
1766
+ "memchr",
1767
+ ]
1768
+
1769
+ [[package]]
1770
+ name = "ogg"
1771
+ version = "0.9.2"
1772
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1773
+ checksum = "fdab8dcd8d4052eaacaf8fb07a3ccd9a6e26efadb42878a413c68fc4af1dee2b"
1774
+ dependencies = [
1775
+ "byteorder",
1776
+ "bytes",
1777
+ "futures-core",
1778
+ "futures-io",
1779
+ "pin-project",
1780
+ "tokio",
1781
+ "tokio-util 0.6.10",
1782
+ ]
1783
+
1784
+ [[package]]
1785
+ name = "once_cell"
1786
+ version = "1.21.3"
1787
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1788
+ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
1789
+
1790
+ [[package]]
1791
+ name = "once_cell_polyfill"
1792
+ version = "1.70.1"
1793
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1794
+ checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
1795
+
1796
+ [[package]]
1797
+ name = "openssl"
1798
+ version = "0.10.73"
1799
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1800
+ checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
1801
+ dependencies = [
1802
+ "bitflags 2.9.1",
1803
+ "cfg-if",
1804
+ "foreign-types 0.3.2",
1805
+ "libc",
1806
+ "once_cell",
1807
+ "openssl-macros",
1808
+ "openssl-sys",
1809
+ ]
1810
+
1811
+ [[package]]
1812
+ name = "openssl-macros"
1813
+ version = "0.1.1"
1814
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1815
+ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
1816
+ dependencies = [
1817
+ "proc-macro2",
1818
+ "quote",
1819
+ "syn 2.0.103",
1820
+ ]
1821
+
1822
+ [[package]]
1823
+ name = "openssl-probe"
1824
+ version = "0.1.6"
1825
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1826
+ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
1827
+
1828
+ [[package]]
1829
+ name = "openssl-sys"
1830
+ version = "0.9.109"
1831
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1832
+ checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
1833
+ dependencies = [
1834
+ "cc",
1835
+ "libc",
1836
+ "pkg-config",
1837
+ "vcpkg",
1838
+ ]
1839
+
1840
+ [[package]]
1841
+ name = "option-ext"
1842
+ version = "0.2.0"
1843
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1844
+ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
1845
+
1846
+ [[package]]
1847
+ name = "opus"
1848
+ version = "0.3.0"
1849
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1850
+ checksum = "6526409b274a7e98e55ff59d96aafd38e6cd34d46b7dbbc32ce126dffcd75e8e"
1851
+ dependencies = [
1852
+ "audiopus_sys",
1853
+ "libc",
1854
+ ]
1855
+
1856
+ [[package]]
1857
+ name = "parking_lot"
1858
+ version = "0.12.4"
1859
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1860
+ checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
1861
+ dependencies = [
1862
+ "lock_api",
1863
+ "parking_lot_core",
1864
+ ]
1865
+
1866
+ [[package]]
1867
+ name = "parking_lot_core"
1868
+ version = "0.9.11"
1869
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1870
+ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
1871
+ dependencies = [
1872
+ "cfg-if",
1873
+ "libc",
1874
+ "redox_syscall",
1875
+ "smallvec",
1876
+ "windows-targets 0.52.6",
1877
+ ]
1878
+
1879
+ [[package]]
1880
+ name = "paste"
1881
+ version = "1.0.15"
1882
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1883
+ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
1884
+
1885
+ [[package]]
1886
+ name = "percent-encoding"
1887
+ version = "2.3.1"
1888
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1889
+ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
1890
+
1891
+ [[package]]
1892
+ name = "pin-project"
1893
+ version = "1.1.10"
1894
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1895
+ checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
1896
+ dependencies = [
1897
+ "pin-project-internal",
1898
+ ]
1899
+
1900
+ [[package]]
1901
+ name = "pin-project-internal"
1902
+ version = "1.1.10"
1903
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1904
+ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
1905
+ dependencies = [
1906
+ "proc-macro2",
1907
+ "quote",
1908
+ "syn 2.0.103",
1909
+ ]
1910
+
1911
+ [[package]]
1912
+ name = "pin-project-lite"
1913
+ version = "0.2.16"
1914
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1915
+ checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
1916
+
1917
+ [[package]]
1918
+ name = "pin-utils"
1919
+ version = "0.1.0"
1920
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1921
+ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
1922
+
1923
+ [[package]]
1924
+ name = "pkg-config"
1925
+ version = "0.3.32"
1926
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1927
+ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
1928
+
1929
+ [[package]]
1930
+ name = "portable-atomic"
1931
+ version = "1.11.1"
1932
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1933
+ checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
1934
+
1935
+ [[package]]
1936
+ name = "potential_utf"
1937
+ version = "0.1.2"
1938
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1939
+ checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
1940
+ dependencies = [
1941
+ "zerovec",
1942
+ ]
1943
+
1944
+ [[package]]
1945
+ name = "ppv-lite86"
1946
+ version = "0.2.21"
1947
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1948
+ checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
1949
+ dependencies = [
1950
+ "zerocopy",
1951
+ ]
1952
+
1953
+ [[package]]
1954
+ name = "primal-check"
1955
+ version = "0.3.4"
1956
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1957
+ checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08"
1958
+ dependencies = [
1959
+ "num-integer",
1960
+ ]
1961
+
1962
+ [[package]]
1963
+ name = "proc-macro-crate"
1964
+ version = "3.3.0"
1965
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1966
+ checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
1967
+ dependencies = [
1968
+ "toml_edit",
1969
+ ]
1970
+
1971
+ [[package]]
1972
+ name = "proc-macro2"
1973
+ version = "1.0.95"
1974
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1975
+ checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
1976
+ dependencies = [
1977
+ "unicode-ident",
1978
+ ]
1979
+
1980
+ [[package]]
1981
+ name = "prost"
1982
+ version = "0.11.9"
1983
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1984
+ checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
1985
+ dependencies = [
1986
+ "bytes",
1987
+ "prost-derive",
1988
+ ]
1989
+
1990
+ [[package]]
1991
+ name = "prost-derive"
1992
+ version = "0.11.9"
1993
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1994
+ checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
1995
+ dependencies = [
1996
+ "anyhow",
1997
+ "itertools",
1998
+ "proc-macro2",
1999
+ "quote",
2000
+ "syn 1.0.109",
2001
+ ]
2002
+
2003
+ [[package]]
2004
+ name = "pulp"
2005
+ version = "0.18.22"
2006
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2007
+ checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6"
2008
+ dependencies = [
2009
+ "bytemuck",
2010
+ "libm",
2011
+ "num-complex",
2012
+ "reborrow",
2013
+ ]
2014
+
2015
+ [[package]]
2016
+ name = "pulp"
2017
+ version = "0.21.5"
2018
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2019
+ checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
2020
+ dependencies = [
2021
+ "bytemuck",
2022
+ "cfg-if",
2023
+ "libm",
2024
+ "num-complex",
2025
+ "reborrow",
2026
+ "version_check",
2027
+ ]
2028
+
2029
+ [[package]]
2030
+ name = "quote"
2031
+ version = "1.0.40"
2032
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2033
+ checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
2034
+ dependencies = [
2035
+ "proc-macro2",
2036
+ ]
2037
+
2038
+ [[package]]
2039
+ name = "r-efi"
2040
+ version = "5.3.0"
2041
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2042
+ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
2043
+
2044
+ [[package]]
2045
+ name = "rand"
2046
+ version = "0.9.1"
2047
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2048
+ checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
2049
+ dependencies = [
2050
+ "rand_chacha",
2051
+ "rand_core",
2052
+ ]
2053
+
2054
+ [[package]]
2055
+ name = "rand_chacha"
2056
+ version = "0.9.0"
2057
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2058
+ checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
2059
+ dependencies = [
2060
+ "ppv-lite86",
2061
+ "rand_core",
2062
+ ]
2063
+
2064
+ [[package]]
2065
+ name = "rand_core"
2066
+ version = "0.9.3"
2067
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2068
+ checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
2069
+ dependencies = [
2070
+ "getrandom 0.3.3",
2071
+ ]
2072
+
2073
+ [[package]]
2074
+ name = "rand_distr"
2075
+ version = "0.5.1"
2076
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2077
+ checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
2078
+ dependencies = [
2079
+ "num-traits",
2080
+ "rand",
2081
+ ]
2082
+
2083
+ [[package]]
2084
+ name = "raw-cpuid"
2085
+ version = "10.7.0"
2086
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2087
+ checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
2088
+ dependencies = [
2089
+ "bitflags 1.3.2",
2090
+ ]
2091
+
2092
+ [[package]]
2093
+ name = "raw-cpuid"
2094
+ version = "11.5.0"
2095
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2096
+ checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
2097
+ dependencies = [
2098
+ "bitflags 2.9.1",
2099
+ ]
2100
+
2101
+ [[package]]
2102
+ name = "rayon"
2103
+ version = "1.10.0"
2104
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2105
+ checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
2106
+ dependencies = [
2107
+ "either",
2108
+ "rayon-core",
2109
+ ]
2110
+
2111
+ [[package]]
2112
+ name = "rayon-core"
2113
+ version = "1.12.1"
2114
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2115
+ checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
2116
+ dependencies = [
2117
+ "crossbeam-deque",
2118
+ "crossbeam-utils",
2119
+ ]
2120
+
2121
+ [[package]]
2122
+ name = "realfft"
2123
+ version = "3.5.0"
2124
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2125
+ checksum = "f821338fddb99d089116342c46e9f1fbf3828dba077674613e734e01d6ea8677"
2126
+ dependencies = [
2127
+ "rustfft",
2128
+ ]
2129
+
2130
+ [[package]]
2131
+ name = "reborrow"
2132
+ version = "0.5.5"
2133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2134
+ checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430"
2135
+
2136
+ [[package]]
2137
+ name = "redox_syscall"
2138
+ version = "0.5.13"
2139
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2140
+ checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
2141
+ dependencies = [
2142
+ "bitflags 2.9.1",
2143
+ ]
2144
+
2145
+ [[package]]
2146
+ name = "redox_users"
2147
+ version = "0.5.0"
2148
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2149
+ checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
2150
+ dependencies = [
2151
+ "getrandom 0.2.16",
2152
+ "libredox",
2153
+ "thiserror 2.0.12",
2154
+ ]
2155
+
2156
+ [[package]]
2157
+ name = "regex"
2158
+ version = "1.11.1"
2159
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2160
+ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
2161
+ dependencies = [
2162
+ "aho-corasick",
2163
+ "memchr",
2164
+ "regex-automata",
2165
+ "regex-syntax",
2166
+ ]
2167
+
2168
+ [[package]]
2169
+ name = "regex-automata"
2170
+ version = "0.4.9"
2171
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2172
+ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
2173
+ dependencies = [
2174
+ "aho-corasick",
2175
+ "memchr",
2176
+ "regex-syntax",
2177
+ ]
2178
+
2179
+ [[package]]
2180
+ name = "regex-syntax"
2181
+ version = "0.8.5"
2182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2183
+ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
2184
+
2185
+ [[package]]
2186
+ name = "reqwest"
2187
+ version = "0.12.20"
2188
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2189
+ checksum = "eabf4c97d9130e2bf606614eb937e86edac8292eaa6f422f995d7e8de1eb1813"
2190
+ dependencies = [
2191
+ "base64",
2192
+ "bytes",
2193
+ "encoding_rs",
2194
+ "futures-core",
2195
+ "futures-util",
2196
+ "h2",
2197
+ "http",
2198
+ "http-body",
2199
+ "http-body-util",
2200
+ "hyper",
2201
+ "hyper-rustls",
2202
+ "hyper-tls",
2203
+ "hyper-util",
2204
+ "js-sys",
2205
+ "log",
2206
+ "mime",
2207
+ "native-tls",
2208
+ "percent-encoding",
2209
+ "pin-project-lite",
2210
+ "rustls-pki-types",
2211
+ "serde",
2212
+ "serde_json",
2213
+ "serde_urlencoded",
2214
+ "sync_wrapper",
2215
+ "tokio",
2216
+ "tokio-native-tls",
2217
+ "tokio-util 0.7.15",
2218
+ "tower",
2219
+ "tower-http",
2220
+ "tower-service",
2221
+ "url",
2222
+ "wasm-bindgen",
2223
+ "wasm-bindgen-futures",
2224
+ "wasm-streams",
2225
+ "web-sys",
2226
+ ]
2227
+
2228
+ [[package]]
2229
+ name = "ring"
2230
+ version = "0.17.14"
2231
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2232
+ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
2233
+ dependencies = [
2234
+ "cc",
2235
+ "cfg-if",
2236
+ "getrandom 0.2.16",
2237
+ "libc",
2238
+ "untrusted",
2239
+ "windows-sys 0.52.0",
2240
+ ]
2241
+
2242
+ [[package]]
2243
+ name = "rubato"
2244
+ version = "0.15.0"
2245
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2246
+ checksum = "b5d18b486e7d29a408ef3f825bc1327d8f87af091c987ca2f5b734625940e234"
2247
+ dependencies = [
2248
+ "num-complex",
2249
+ "num-integer",
2250
+ "num-traits",
2251
+ "realfft",
2252
+ ]
2253
+
2254
+ [[package]]
2255
+ name = "rustc-demangle"
2256
+ version = "0.1.25"
2257
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2258
+ checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
2259
+
2260
+ [[package]]
2261
+ name = "rustfft"
2262
+ version = "6.4.0"
2263
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2264
+ checksum = "c6f140db74548f7c9d7cce60912c9ac414e74df5e718dc947d514b051b42f3f4"
2265
+ dependencies = [
2266
+ "num-complex",
2267
+ "num-integer",
2268
+ "num-traits",
2269
+ "primal-check",
2270
+ "strength_reduce",
2271
+ "transpose",
2272
+ ]
2273
+
2274
+ [[package]]
2275
+ name = "rustix"
2276
+ version = "1.0.7"
2277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2278
+ checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
2279
+ dependencies = [
2280
+ "bitflags 2.9.1",
2281
+ "errno",
2282
+ "libc",
2283
+ "linux-raw-sys",
2284
+ "windows-sys 0.59.0",
2285
+ ]
2286
+
2287
+ [[package]]
2288
+ name = "rustls"
2289
+ version = "0.23.28"
2290
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2291
+ checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643"
2292
+ dependencies = [
2293
+ "log",
2294
+ "once_cell",
2295
+ "ring",
2296
+ "rustls-pki-types",
2297
+ "rustls-webpki",
2298
+ "subtle",
2299
+ "zeroize",
2300
+ ]
2301
+
2302
+ [[package]]
2303
+ name = "rustls-pki-types"
2304
+ version = "1.12.0"
2305
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2306
+ checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
2307
+ dependencies = [
2308
+ "zeroize",
2309
+ ]
2310
+
2311
+ [[package]]
2312
+ name = "rustls-webpki"
2313
+ version = "0.103.3"
2314
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2315
+ checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435"
2316
+ dependencies = [
2317
+ "ring",
2318
+ "rustls-pki-types",
2319
+ "untrusted",
2320
+ ]
2321
+
2322
+ [[package]]
2323
+ name = "rustversion"
2324
+ version = "1.0.21"
2325
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2326
+ checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
2327
+
2328
+ [[package]]
2329
+ name = "ryu"
2330
+ version = "1.0.20"
2331
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2332
+ checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
2333
+
2334
+ [[package]]
2335
+ name = "safetensors"
2336
+ version = "0.4.5"
2337
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2338
+ checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6"
2339
+ dependencies = [
2340
+ "serde",
2341
+ "serde_json",
2342
+ ]
2343
+
2344
+ [[package]]
2345
+ name = "same-file"
2346
+ version = "1.0.6"
2347
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2348
+ checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
2349
+ dependencies = [
2350
+ "winapi-util",
2351
+ ]
2352
+
2353
+ [[package]]
2354
+ name = "schannel"
2355
+ version = "0.1.27"
2356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2357
+ checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
2358
+ dependencies = [
2359
+ "windows-sys 0.59.0",
2360
+ ]
2361
+
2362
+ [[package]]
2363
+ name = "scopeguard"
2364
+ version = "1.2.0"
2365
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2366
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
2367
+
2368
+ [[package]]
2369
+ name = "security-framework"
2370
+ version = "2.11.1"
2371
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2372
+ checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
2373
+ dependencies = [
2374
+ "bitflags 2.9.1",
2375
+ "core-foundation",
2376
+ "core-foundation-sys",
2377
+ "libc",
2378
+ "security-framework-sys",
2379
+ ]
2380
+
2381
+ [[package]]
2382
+ name = "security-framework-sys"
2383
+ version = "2.14.0"
2384
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2385
+ checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
2386
+ dependencies = [
2387
+ "core-foundation-sys",
2388
+ "libc",
2389
+ ]
2390
+
2391
+ [[package]]
2392
+ name = "sentencepiece"
2393
+ version = "0.11.3"
2394
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2395
+ checksum = "286451da14703923eeb9d5e9d7717a15cbf236c037923fb7a6ff911ca45f4124"
2396
+ dependencies = [
2397
+ "libc",
2398
+ "num-derive",
2399
+ "num-traits",
2400
+ "prost",
2401
+ "prost-derive",
2402
+ "sentencepiece-sys",
2403
+ "thiserror 1.0.69",
2404
+ ]
2405
+
2406
+ [[package]]
2407
+ name = "sentencepiece-sys"
2408
+ version = "0.11.3"
2409
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2410
+ checksum = "a721500103a50c284cd3908cca6c435fcc6a260a1cead830a040f904a89234fb"
2411
+ dependencies = [
2412
+ "cc",
2413
+ "cmake",
2414
+ "pkg-config",
2415
+ ]
2416
+
2417
+ [[package]]
2418
+ name = "seq-macro"
2419
+ version = "0.3.6"
2420
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2421
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
2422
+
2423
+ [[package]]
2424
+ name = "serde"
2425
+ version = "1.0.219"
2426
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2427
+ checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
2428
+ dependencies = [
2429
+ "serde_derive",
2430
+ ]
2431
+
2432
+ [[package]]
2433
+ name = "serde_derive"
2434
+ version = "1.0.219"
2435
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2436
+ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
2437
+ dependencies = [
2438
+ "proc-macro2",
2439
+ "quote",
2440
+ "syn 2.0.103",
2441
+ ]
2442
+
2443
+ [[package]]
2444
+ name = "serde_json"
2445
+ version = "1.0.140"
2446
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2447
+ checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
2448
+ dependencies = [
2449
+ "itoa",
2450
+ "memchr",
2451
+ "ryu",
2452
+ "serde",
2453
+ ]
2454
+
2455
+ [[package]]
2456
+ name = "serde_plain"
2457
+ version = "1.0.2"
2458
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2459
+ checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
2460
+ dependencies = [
2461
+ "serde",
2462
+ ]
2463
+
2464
+ [[package]]
2465
+ name = "serde_urlencoded"
2466
+ version = "0.7.1"
2467
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2468
+ checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
2469
+ dependencies = [
2470
+ "form_urlencoded",
2471
+ "itoa",
2472
+ "ryu",
2473
+ "serde",
2474
+ ]
2475
+
2476
+ [[package]]
2477
+ name = "shlex"
2478
+ version = "1.3.0"
2479
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2480
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
2481
+
2482
+ [[package]]
2483
+ name = "signal-hook-registry"
2484
+ version = "1.4.5"
2485
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2486
+ checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
2487
+ dependencies = [
2488
+ "libc",
2489
+ ]
2490
+
2491
+ [[package]]
2492
+ name = "slab"
2493
+ version = "0.4.10"
2494
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2495
+ checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d"
2496
+
2497
+ [[package]]
2498
+ name = "smallvec"
2499
+ version = "1.15.1"
2500
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2501
+ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
2502
+
2503
+ [[package]]
2504
+ name = "socket2"
2505
+ version = "0.5.10"
2506
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2507
+ checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
2508
+ dependencies = [
2509
+ "libc",
2510
+ "windows-sys 0.52.0",
2511
+ ]
2512
+
2513
+ [[package]]
2514
+ name = "socks"
2515
+ version = "0.3.4"
2516
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2517
+ checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
2518
+ dependencies = [
2519
+ "byteorder",
2520
+ "libc",
2521
+ "winapi",
2522
+ ]
2523
+
2524
+ [[package]]
2525
+ name = "stable_deref_trait"
2526
+ version = "1.2.0"
2527
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2528
+ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
2529
+
2530
+ [[package]]
2531
+ name = "strength_reduce"
2532
+ version = "0.2.4"
2533
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2534
+ checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82"
2535
+
2536
+ [[package]]
2537
+ name = "strsim"
2538
+ version = "0.11.1"
2539
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2540
+ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
2541
+
2542
+ [[package]]
2543
+ name = "subtle"
2544
+ version = "2.6.1"
2545
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2546
+ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
2547
+
2548
+ [[package]]
2549
+ name = "symphonia"
2550
+ version = "0.5.4"
2551
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2552
+ checksum = "815c942ae7ee74737bb00f965fa5b5a2ac2ce7b6c01c0cc169bbeaf7abd5f5a9"
2553
+ dependencies = [
2554
+ "lazy_static",
2555
+ "symphonia-bundle-flac",
2556
+ "symphonia-bundle-mp3",
2557
+ "symphonia-codec-aac",
2558
+ "symphonia-codec-adpcm",
2559
+ "symphonia-codec-alac",
2560
+ "symphonia-codec-pcm",
2561
+ "symphonia-codec-vorbis",
2562
+ "symphonia-core",
2563
+ "symphonia-format-caf",
2564
+ "symphonia-format-isomp4",
2565
+ "symphonia-format-mkv",
2566
+ "symphonia-format-ogg",
2567
+ "symphonia-format-riff",
2568
+ "symphonia-metadata",
2569
+ ]
2570
+
2571
+ [[package]]
2572
+ name = "symphonia-bundle-flac"
2573
+ version = "0.5.4"
2574
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2575
+ checksum = "72e34f34298a7308d4397a6c7fbf5b84c5d491231ce3dd379707ba673ab3bd97"
2576
+ dependencies = [
2577
+ "log",
2578
+ "symphonia-core",
2579
+ "symphonia-metadata",
2580
+ "symphonia-utils-xiph",
2581
+ ]
2582
+
2583
+ [[package]]
2584
+ name = "symphonia-bundle-mp3"
2585
+ version = "0.5.4"
2586
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2587
+ checksum = "c01c2aae70f0f1fb096b6f0ff112a930b1fb3626178fba3ae68b09dce71706d4"
2588
+ dependencies = [
2589
+ "lazy_static",
2590
+ "log",
2591
+ "symphonia-core",
2592
+ "symphonia-metadata",
2593
+ ]
2594
+
2595
+ [[package]]
2596
+ name = "symphonia-codec-aac"
2597
+ version = "0.5.4"
2598
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2599
+ checksum = "cdbf25b545ad0d3ee3e891ea643ad115aff4ca92f6aec472086b957a58522f70"
2600
+ dependencies = [
2601
+ "lazy_static",
2602
+ "log",
2603
+ "symphonia-core",
2604
+ ]
2605
+
2606
+ [[package]]
2607
+ name = "symphonia-codec-adpcm"
2608
+ version = "0.5.4"
2609
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2610
+ checksum = "c94e1feac3327cd616e973d5be69ad36b3945f16b06f19c6773fc3ac0b426a0f"
2611
+ dependencies = [
2612
+ "log",
2613
+ "symphonia-core",
2614
+ ]
2615
+
2616
+ [[package]]
2617
+ name = "symphonia-codec-alac"
2618
+ version = "0.5.4"
2619
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2620
+ checksum = "2d8a6666649a08412906476a8b0efd9b9733e241180189e9f92b09c08d0e38f3"
2621
+ dependencies = [
2622
+ "log",
2623
+ "symphonia-core",
2624
+ ]
2625
+
2626
+ [[package]]
2627
+ name = "symphonia-codec-pcm"
2628
+ version = "0.5.4"
2629
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2630
+ checksum = "f395a67057c2ebc5e84d7bb1be71cce1a7ba99f64e0f0f0e303a03f79116f89b"
2631
+ dependencies = [
2632
+ "log",
2633
+ "symphonia-core",
2634
+ ]
2635
+
2636
+ [[package]]
2637
+ name = "symphonia-codec-vorbis"
2638
+ version = "0.5.4"
2639
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2640
+ checksum = "5a98765fb46a0a6732b007f7e2870c2129b6f78d87db7987e6533c8f164a9f30"
2641
+ dependencies = [
2642
+ "log",
2643
+ "symphonia-core",
2644
+ "symphonia-utils-xiph",
2645
+ ]
2646
+
2647
+ [[package]]
2648
+ name = "symphonia-core"
2649
+ version = "0.5.4"
2650
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2651
+ checksum = "798306779e3dc7d5231bd5691f5a813496dc79d3f56bf82e25789f2094e022c3"
2652
+ dependencies = [
2653
+ "arrayvec",
2654
+ "bitflags 1.3.2",
2655
+ "bytemuck",
2656
+ "lazy_static",
2657
+ "log",
2658
+ ]
2659
+
2660
+ [[package]]
2661
+ name = "symphonia-format-caf"
2662
+ version = "0.5.4"
2663
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2664
+ checksum = "e43c99c696a388295a29fe71b133079f5d8b18041cf734c5459c35ad9097af50"
2665
+ dependencies = [
2666
+ "log",
2667
+ "symphonia-core",
2668
+ "symphonia-metadata",
2669
+ ]
2670
+
2671
+ [[package]]
2672
+ name = "symphonia-format-isomp4"
2673
+ version = "0.5.4"
2674
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2675
+ checksum = "abfdf178d697e50ce1e5d9b982ba1b94c47218e03ec35022d9f0e071a16dc844"
2676
+ dependencies = [
2677
+ "encoding_rs",
2678
+ "log",
2679
+ "symphonia-core",
2680
+ "symphonia-metadata",
2681
+ "symphonia-utils-xiph",
2682
+ ]
2683
+
2684
+ [[package]]
2685
+ name = "symphonia-format-mkv"
2686
+ version = "0.5.4"
2687
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2688
+ checksum = "1bb43471a100f7882dc9937395bd5ebee8329298e766250b15b3875652fe3d6f"
2689
+ dependencies = [
2690
+ "lazy_static",
2691
+ "log",
2692
+ "symphonia-core",
2693
+ "symphonia-metadata",
2694
+ "symphonia-utils-xiph",
2695
+ ]
2696
+
2697
+ [[package]]
2698
+ name = "symphonia-format-ogg"
2699
+ version = "0.5.4"
2700
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2701
+ checksum = "ada3505789516bcf00fc1157c67729eded428b455c27ca370e41f4d785bfa931"
2702
+ dependencies = [
2703
+ "log",
2704
+ "symphonia-core",
2705
+ "symphonia-metadata",
2706
+ "symphonia-utils-xiph",
2707
+ ]
2708
+
2709
+ [[package]]
2710
+ name = "symphonia-format-riff"
2711
+ version = "0.5.4"
2712
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2713
+ checksum = "05f7be232f962f937f4b7115cbe62c330929345434c834359425e043bfd15f50"
2714
+ dependencies = [
2715
+ "extended",
2716
+ "log",
2717
+ "symphonia-core",
2718
+ "symphonia-metadata",
2719
+ ]
2720
+
2721
+ [[package]]
2722
+ name = "symphonia-metadata"
2723
+ version = "0.5.4"
2724
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2725
+ checksum = "bc622b9841a10089c5b18e99eb904f4341615d5aa55bbf4eedde1be721a4023c"
2726
+ dependencies = [
2727
+ "encoding_rs",
2728
+ "lazy_static",
2729
+ "log",
2730
+ "symphonia-core",
2731
+ ]
2732
+
2733
+ [[package]]
2734
+ name = "symphonia-utils-xiph"
2735
+ version = "0.5.4"
2736
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2737
+ checksum = "484472580fa49991afda5f6550ece662237b00c6f562c7d9638d1b086ed010fe"
2738
+ dependencies = [
2739
+ "symphonia-core",
2740
+ "symphonia-metadata",
2741
+ ]
2742
+
2743
+ [[package]]
2744
+ name = "syn"
2745
+ version = "1.0.109"
2746
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2747
+ checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
2748
+ dependencies = [
2749
+ "proc-macro2",
2750
+ "quote",
2751
+ "unicode-ident",
2752
+ ]
2753
+
2754
+ [[package]]
2755
+ name = "syn"
2756
+ version = "2.0.103"
2757
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2758
+ checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8"
2759
+ dependencies = [
2760
+ "proc-macro2",
2761
+ "quote",
2762
+ "unicode-ident",
2763
+ ]
2764
+
2765
+ [[package]]
2766
+ name = "sync_wrapper"
2767
+ version = "1.0.2"
2768
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2769
+ checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
2770
+ dependencies = [
2771
+ "futures-core",
2772
+ ]
2773
+
2774
+ [[package]]
2775
+ name = "synstructure"
2776
+ version = "0.13.2"
2777
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2778
+ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
2779
+ dependencies = [
2780
+ "proc-macro2",
2781
+ "quote",
2782
+ "syn 2.0.103",
2783
+ ]
2784
+
2785
+ [[package]]
2786
+ name = "sysctl"
2787
+ version = "0.5.5"
2788
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2789
+ checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
2790
+ dependencies = [
2791
+ "bitflags 2.9.1",
2792
+ "byteorder",
2793
+ "enum-as-inner",
2794
+ "libc",
2795
+ "thiserror 1.0.69",
2796
+ "walkdir",
2797
+ ]
2798
+
2799
+ [[package]]
2800
+ name = "sysctl"
2801
+ version = "0.6.0"
2802
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2803
+ checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
2804
+ dependencies = [
2805
+ "bitflags 2.9.1",
2806
+ "byteorder",
2807
+ "enum-as-inner",
2808
+ "libc",
2809
+ "thiserror 1.0.69",
2810
+ "walkdir",
2811
+ ]
2812
+
2813
+ [[package]]
2814
+ name = "system-configuration"
2815
+ version = "0.6.1"
2816
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2817
+ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
2818
+ dependencies = [
2819
+ "bitflags 2.9.1",
2820
+ "core-foundation",
2821
+ "system-configuration-sys",
2822
+ ]
2823
+
2824
+ [[package]]
2825
+ name = "system-configuration-sys"
2826
+ version = "0.6.0"
2827
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2828
+ checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
2829
+ dependencies = [
2830
+ "core-foundation-sys",
2831
+ "libc",
2832
+ ]
2833
+
2834
+ [[package]]
2835
+ name = "tempfile"
2836
+ version = "3.20.0"
2837
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2838
+ checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
2839
+ dependencies = [
2840
+ "fastrand",
2841
+ "getrandom 0.3.3",
2842
+ "once_cell",
2843
+ "rustix",
2844
+ "windows-sys 0.59.0",
2845
+ ]
2846
+
2847
+ [[package]]
2848
+ name = "thiserror"
2849
+ version = "1.0.69"
2850
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2851
+ checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
2852
+ dependencies = [
2853
+ "thiserror-impl 1.0.69",
2854
+ ]
2855
+
2856
+ [[package]]
2857
+ name = "thiserror"
2858
+ version = "2.0.12"
2859
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2860
+ checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
2861
+ dependencies = [
2862
+ "thiserror-impl 2.0.12",
2863
+ ]
2864
+
2865
+ [[package]]
2866
+ name = "thiserror-impl"
2867
+ version = "1.0.69"
2868
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2869
+ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
2870
+ dependencies = [
2871
+ "proc-macro2",
2872
+ "quote",
2873
+ "syn 2.0.103",
2874
+ ]
2875
+
2876
+ [[package]]
2877
+ name = "thiserror-impl"
2878
+ version = "2.0.12"
2879
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2880
+ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
2881
+ dependencies = [
2882
+ "proc-macro2",
2883
+ "quote",
2884
+ "syn 2.0.103",
2885
+ ]
2886
+
2887
+ [[package]]
2888
+ name = "tinystr"
2889
+ version = "0.8.1"
2890
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2891
+ checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
2892
+ dependencies = [
2893
+ "displaydoc",
2894
+ "zerovec",
2895
+ ]
2896
+
2897
+ [[package]]
2898
+ name = "tokio"
2899
+ version = "1.45.1"
2900
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2901
+ checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
2902
+ dependencies = [
2903
+ "backtrace",
2904
+ "bytes",
2905
+ "libc",
2906
+ "mio",
2907
+ "parking_lot",
2908
+ "pin-project-lite",
2909
+ "signal-hook-registry",
2910
+ "socket2",
2911
+ "tokio-macros",
2912
+ "windows-sys 0.52.0",
2913
+ ]
2914
+
2915
+ [[package]]
2916
+ name = "tokio-macros"
2917
+ version = "2.5.0"
2918
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2919
+ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
2920
+ dependencies = [
2921
+ "proc-macro2",
2922
+ "quote",
2923
+ "syn 2.0.103",
2924
+ ]
2925
+
2926
+ [[package]]
2927
+ name = "tokio-native-tls"
2928
+ version = "0.3.1"
2929
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2930
+ checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
2931
+ dependencies = [
2932
+ "native-tls",
2933
+ "tokio",
2934
+ ]
2935
+
2936
+ [[package]]
2937
+ name = "tokio-rustls"
2938
+ version = "0.26.2"
2939
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2940
+ checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
2941
+ dependencies = [
2942
+ "rustls",
2943
+ "tokio",
2944
+ ]
2945
+
2946
+ [[package]]
2947
+ name = "tokio-util"
2948
+ version = "0.6.10"
2949
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2950
+ checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507"
2951
+ dependencies = [
2952
+ "bytes",
2953
+ "futures-core",
2954
+ "futures-io",
2955
+ "futures-sink",
2956
+ "log",
2957
+ "pin-project-lite",
2958
+ "tokio",
2959
+ ]
2960
+
2961
+ [[package]]
2962
+ name = "tokio-util"
2963
+ version = "0.7.15"
2964
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2965
+ checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
2966
+ dependencies = [
2967
+ "bytes",
2968
+ "futures-core",
2969
+ "futures-sink",
2970
+ "pin-project-lite",
2971
+ "tokio",
2972
+ ]
2973
+
2974
+ [[package]]
2975
+ name = "toml_datetime"
2976
+ version = "0.6.11"
2977
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2978
+ checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
2979
+
2980
+ [[package]]
2981
+ name = "toml_edit"
2982
+ version = "0.22.27"
2983
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2984
+ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
2985
+ dependencies = [
2986
+ "indexmap",
2987
+ "toml_datetime",
2988
+ "winnow",
2989
+ ]
2990
+
2991
+ [[package]]
2992
+ name = "tower"
2993
+ version = "0.5.2"
2994
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2995
+ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
2996
+ dependencies = [
2997
+ "futures-core",
2998
+ "futures-util",
2999
+ "pin-project-lite",
3000
+ "sync_wrapper",
3001
+ "tokio",
3002
+ "tower-layer",
3003
+ "tower-service",
3004
+ ]
3005
+
3006
+ [[package]]
3007
+ name = "tower-http"
3008
+ version = "0.6.6"
3009
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3010
+ checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
3011
+ dependencies = [
3012
+ "bitflags 2.9.1",
3013
+ "bytes",
3014
+ "futures-util",
3015
+ "http",
3016
+ "http-body",
3017
+ "iri-string",
3018
+ "pin-project-lite",
3019
+ "tower",
3020
+ "tower-layer",
3021
+ "tower-service",
3022
+ ]
3023
+
3024
+ [[package]]
3025
+ name = "tower-layer"
3026
+ version = "0.3.3"
3027
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3028
+ checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
3029
+
3030
+ [[package]]
3031
+ name = "tower-service"
3032
+ version = "0.3.3"
3033
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3034
+ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
3035
+
3036
+ [[package]]
3037
+ name = "tracing"
3038
+ version = "0.1.41"
3039
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3040
+ checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
3041
+ dependencies = [
3042
+ "pin-project-lite",
3043
+ "tracing-attributes",
3044
+ "tracing-core",
3045
+ ]
3046
+
3047
+ [[package]]
3048
+ name = "tracing-attributes"
3049
+ version = "0.1.30"
3050
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3051
+ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
3052
+ dependencies = [
3053
+ "proc-macro2",
3054
+ "quote",
3055
+ "syn 2.0.103",
3056
+ ]
3057
+
3058
+ [[package]]
3059
+ name = "tracing-core"
3060
+ version = "0.1.34"
3061
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3062
+ checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
3063
+ dependencies = [
3064
+ "once_cell",
3065
+ ]
3066
+
3067
+ [[package]]
3068
+ name = "transpose"
3069
+ version = "0.2.3"
3070
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3071
+ checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e"
3072
+ dependencies = [
3073
+ "num-integer",
3074
+ "strength_reduce",
3075
+ ]
3076
+
3077
+ [[package]]
3078
+ name = "try-lock"
3079
+ version = "0.2.5"
3080
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3081
+ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
3082
+
3083
+ [[package]]
3084
+ name = "ug"
3085
+ version = "0.4.0"
3086
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3087
+ checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3"
3088
+ dependencies = [
3089
+ "gemm 0.18.2",
3090
+ "half",
3091
+ "libloading",
3092
+ "memmap2",
3093
+ "num",
3094
+ "num-traits",
3095
+ "num_cpus",
3096
+ "rayon",
3097
+ "safetensors",
3098
+ "serde",
3099
+ "thiserror 1.0.69",
3100
+ "tracing",
3101
+ "yoke 0.7.5",
3102
+ ]
3103
+
3104
+ [[package]]
3105
+ name = "ug-cuda"
3106
+ version = "0.4.0"
3107
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3108
+ checksum = "14053653d0b7fa7b21015aa9a62edc8af2f60aa6f9c54e66386ecce55f22ed29"
3109
+ dependencies = [
3110
+ "cudarc",
3111
+ "half",
3112
+ "serde",
3113
+ "thiserror 1.0.69",
3114
+ "ug",
3115
+ ]
3116
+
3117
+ [[package]]
3118
+ name = "ug-metal"
3119
+ version = "0.4.0"
3120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3121
+ checksum = "76daec3c7a32a1b4a0e3307b6b057fa067aa64e750713987410a2c402e5cd731"
3122
+ dependencies = [
3123
+ "half",
3124
+ "metal 0.29.0",
3125
+ "objc",
3126
+ "serde",
3127
+ "thiserror 1.0.69",
3128
+ "ug",
3129
+ ]
3130
+
3131
+ [[package]]
3132
+ name = "unicode-ident"
3133
+ version = "1.0.18"
3134
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3135
+ checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
3136
+
3137
+ [[package]]
3138
+ name = "unicode-width"
3139
+ version = "0.2.1"
3140
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3141
+ checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
3142
+
3143
+ [[package]]
3144
+ name = "untrusted"
3145
+ version = "0.9.0"
3146
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3147
+ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
3148
+
3149
+ [[package]]
3150
+ name = "ureq"
3151
+ version = "2.12.1"
3152
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3153
+ checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
3154
+ dependencies = [
3155
+ "base64",
3156
+ "flate2",
3157
+ "log",
3158
+ "native-tls",
3159
+ "once_cell",
3160
+ "rustls",
3161
+ "rustls-pki-types",
3162
+ "serde",
3163
+ "serde_json",
3164
+ "socks",
3165
+ "url",
3166
+ "webpki-roots 0.26.11",
3167
+ ]
3168
+
3169
+ [[package]]
3170
+ name = "url"
3171
+ version = "2.5.4"
3172
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3173
+ checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
3174
+ dependencies = [
3175
+ "form_urlencoded",
3176
+ "idna",
3177
+ "percent-encoding",
3178
+ ]
3179
+
3180
+ [[package]]
3181
+ name = "utf8_iter"
3182
+ version = "1.0.4"
3183
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3184
+ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
3185
+
3186
+ [[package]]
3187
+ name = "utf8parse"
3188
+ version = "0.2.2"
3189
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3190
+ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
3191
+
3192
+ [[package]]
3193
+ name = "vcpkg"
3194
+ version = "0.2.15"
3195
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3196
+ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
3197
+
3198
+ [[package]]
3199
+ name = "version_check"
3200
+ version = "0.9.5"
3201
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3202
+ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
3203
+
3204
+ [[package]]
3205
+ name = "walkdir"
3206
+ version = "2.5.0"
3207
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3208
+ checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
3209
+ dependencies = [
3210
+ "same-file",
3211
+ "winapi-util",
3212
+ ]
3213
+
3214
+ [[package]]
3215
+ name = "want"
3216
+ version = "0.3.1"
3217
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3218
+ checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
3219
+ dependencies = [
3220
+ "try-lock",
3221
+ ]
3222
+
3223
+ [[package]]
3224
+ name = "wasi"
3225
+ version = "0.11.1+wasi-snapshot-preview1"
3226
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3227
+ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
3228
+
3229
+ [[package]]
3230
+ name = "wasi"
3231
+ version = "0.14.2+wasi-0.2.4"
3232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3233
+ checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
3234
+ dependencies = [
3235
+ "wit-bindgen-rt",
3236
+ ]
3237
+
3238
+ [[package]]
3239
+ name = "wasm-bindgen"
3240
+ version = "0.2.100"
3241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3242
+ checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
3243
+ dependencies = [
3244
+ "cfg-if",
3245
+ "once_cell",
3246
+ "rustversion",
3247
+ "wasm-bindgen-macro",
3248
+ ]
3249
+
3250
+ [[package]]
3251
+ name = "wasm-bindgen-backend"
3252
+ version = "0.2.100"
3253
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3254
+ checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
3255
+ dependencies = [
3256
+ "bumpalo",
3257
+ "log",
3258
+ "proc-macro2",
3259
+ "quote",
3260
+ "syn 2.0.103",
3261
+ "wasm-bindgen-shared",
3262
+ ]
3263
+
3264
+ [[package]]
3265
+ name = "wasm-bindgen-futures"
3266
+ version = "0.4.50"
3267
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3268
+ checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
3269
+ dependencies = [
3270
+ "cfg-if",
3271
+ "js-sys",
3272
+ "once_cell",
3273
+ "wasm-bindgen",
3274
+ "web-sys",
3275
+ ]
3276
+
3277
+ [[package]]
3278
+ name = "wasm-bindgen-macro"
3279
+ version = "0.2.100"
3280
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3281
+ checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
3282
+ dependencies = [
3283
+ "quote",
3284
+ "wasm-bindgen-macro-support",
3285
+ ]
3286
+
3287
+ [[package]]
3288
+ name = "wasm-bindgen-macro-support"
3289
+ version = "0.2.100"
3290
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3291
+ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
3292
+ dependencies = [
3293
+ "proc-macro2",
3294
+ "quote",
3295
+ "syn 2.0.103",
3296
+ "wasm-bindgen-backend",
3297
+ "wasm-bindgen-shared",
3298
+ ]
3299
+
3300
+ [[package]]
3301
+ name = "wasm-bindgen-shared"
3302
+ version = "0.2.100"
3303
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3304
+ checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
3305
+ dependencies = [
3306
+ "unicode-ident",
3307
+ ]
3308
+
3309
+ [[package]]
3310
+ name = "wasm-streams"
3311
+ version = "0.4.2"
3312
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3313
+ checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
3314
+ dependencies = [
3315
+ "futures-util",
3316
+ "js-sys",
3317
+ "wasm-bindgen",
3318
+ "wasm-bindgen-futures",
3319
+ "web-sys",
3320
+ ]
3321
+
3322
+ [[package]]
3323
+ name = "web-sys"
3324
+ version = "0.3.77"
3325
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3326
+ checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
3327
+ dependencies = [
3328
+ "js-sys",
3329
+ "wasm-bindgen",
3330
+ ]
3331
+
3332
+ [[package]]
3333
+ name = "web-time"
3334
+ version = "1.1.0"
3335
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3336
+ checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
3337
+ dependencies = [
3338
+ "js-sys",
3339
+ "wasm-bindgen",
3340
+ ]
3341
+
3342
+ [[package]]
3343
+ name = "webpki-roots"
3344
+ version = "0.26.11"
3345
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3346
+ checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
3347
+ dependencies = [
3348
+ "webpki-roots 1.0.0",
3349
+ ]
3350
+
3351
+ [[package]]
3352
+ name = "webpki-roots"
3353
+ version = "1.0.0"
3354
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3355
+ checksum = "2853738d1cc4f2da3a225c18ec6c3721abb31961096e9dbf5ab35fa88b19cfdb"
3356
+ dependencies = [
3357
+ "rustls-pki-types",
3358
+ ]
3359
+
3360
+ [[package]]
3361
+ name = "winapi"
3362
+ version = "0.3.9"
3363
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3364
+ checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
3365
+ dependencies = [
3366
+ "winapi-i686-pc-windows-gnu",
3367
+ "winapi-x86_64-pc-windows-gnu",
3368
+ ]
3369
+
3370
+ [[package]]
3371
+ name = "winapi-i686-pc-windows-gnu"
3372
+ version = "0.4.0"
3373
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3374
+ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
3375
+
3376
+ [[package]]
3377
+ name = "winapi-util"
3378
+ version = "0.1.9"
3379
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3380
+ checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
3381
+ dependencies = [
3382
+ "windows-sys 0.59.0",
3383
+ ]
3384
+
3385
+ [[package]]
3386
+ name = "winapi-x86_64-pc-windows-gnu"
3387
+ version = "0.4.0"
3388
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3389
+ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
3390
+
3391
+ [[package]]
3392
+ name = "windows-link"
3393
+ version = "0.1.3"
3394
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3395
+ checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
3396
+
3397
+ [[package]]
3398
+ name = "windows-registry"
3399
+ version = "0.5.3"
3400
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3401
+ checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
3402
+ dependencies = [
3403
+ "windows-link",
3404
+ "windows-result",
3405
+ "windows-strings",
3406
+ ]
3407
+
3408
+ [[package]]
3409
+ name = "windows-result"
3410
+ version = "0.3.4"
3411
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3412
+ checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
3413
+ dependencies = [
3414
+ "windows-link",
3415
+ ]
3416
+
3417
+ [[package]]
3418
+ name = "windows-strings"
3419
+ version = "0.4.2"
3420
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3421
+ checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
3422
+ dependencies = [
3423
+ "windows-link",
3424
+ ]
3425
+
3426
+ [[package]]
3427
+ name = "windows-sys"
3428
+ version = "0.52.0"
3429
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3430
+ checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
3431
+ dependencies = [
3432
+ "windows-targets 0.52.6",
3433
+ ]
3434
+
3435
+ [[package]]
3436
+ name = "windows-sys"
3437
+ version = "0.59.0"
3438
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3439
+ checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
3440
+ dependencies = [
3441
+ "windows-targets 0.52.6",
3442
+ ]
3443
+
3444
+ [[package]]
3445
+ name = "windows-sys"
3446
+ version = "0.60.2"
3447
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3448
+ checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
3449
+ dependencies = [
3450
+ "windows-targets 0.53.2",
3451
+ ]
3452
+
3453
+ [[package]]
3454
+ name = "windows-targets"
3455
+ version = "0.52.6"
3456
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3457
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
3458
+ dependencies = [
3459
+ "windows_aarch64_gnullvm 0.52.6",
3460
+ "windows_aarch64_msvc 0.52.6",
3461
+ "windows_i686_gnu 0.52.6",
3462
+ "windows_i686_gnullvm 0.52.6",
3463
+ "windows_i686_msvc 0.52.6",
3464
+ "windows_x86_64_gnu 0.52.6",
3465
+ "windows_x86_64_gnullvm 0.52.6",
3466
+ "windows_x86_64_msvc 0.52.6",
3467
+ ]
3468
+
3469
+ [[package]]
3470
+ name = "windows-targets"
3471
+ version = "0.53.2"
3472
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3473
+ checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
3474
+ dependencies = [
3475
+ "windows_aarch64_gnullvm 0.53.0",
3476
+ "windows_aarch64_msvc 0.53.0",
3477
+ "windows_i686_gnu 0.53.0",
3478
+ "windows_i686_gnullvm 0.53.0",
3479
+ "windows_i686_msvc 0.53.0",
3480
+ "windows_x86_64_gnu 0.53.0",
3481
+ "windows_x86_64_gnullvm 0.53.0",
3482
+ "windows_x86_64_msvc 0.53.0",
3483
+ ]
3484
+
3485
+ [[package]]
3486
+ name = "windows_aarch64_gnullvm"
3487
+ version = "0.52.6"
3488
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3489
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
3490
+
3491
+ [[package]]
3492
+ name = "windows_aarch64_gnullvm"
3493
+ version = "0.53.0"
3494
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3495
+ checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
3496
+
3497
+ [[package]]
3498
+ name = "windows_aarch64_msvc"
3499
+ version = "0.52.6"
3500
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3501
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
3502
+
3503
+ [[package]]
3504
+ name = "windows_aarch64_msvc"
3505
+ version = "0.53.0"
3506
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3507
+ checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
3508
+
3509
+ [[package]]
3510
+ name = "windows_i686_gnu"
3511
+ version = "0.52.6"
3512
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3513
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
3514
+
3515
+ [[package]]
3516
+ name = "windows_i686_gnu"
3517
+ version = "0.53.0"
3518
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3519
+ checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
3520
+
3521
+ [[package]]
3522
+ name = "windows_i686_gnullvm"
3523
+ version = "0.52.6"
3524
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3525
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
3526
+
3527
+ [[package]]
3528
+ name = "windows_i686_gnullvm"
3529
+ version = "0.53.0"
3530
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3531
+ checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
3532
+
3533
+ [[package]]
3534
+ name = "windows_i686_msvc"
3535
+ version = "0.52.6"
3536
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3537
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
3538
+
3539
+ [[package]]
3540
+ name = "windows_i686_msvc"
3541
+ version = "0.53.0"
3542
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3543
+ checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
3544
+
3545
+ [[package]]
3546
+ name = "windows_x86_64_gnu"
3547
+ version = "0.52.6"
3548
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3549
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
3550
+
3551
+ [[package]]
3552
+ name = "windows_x86_64_gnu"
3553
+ version = "0.53.0"
3554
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3555
+ checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
3556
+
3557
+ [[package]]
3558
+ name = "windows_x86_64_gnullvm"
3559
+ version = "0.52.6"
3560
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3561
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
3562
+
3563
+ [[package]]
3564
+ name = "windows_x86_64_gnullvm"
3565
+ version = "0.53.0"
3566
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3567
+ checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
3568
+
3569
+ [[package]]
3570
+ name = "windows_x86_64_msvc"
3571
+ version = "0.52.6"
3572
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3573
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
3574
+
3575
+ [[package]]
3576
+ name = "windows_x86_64_msvc"
3577
+ version = "0.53.0"
3578
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3579
+ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
3580
+
3581
+ [[package]]
3582
+ name = "winnow"
3583
+ version = "0.7.11"
3584
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3585
+ checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
3586
+ dependencies = [
3587
+ "memchr",
3588
+ ]
3589
+
3590
+ [[package]]
3591
+ name = "wit-bindgen-rt"
3592
+ version = "0.39.0"
3593
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3594
+ checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
3595
+ dependencies = [
3596
+ "bitflags 2.9.1",
3597
+ ]
3598
+
3599
+ [[package]]
3600
+ name = "writeable"
3601
+ version = "0.6.1"
3602
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3603
+ checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
3604
+
3605
+ [[package]]
3606
+ name = "yoke"
3607
+ version = "0.7.5"
3608
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3609
+ checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
3610
+ dependencies = [
3611
+ "serde",
3612
+ "stable_deref_trait",
3613
+ "yoke-derive 0.7.5",
3614
+ "zerofrom",
3615
+ ]
3616
+
3617
+ [[package]]
3618
+ name = "yoke"
3619
+ version = "0.8.0"
3620
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3621
+ checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
3622
+ dependencies = [
3623
+ "serde",
3624
+ "stable_deref_trait",
3625
+ "yoke-derive 0.8.0",
3626
+ "zerofrom",
3627
+ ]
3628
+
3629
+ [[package]]
3630
+ name = "yoke-derive"
3631
+ version = "0.7.5"
3632
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3633
+ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
3634
+ dependencies = [
3635
+ "proc-macro2",
3636
+ "quote",
3637
+ "syn 2.0.103",
3638
+ "synstructure",
3639
+ ]
3640
+
3641
+ [[package]]
3642
+ name = "yoke-derive"
3643
+ version = "0.8.0"
3644
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3645
+ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
3646
+ dependencies = [
3647
+ "proc-macro2",
3648
+ "quote",
3649
+ "syn 2.0.103",
3650
+ "synstructure",
3651
+ ]
3652
+
3653
+ [[package]]
3654
+ name = "zerocopy"
3655
+ version = "0.8.26"
3656
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3657
+ checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
3658
+ dependencies = [
3659
+ "zerocopy-derive",
3660
+ ]
3661
+
3662
+ [[package]]
3663
+ name = "zerocopy-derive"
3664
+ version = "0.8.26"
3665
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3666
+ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
3667
+ dependencies = [
3668
+ "proc-macro2",
3669
+ "quote",
3670
+ "syn 2.0.103",
3671
+ ]
3672
+
3673
+ [[package]]
3674
+ name = "zerofrom"
3675
+ version = "0.1.6"
3676
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3677
+ checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
3678
+ dependencies = [
3679
+ "zerofrom-derive",
3680
+ ]
3681
+
3682
+ [[package]]
3683
+ name = "zerofrom-derive"
3684
+ version = "0.1.6"
3685
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3686
+ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
3687
+ dependencies = [
3688
+ "proc-macro2",
3689
+ "quote",
3690
+ "syn 2.0.103",
3691
+ "synstructure",
3692
+ ]
3693
+
3694
+ [[package]]
3695
+ name = "zeroize"
3696
+ version = "1.8.1"
3697
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3698
+ checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
3699
+
3700
+ [[package]]
3701
+ name = "zerotrie"
3702
+ version = "0.2.2"
3703
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3704
+ checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"
3705
+ dependencies = [
3706
+ "displaydoc",
3707
+ "yoke 0.8.0",
3708
+ "zerofrom",
3709
+ ]
3710
+
3711
+ [[package]]
3712
+ name = "zerovec"
3713
+ version = "0.11.2"
3714
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3715
+ checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428"
3716
+ dependencies = [
3717
+ "yoke 0.8.0",
3718
+ "zerofrom",
3719
+ "zerovec-derive",
3720
+ ]
3721
+
3722
+ [[package]]
3723
+ name = "zerovec-derive"
3724
+ version = "0.11.1"
3725
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3726
+ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
3727
+ dependencies = [
3728
+ "proc-macro2",
3729
+ "quote",
3730
+ "syn 2.0.103",
3731
+ ]
3732
+
3733
+ [[package]]
3734
+ name = "zip"
3735
+ version = "1.1.4"
3736
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3737
+ checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164"
3738
+ dependencies = [
3739
+ "arbitrary",
3740
+ "crc32fast",
3741
+ "crossbeam-utils",
3742
+ "displaydoc",
3743
+ "indexmap",
3744
+ "num_enum",
3745
+ "thiserror 1.0.69",
3746
+ ]
stt-rs/Cargo.toml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [package]
2
+ name = "kyutai-stt-rs"
3
+ version = "0.1.0"
4
+ edition = "2024"
5
+
6
+ [dependencies]
7
+ anyhow = "1.0"
8
+ candle = { version = "0.9.1", package = "candle-core" }
9
+ candle-nn = "0.9.1"
10
+ candle-transformers = "0.9.1"
11
+ clap = { version = "4.4.12", features = ["derive"] }
12
+ hf-hub = "0.4.3"
13
+ kaudio = "0.2.1"
14
+ moshi = "0.6.1"
15
+ sentencepiece = "0.11.3"
16
+ serde = { version = "1.0.210", features = ["derive"] }
17
+ serde_json = "1.0.115"
18
+
19
+ [features]
20
+ default = []
21
+ cuda = ["candle/cuda", "candle-nn/cuda"]
22
+ cudnn = ["candle/cudnn", "candle-nn/cudnn"]
23
+ metal = ["candle/metal", "candle-nn/metal"]
24
+
25
+ [profile.release]
26
+ debug = true
27
+
28
+ [profile.release-no-debug]
29
+ inherits = "release"
30
+ debug = false
31
+
stt-rs/src/main.rs ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright (c) Kyutai, all rights reserved.
2
+ // This source code is licensed under the license found in the
3
+ // LICENSE file in the root directory of this source tree.
4
+
5
+ use anyhow::Result;
6
+ use candle::{Device, Tensor};
7
+ use clap::Parser;
8
+
9
+ #[derive(Debug, Parser)]
10
+ struct Args {
11
+ /// The audio input file, in wav/mp3/ogg/... format.
12
+ in_file: String,
13
+
14
+ /// The repo where to get the model from.
15
+ #[arg(long, default_value = "kyutai/stt-1b-en_fr-candle")]
16
+ hf_repo: String,
17
+
18
+ /// Path to the model file in the repo.
19
+ #[arg(long, default_value = "model.safetensors")]
20
+ model_path: String,
21
+
22
+ /// Run the model on cpu.
23
+ #[arg(long)]
24
+ cpu: bool,
25
+
26
+ /// Display word level timestamps.
27
+ #[arg(long)]
28
+ timestamps: bool,
29
+
30
+ /// Display the level of voice activity detection (VAD).
31
+ #[arg(long)]
32
+ vad: bool,
33
+ }
34
+
35
+ fn device(cpu: bool) -> Result<Device> {
36
+ if cpu {
37
+ Ok(Device::Cpu)
38
+ } else if candle::utils::cuda_is_available() {
39
+ Ok(Device::new_cuda(0)?)
40
+ } else if candle::utils::metal_is_available() {
41
+ Ok(Device::new_metal(0)?)
42
+ } else {
43
+ Ok(Device::Cpu)
44
+ }
45
+ }
46
+
47
+ #[derive(Debug, serde::Deserialize)]
48
+ struct SttConfig {
49
+ audio_silence_prefix_seconds: f64,
50
+ audio_delay_seconds: f64,
51
+ }
52
+
53
+ #[derive(Debug, serde::Deserialize)]
54
+ struct Config {
55
+ mimi_name: String,
56
+ tokenizer_name: String,
57
+ card: usize,
58
+ text_card: usize,
59
+ dim: usize,
60
+ n_q: usize,
61
+ context: usize,
62
+ max_period: f64,
63
+ num_heads: usize,
64
+ num_layers: usize,
65
+ causal: bool,
66
+ stt_config: SttConfig,
67
+ }
68
+
69
+ impl Config {
70
+ fn model_config(&self, vad: bool) -> moshi::lm::Config {
71
+ let lm_cfg = moshi::transformer::Config {
72
+ d_model: self.dim,
73
+ num_heads: self.num_heads,
74
+ num_layers: self.num_layers,
75
+ dim_feedforward: self.dim * 4,
76
+ causal: self.causal,
77
+ norm_first: true,
78
+ bias_ff: false,
79
+ bias_attn: false,
80
+ layer_scale: None,
81
+ context: self.context,
82
+ max_period: self.max_period as usize,
83
+ use_conv_block: false,
84
+ use_conv_bias: true,
85
+ cross_attention: None,
86
+ gating: Some(candle_nn::Activation::Silu),
87
+ norm: moshi::NormType::RmsNorm,
88
+ positional_embedding: moshi::transformer::PositionalEmbedding::Rope,
89
+ conv_layout: false,
90
+ conv_kernel_size: 3,
91
+ kv_repeat: 1,
92
+ max_seq_len: 4096 * 4,
93
+ shared_cross_attn: false,
94
+ };
95
+ let extra_heads = if vad {
96
+ Some(moshi::lm::ExtraHeadsConfig {
97
+ num_heads: 4,
98
+ dim: 6,
99
+ })
100
+ } else {
101
+ None
102
+ };
103
+ moshi::lm::Config {
104
+ transformer: lm_cfg,
105
+ depformer: None,
106
+ audio_vocab_size: self.card + 1,
107
+ text_in_vocab_size: self.text_card + 1,
108
+ text_out_vocab_size: self.text_card,
109
+ audio_codebooks: self.n_q,
110
+ conditioners: Default::default(),
111
+ extra_heads,
112
+ }
113
+ }
114
+ }
115
+
116
+ struct Model {
117
+ state: moshi::asr::State,
118
+ text_tokenizer: sentencepiece::SentencePieceProcessor,
119
+ timestamps: bool,
120
+ vad: bool,
121
+ config: Config,
122
+ dev: Device,
123
+ }
124
+
125
+ impl Model {
126
+ fn load_from_hf(args: &Args, dev: &Device) -> Result<Self> {
127
+ // Retrieve the model files from the Hugging Face Hub
128
+ let api = hf_hub::api::sync::Api::new()?;
129
+ let repo = api.model(args.hf_repo.to_string());
130
+ let config_file = repo.get("config.json")?;
131
+ let config: Config = serde_json::from_str(&std::fs::read_to_string(&config_file)?)?;
132
+ let tokenizer_file = repo.get(&config.tokenizer_name)?;
133
+ let model_file = repo.get(&args.model_path)?;
134
+ let mimi_file = repo.get(&config.mimi_name)?;
135
+ let is_quantized = model_file.to_str().unwrap().ends_with(".gguf");
136
+
137
+ let text_tokenizer = sentencepiece::SentencePieceProcessor::open(&tokenizer_file)?;
138
+
139
+ let lm = if is_quantized {
140
+ let vb_lm = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(
141
+ &model_file,
142
+ dev,
143
+ )?;
144
+ moshi::lm::LmModel::new(
145
+ &config.model_config(args.vad),
146
+ moshi::nn::MaybeQuantizedVarBuilder::Quantized(vb_lm),
147
+ )?
148
+ } else {
149
+ let dtype = dev.bf16_default_to_f32();
150
+ let vb_lm = unsafe {
151
+ candle_nn::VarBuilder::from_mmaped_safetensors(&[&model_file], dtype, dev)?
152
+ };
153
+ moshi::lm::LmModel::new(
154
+ &config.model_config(args.vad),
155
+ moshi::nn::MaybeQuantizedVarBuilder::Real(vb_lm),
156
+ )?
157
+ };
158
+
159
+ let audio_tokenizer = moshi::mimi::load(mimi_file.to_str().unwrap(), Some(32), dev)?;
160
+ let asr_delay_in_tokens = (config.stt_config.audio_delay_seconds * 12.5) as usize;
161
+ let state = moshi::asr::State::new(1, asr_delay_in_tokens, 0., audio_tokenizer, lm)?;
162
+ Ok(Model {
163
+ state,
164
+ config,
165
+ text_tokenizer,
166
+ timestamps: args.timestamps,
167
+ vad: args.vad,
168
+ dev: dev.clone(),
169
+ })
170
+ }
171
+
172
+ fn run(&mut self, mut pcm: Vec<f32>) -> Result<()> {
173
+ use std::io::Write;
174
+
175
+ // Add the silence prefix to the audio.
176
+ if self.config.stt_config.audio_silence_prefix_seconds > 0.0 {
177
+ let silence_len =
178
+ (self.config.stt_config.audio_silence_prefix_seconds * 24000.0) as usize;
179
+ pcm.splice(0..0, vec![0.0; silence_len]);
180
+ }
181
+ // Add some silence at the end to ensure all the audio is processed.
182
+ let suffix = (self.config.stt_config.audio_delay_seconds * 24000.0) as usize;
183
+ pcm.resize(pcm.len() + suffix + 24000, 0.0);
184
+
185
+ let mut last_word = None;
186
+ let mut printed_eot = false;
187
+ for pcm in pcm.chunks(1920) {
188
+ let pcm = Tensor::new(pcm, &self.dev)?.reshape((1, 1, ()))?;
189
+ let asr_msgs = self.state.step_pcm(pcm, None, &().into(), |_, _, _| ())?;
190
+ for asr_msg in asr_msgs.iter() {
191
+ match asr_msg {
192
+ moshi::asr::AsrMsg::Step { prs, .. } => {
193
+ // prs is the probability of having no voice activity for different time
194
+ // horizons.
195
+ // In kyutai/stt-1b-en_fr-candle, these horizons are 0.5s, 1s, 2s, and 3s.
196
+ if self.vad && prs[2][0] > 0.5 && !printed_eot {
197
+ printed_eot = true;
198
+ if !self.timestamps {
199
+ print!(" <endofturn pr={}>", prs[2][0]);
200
+ } else {
201
+ println!("<endofturn pr={}>", prs[2][0]);
202
+ }
203
+ }
204
+ }
205
+ moshi::asr::AsrMsg::EndWord { stop_time, .. } => {
206
+ printed_eot = false;
207
+ #[allow(clippy::collapsible_if)]
208
+ if self.timestamps {
209
+ if let Some((word, start_time)) = last_word.take() {
210
+ println!("[{start_time:5.2}-{stop_time:5.2}] {word}");
211
+ }
212
+ }
213
+ }
214
+ moshi::asr::AsrMsg::Word {
215
+ tokens, start_time, ..
216
+ } => {
217
+ printed_eot = false;
218
+ let word = self
219
+ .text_tokenizer
220
+ .decode_piece_ids(tokens)
221
+ .unwrap_or_else(|_| String::new());
222
+ if !self.timestamps {
223
+ print!(" {word}");
224
+ std::io::stdout().flush()?
225
+ } else {
226
+ if let Some((word, prev_start_time)) = last_word.take() {
227
+ println!("[{prev_start_time:5.2}-{start_time:5.2}] {word}");
228
+ }
229
+ last_word = Some((word, *start_time));
230
+ }
231
+ }
232
+ }
233
+ }
234
+ }
235
+ if let Some((word, start_time)) = last_word.take() {
236
+ println!("[{start_time:5.2}- ] {word}");
237
+ }
238
+ println!();
239
+ Ok(())
240
+ }
241
+ }
242
+
243
+ fn main() -> Result<()> {
244
+ let args = Args::parse();
245
+ let device = device(args.cpu)?;
246
+ println!("Using device: {:?}", device);
247
+
248
+ println!("Loading audio file from: {}", args.in_file);
249
+ let (pcm, sample_rate) = kaudio::pcm_decode(&args.in_file)?;
250
+ let pcm = if sample_rate != 24_000 {
251
+ kaudio::resample(&pcm, sample_rate as usize, 24_000)?
252
+ } else {
253
+ pcm
254
+ };
255
+ println!("Loading model from repository: {}", args.hf_repo);
256
+ let mut model = Model::load_from_hf(&args, &device)?;
257
+ println!("Running inference");
258
+ model.run(pcm)?;
259
+ Ok(())
260
+ }
stt_pytorch.ipynb ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "colab": {
8
+ "base_uri": "https://localhost:8080/"
9
+ },
10
+ "id": "gJEMjPgeI-rw",
11
+ "outputId": "7491c067-b1be-4505-b3f5-19ba4c00a593"
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "!pip install moshi"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": null,
21
+ "metadata": {
22
+ "colab": {
23
+ "base_uri": "https://localhost:8080/"
24
+ },
25
+ "id": "CA4K5iDFJcqJ",
26
+ "outputId": "b609843a-a193-4729-b099-5f8780532333"
27
+ },
28
+ "outputs": [],
29
+ "source": [
30
+ "!wget https://github.com/kyutai-labs/moshi/raw/refs/heads/main/data/sample_fr_hibiki_crepes.mp3"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "metadata": {
37
+ "id": "VA3Haix3IZ8Q"
38
+ },
39
+ "outputs": [],
40
+ "source": [
41
+ "from dataclasses import dataclass\n",
42
+ "import time\n",
43
+ "import sentencepiece\n",
44
+ "import sphn\n",
45
+ "import textwrap\n",
46
+ "import torch\n",
47
+ "\n",
48
+ "from moshi.models import loaders, MimiModel, LMModel, LMGen"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": null,
54
+ "metadata": {
55
+ "id": "9AK5zBMTI9bw"
56
+ },
57
+ "outputs": [],
58
+ "source": [
59
+ "@dataclass\n",
60
+ "class InferenceState:\n",
61
+ " mimi: MimiModel\n",
62
+ " text_tokenizer: sentencepiece.SentencePieceProcessor\n",
63
+ " lm_gen: LMGen\n",
64
+ "\n",
65
+ " def __init__(\n",
66
+ " self,\n",
67
+ " mimi: MimiModel,\n",
68
+ " text_tokenizer: sentencepiece.SentencePieceProcessor,\n",
69
+ " lm: LMModel,\n",
70
+ " batch_size: int,\n",
71
+ " device: str | torch.device,\n",
72
+ " ):\n",
73
+ " self.mimi = mimi\n",
74
+ " self.text_tokenizer = text_tokenizer\n",
75
+ " self.lm_gen = LMGen(lm, temp=0, temp_text=0, use_sampling=False)\n",
76
+ " self.device = device\n",
77
+ " self.frame_size = int(self.mimi.sample_rate / self.mimi.frame_rate)\n",
78
+ " self.batch_size = batch_size\n",
79
+ " self.mimi.streaming_forever(batch_size)\n",
80
+ " self.lm_gen.streaming_forever(batch_size)\n",
81
+ "\n",
82
+ " def run(self, in_pcms: torch.Tensor):\n",
83
+ " ntokens = 0\n",
84
+ " first_frame = True\n",
85
+ " chunks = [\n",
86
+ " c\n",
87
+ " for c in in_pcms.split(self.frame_size, dim=2)\n",
88
+ " if c.shape[-1] == self.frame_size\n",
89
+ " ]\n",
90
+ " start_time = time.time()\n",
91
+ " all_text = []\n",
92
+ " for chunk in chunks:\n",
93
+ " codes = self.mimi.encode(chunk)\n",
94
+ " if first_frame:\n",
95
+ " # Ensure that the first slice of codes is properly seen by the transformer\n",
96
+ " # as otherwise the first slice is replaced by the initial tokens.\n",
97
+ " tokens = self.lm_gen.step(codes)\n",
98
+ " first_frame = False\n",
99
+ " tokens = self.lm_gen.step(codes)\n",
100
+ " if tokens is None:\n",
101
+ " continue\n",
102
+ " assert tokens.shape[1] == 1\n",
103
+ " one_text = tokens[0, 0].cpu()\n",
104
+ " if one_text.item() not in [0, 3]:\n",
105
+ " text = self.text_tokenizer.id_to_piece(one_text.item())\n",
106
+ " text = text.replace(\"▁\", \" \")\n",
107
+ " all_text.append(text)\n",
108
+ " ntokens += 1\n",
109
+ " dt = time.time() - start_time\n",
110
+ " print(\n",
111
+ " f\"processed {ntokens} steps in {dt:.0f}s, {1000 * dt / ntokens:.2f}ms/step\"\n",
112
+ " )\n",
113
+ " return \"\".join(all_text)"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {
120
+ "colab": {
121
+ "base_uri": "https://localhost:8080/",
122
+ "height": 353,
123
+ "referenced_widgets": [
124
+ "0a5f6f887e2b4cd1990a0e9ec0153ed9",
125
+ "f7893826fcba4bdc87539589d669249b",
126
+ "8805afb12c484781be85082ff02dad13",
127
+ "97679c0d9ab44bed9a3456f2fcb541fd",
128
+ "d73c0321bed54a52b5e1da0a7788e32a",
129
+ "d67be13a920d4fc89e5570b5b29fc1d2",
130
+ "6b377c2d7bf945fb89e46c39d246a332",
131
+ "b82ff365c78e41ad8094b46daf79449d",
132
+ "477aa7fa82dc42d5bce6f1743c45d626",
133
+ "cbd288510c474430beb66f346f382c45",
134
+ "aafc347cdf28428ea6a7abe5b46b726f",
135
+ "fca09acd5d0d45468c8b04bfb2de7646",
136
+ "79e35214b51b4a9e9b3f7144b0b34f7b",
137
+ "89e9a37f69904bd48b954d627bff6687",
138
+ "57028789c78248a7b0ad4f031c9545c9",
139
+ "1150fcb427994c2984d4d0f4e4745fe5",
140
+ "e24b1fc52f294f849019c9b3befb613f",
141
+ "8724878682cf4c3ca992667c45009398",
142
+ "36a22c977d5242008871310133b7d2af",
143
+ "5b3683cad5cb4877b43fadd003edf97f",
144
+ "703f98272e4d469d8f27f5a465715dd8",
145
+ "9dbe02ef5fac41cfaee3d02946e65c88",
146
+ "37faa87ad03a4271992c21ce6a629e18",
147
+ "570c547e48cd421b814b2c5e028e4c0b",
148
+ "b173768580fc4c0a8e3abf272e4c363a",
149
+ "e57d1620f0a9427b85d8b4885ef4e8e3",
150
+ "5dd4474df70743498b616608182714dd",
151
+ "cc907676a65f4ad1bf68a77b4a00e89b",
152
+ "a34abc3b118e4305951a466919c28ff6",
153
+ "a77ccfcdb90146c7a63b4b2d232bc494",
154
+ "f7313e6e3a27475993cab3961d6ae363",
155
+ "39b47fad9c554839868fe9e4bbf7def2",
156
+ "14e9511ea0bd44c49f0cf3abf1a6d40e",
157
+ "a4ea8e0c4cac4d5e88b7e3f527e4fe90",
158
+ "571afc0f4b2840c9830d6b5a307ed1f9",
159
+ "6ec593cab5b64f0ea638bb175b9daa5c",
160
+ "77a52aed00ae408bb24524880e19ec8a",
161
+ "0b2de4b29b4b44fe9d96361a40c793d0",
162
+ "3c5b5fb1a5ac468a89c1058bd90cfb58",
163
+ "e53e0a2a240e43cfa562c89b3d703dea",
164
+ "35966343cf9249ef8bc028a0d5c5f97d",
165
+ "e36a37e0d41c47ccb8bc6d56c19fb17c",
166
+ "279ccf7de43847a1a6579c9182a46cc8",
167
+ "41b5d6ab0b7d43c790a55f125c0e7494"
168
+ ]
169
+ },
170
+ "id": "UsQJdAgkLp9n",
171
+ "outputId": "9b7131c3-69c5-4323-8312-2ce7621d8869"
172
+ },
173
+ "outputs": [],
174
+ "source": [
175
+ "device = \"cuda\"\n",
176
+ "# Use the en+fr low latency model, an alternative is kyutai/stt-2.6b-en\n",
177
+ "checkpoint_info = loaders.CheckpointInfo.from_hf_repo(\"kyutai/stt-1b-en_fr\")\n",
178
+ "mimi = checkpoint_info.get_mimi(device=device)\n",
179
+ "text_tokenizer = checkpoint_info.get_text_tokenizer()\n",
180
+ "lm = checkpoint_info.get_moshi(device=device)\n",
181
+ "in_pcms, _ = sphn.read(\"sample_fr_hibiki_crepes.mp3\", sample_rate=mimi.sample_rate)\n",
182
+ "in_pcms = torch.from_numpy(in_pcms).to(device=device)\n",
183
+ "\n",
184
+ "stt_config = checkpoint_info.stt_config\n",
185
+ "pad_left = int(stt_config.get(\"audio_silence_prefix_seconds\", 0.0) * 24000)\n",
186
+ "pad_right = int((stt_config.get(\"audio_delay_seconds\", 0.0) + 1.0) * 24000)\n",
187
+ "in_pcms = torch.nn.functional.pad(in_pcms, (pad_left, pad_right), mode=\"constant\")\n",
188
+ "in_pcms = in_pcms[None, 0:1].expand(1, -1, -1)\n",
189
+ "\n",
190
+ "state = InferenceState(mimi, text_tokenizer, lm, batch_size=1, device=device)\n",
191
+ "text = state.run(in_pcms)\n",
192
+ "print(textwrap.fill(text, width=100))"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "metadata": {
199
+ "colab": {
200
+ "base_uri": "https://localhost:8080/",
201
+ "height": 75
202
+ },
203
+ "id": "CIAXs9oaPrtj",
204
+ "outputId": "94cc208c-2454-4dd4-a64e-d79025144af5"
205
+ },
206
+ "outputs": [],
207
+ "source": [
208
+ "from IPython.display import Audio\n",
209
+ "\n",
210
+ "Audio(\"sample_fr_hibiki_crepes.mp3\")"
211
+ ]
212
+ },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": null,
216
+ "metadata": {
217
+ "id": "qkUZ6CBKOdTa"
218
+ },
219
+ "outputs": [],
220
+ "source": []
221
+ }
222
+ ],
223
+ "metadata": {
224
+ "accelerator": "GPU",
225
+ "colab": {
226
+ "gpuType": "L4",
227
+ "provenance": []
228
+ },
229
+ "kernelspec": {
230
+ "display_name": "Python 3 (ipykernel)",
231
+ "language": "python",
232
+ "name": "python3"
233
+ }
234
+ },
235
+ "nbformat": 4,
236
+ "nbformat_minor": 0
237
+ }
tts_pytorch.ipynb ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# Fast install, might break in the future.\n",
11
+ "!pip install 'safetensors<0.6'\n",
12
+ "!pip install 'sphn<0.2'\n",
13
+ "!pip install --no-deps \"moshi==0.2.11\"\n",
14
+ "# Slow install (will download torch and cuda), but future proof.\n",
15
+ "# !pip install \"moshi==0.2.11\""
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": null,
21
+ "id": "1",
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "import numpy as np\n",
26
+ "import torch\n",
27
+ "from moshi.models.loaders import CheckpointInfo\n",
28
+ "from moshi.models.tts import DEFAULT_DSM_TTS_REPO, DEFAULT_DSM_TTS_VOICE_REPO, TTSModel\n",
29
+ "\n",
30
+ "from IPython.display import display, Audio"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "id": "2",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "# Configuration\n",
41
+ "text = \"Hey there! How are you? I had the craziest day today.\"\n",
42
+ "voice = \"expresso/ex03-ex01_happy_001_channel1_334s.wav\"\n",
43
+ "print(f\"See https://huggingface.co/{DEFAULT_DSM_TTS_VOICE_REPO} for available voices.\")"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "id": "3",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "# Set everything up\n",
54
+ "checkpoint_info = CheckpointInfo.from_hf_repo(DEFAULT_DSM_TTS_REPO)\n",
55
+ "tts_model = TTSModel.from_checkpoint_info(\n",
56
+ " checkpoint_info, n_q=32, temp=0.6, device=torch.device(\"cuda\")\n",
57
+ ")\n",
58
+ "\n",
59
+ "# If you want to make a dialog, you can pass more than one turn [text_speaker_1, text_speaker_2, text_2_speaker_1, ...]\n",
60
+ "entries = tts_model.prepare_script([text], padding_between=1)\n",
61
+ "voice_path = tts_model.get_voice_path(voice)\n",
62
+ "# CFG coef goes here because the model was trained with CFG distillation,\n",
63
+ "# so it's not _actually_ doing CFG at inference time.\n",
64
+ "# Also, if you are generating a dialog, you should have two voices in the list.\n",
65
+ "condition_attributes = tts_model.make_condition_attributes([voice_path], cfg_coef=2.0)"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": null,
71
+ "id": "4",
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "print(\"Generating audio...\")\n",
76
+ "\n",
77
+ "pcms = []\n",
78
+ "\n",
79
+ "\n",
80
+ "def _on_frame(frame):\n",
81
+ " print(\"Step\", len(pcms), end=\"\\r\")\n",
82
+ " if (frame != -1).all():\n",
83
+ " pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()\n",
84
+ " pcms.append(np.clip(pcm[0, 0], -1, 1))\n",
85
+ "\n",
86
+ "\n",
87
+ "# You could also generate multiple audios at once by extending the following lists.\n",
88
+ "all_entries = [entries]\n",
89
+ "all_condition_attributes = [condition_attributes]\n",
90
+ "with tts_model.mimi.streaming(len(all_entries)):\n",
91
+ " result = tts_model.generate(\n",
92
+ " all_entries, all_condition_attributes, on_frame=_on_frame\n",
93
+ " )\n",
94
+ "\n",
95
+ "print(\"Done generating.\")\n",
96
+ "audio = np.concatenate(pcms, axis=-1)"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": null,
102
+ "id": "5",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "display(Audio(audio, rate=tts_model.mimi.sample_rate, autoplay=True))"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "id": "6",
113
+ "metadata": {},
114
+ "outputs": [],
115
+ "source": []
116
+ }
117
+ ],
118
+ "metadata": {
119
+ "kernelspec": {
120
+ "display_name": "Python 3 (ipykernel)",
121
+ "language": "python",
122
+ "name": "python3"
123
+ },
124
+ "language_info": {
125
+ "codemirror_mode": {
126
+ "name": "ipython",
127
+ "version": 3
128
+ },
129
+ "file_extension": ".py",
130
+ "mimetype": "text/x-python",
131
+ "name": "python",
132
+ "nbconvert_exporter": "python",
133
+ "pygments_lexer": "ipython3",
134
+ "version": "3.13.2"
135
+ }
136
+ },
137
+ "nbformat": 4,
138
+ "nbformat_minor": 5
139
+ }