aoiandroid ThreadAbort committed on
Commit
33774ef
·
0 Parent(s):

Duplicate from ThreadAbort/IndexTTS-Rust

Browse files

Co-authored-by: Christopher Chenoweth <ThreadAbort@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +58 -0
  2. .gitignore +38 -0
  3. CLAUDE.md +140 -0
  4. CODEBASE_ANALYSIS.md +594 -0
  5. Cargo.lock +3683 -0
  6. Cargo.toml +88 -0
  7. DIRECTORY_STRUCTURE.txt +224 -0
  8. EXPLORATION_SUMMARY.md +283 -0
  9. LICENSE +201 -0
  10. MANIFEST.in +3 -0
  11. README.md +295 -0
  12. SOURCE_FILE_LISTING.txt +513 -0
  13. archive/README_INDEXTTS_1_5.md +247 -0
  14. benches/inference.rs +98 -0
  15. benches/mel_spectrogram.rs +45 -0
  16. config.yaml +51 -0
  17. context.md +383 -0
  18. crates/marine_salience/Cargo.toml +18 -0
  19. crates/marine_salience/src/config.rs +140 -0
  20. crates/marine_salience/src/ema.rs +126 -0
  21. crates/marine_salience/src/lib.rs +42 -0
  22. crates/marine_salience/src/packet.rs +122 -0
  23. crates/marine_salience/src/processor.rs +334 -0
  24. docs/Integrating Marine Algorithm into IndexTTS-Rust.md +450 -0
  25. examples/analyze_chris.rs +3 -0
  26. examples/cases.jsonl +3 -0
  27. examples/emo_hate.wav +3 -0
  28. examples/emo_sad.wav +3 -0
  29. examples/marine_test.rs +3 -0
  30. examples/voice_01.wav +3 -0
  31. examples/voice_02.wav +3 -0
  32. examples/voice_03.wav +3 -0
  33. examples/voice_04.wav +3 -0
  34. examples/voice_05.wav +3 -0
  35. examples/voice_06.wav +3 -0
  36. examples/voice_07.wav +3 -0
  37. examples/voice_08.wav +3 -0
  38. examples/voice_09.wav +3 -0
  39. examples/voice_10.wav +3 -0
  40. examples/voice_11.wav +3 -0
  41. examples/voice_12.wav +3 -0
  42. models/bigvgan.onnx +3 -0
  43. models/bigvgan.onnx.data +3 -0
  44. models/speaker_encoder.onnx +3 -0
  45. models/speaker_encoder.onnx.data +3 -0
  46. src/audio/dsp.rs +210 -0
  47. src/audio/io.rs +150 -0
  48. src/audio/mel.rs +356 -0
  49. src/audio/mod.rs +57 -0
  50. src/audio/resample.rs +75 -0
.gitattributes ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/voice_05.wav filter=lfs diff=lfs merge=lfs -text
37
+ examples/voice_07.wav filter=lfs diff=lfs merge=lfs -text
38
+ examples/voice_08.wav filter=lfs diff=lfs merge=lfs -text
39
+ examples/voice_09.wav filter=lfs diff=lfs merge=lfs -text
40
+ examples/emo_sad.wav filter=lfs diff=lfs merge=lfs -text
41
+ examples/voice_02.wav filter=lfs diff=lfs merge=lfs -text
42
+ examples/voice_06.wav filter=lfs diff=lfs merge=lfs -text
43
+ examples/voice_10.wav filter=lfs diff=lfs merge=lfs -text
44
+ examples/voice_11.wav filter=lfs diff=lfs merge=lfs -text
45
+ examples/voice_12.wav filter=lfs diff=lfs merge=lfs -text
46
+ examples/emo_hate.wav filter=lfs diff=lfs merge=lfs -text
47
+ examples/voice_01.wav filter=lfs diff=lfs merge=lfs -text
48
+ examples/voice_03.wav filter=lfs diff=lfs merge=lfs -text
49
+ examples/voice_04.wav filter=lfs diff=lfs merge=lfs -text
50
+ indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
51
+ examples/* filter=lfs diff=lfs merge=lfs -text
52
+ *.wav filter=lfs diff=lfs merge=lfs -text
53
+ *. filter=lfs diff=lfs merge=lfs -text
54
+ .onnx filter=lfs diff=lfs merge=lfs -text
55
+ .wav filter=lfs diff=lfs merge=lfs -text
56
+ .mp3 filter=lfs diff=lfs merge=lfs -text
57
+ .flac filter=lfs diff=lfs merge=lfs -text
58
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__
3
+ *.egg-info
4
+ *.DS_Store
5
+ .idea/
6
+ .vscode/
7
+ checkpoints/*.pth
8
+ checkpoints/*.vocab
9
+ checkpoints/*.model
10
+ checkpoints/.cache
11
+ outputs/
12
+ build/
13
+ *.py[cod]
14
+ *.egg-info/
15
+ .venv
16
+ checkpoints/*
17
+ __MACOSX
18
+ .lock
19
+ # Python build artifacts
20
+ *.py[cod]
21
+ *.egg-info/
22
+ .venv
23
+ build/
24
+ dist/
25
+ *.egg-info/
26
+ # Rust build artifacts
27
+ /target/
28
+ **/*.rs.bk
29
+ .venv/
30
+ .claude-flow/
31
+ **/target/
32
+ indexout/
33
+ output.wav
34
+ *.wav
35
+ *.flac
36
+ .swarm/
37
+ .claude/
38
+ clone_chris.py
CLAUDE.md ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ IndexTTS-Rust is a high-performance Text-to-Speech engine, a complete Rust rewrite of the Python IndexTTS system. It uses ONNX Runtime for neural network inference and provides zero-shot voice cloning with emotion control.
8
+
9
+ ## Build and Development Commands
10
+
11
+ ```bash
12
+ # Build (always build release for performance testing)
13
+ cargo build --release
14
+
15
+ # Run linter (MANDATORY before commits - catches many issues)
16
+ cargo clippy -- -D warnings
17
+
18
+ # Run tests
19
+ cargo test
20
+
21
+ # Run specific test
22
+ cargo test test_name
23
+
24
+ # Run benchmarks (Criterion-based)
25
+ cargo bench
26
+
27
+ # Run specific benchmark
28
+ cargo bench --bench mel_spectrogram
29
+ cargo bench --bench inference
30
+
31
+ # Check compilation without building
32
+ cargo check
33
+
34
+ # Format code
35
+ cargo fmt
36
+
37
+ # Full pre-commit workflow (BUILD -> CLIPPY -> BUILD)
38
+ cargo build --release && cargo clippy -- -D warnings && cargo build --release
39
+ ```
40
+
41
+ ## CLI Usage
42
+
43
+ ```bash
44
+ # Show help
45
+ ./target/release/indextts --help
46
+
47
+ # Synthesize speech
48
+ ./target/release/indextts synthesize \
49
+ --text "Hello world" \
50
+ --voice examples/voice_01.wav \
51
+ --output output.wav
52
+
53
+ # Generate default config
54
+ ./target/release/indextts init-config -o config.yaml
55
+
56
+ # Show system info
57
+ ./target/release/indextts info
58
+
59
+ # Run built-in benchmarks
60
+ ./target/release/indextts benchmark --iterations 100
61
+ ```
62
+
63
+ ## Architecture
64
+
65
+ The codebase follows a modular pipeline architecture where each stage processes data sequentially:
66
+
67
+ ```
68
+ Text Input → Normalization → Tokenization → Model Inference → Vocoding → Audio Output
69
+ ```
70
+
71
+ ### Core Modules (src/)
72
+
73
+ - **audio/** - Audio DSP operations
74
+ - `mel.rs` - Mel-spectrogram computation (STFT, filterbanks)
75
+ - `io.rs` - WAV file I/O using hound
76
+ - `dsp.rs` - Signal processing utilities
77
+ - `resample.rs` - Audio resampling using rubato
78
+
79
+ - **text/** - Text processing pipeline
80
+ - `normalizer.rs` - Text normalization (Chinese/English/mixed)
81
+ - `tokenizer.rs` - BPE tokenization via HuggingFace tokenizers
82
+ - `phoneme.rs` - Grapheme-to-phoneme conversion
83
+
84
+ - **model/** - Neural network inference
85
+ - `session.rs` - ONNX Runtime wrapper (load-dynamic feature)
86
+ - `gpt.rs` - GPT-based sequence generation
87
+ - `embedding.rs` - Speaker and emotion encoders
88
+
89
+ - **vocoder/** - Neural vocoding
90
+ - `bigvgan.rs` - BigVGAN waveform synthesis
91
+ - `activations.rs` - Snake/SnakeBeta activation functions
92
+
93
+ - **pipeline/** - TTS orchestration
94
+ - `synthesis.rs` - Main synthesis logic, coordinates all modules
95
+
96
+ - **config/** - Configuration management (YAML-based via serde)
97
+
98
+ - **error.rs** - Error types using thiserror
99
+
100
+ - **lib.rs** - Library entry point, exposes public API
101
+
102
+ - **main.rs** - CLI entry point using clap
103
+
104
+ ### Key Constants (lib.rs)
105
+
106
+ ```rust
107
+ pub const SAMPLE_RATE: u32 = 22050; // Output audio sample rate
108
+ pub const N_MELS: usize = 80; // Mel filterbank channels
109
+ pub const N_FFT: usize = 1024; // FFT size
110
+ pub const HOP_LENGTH: usize = 256; // STFT hop length
111
+ ```
112
+
113
+ ### Dependencies Pattern
114
+
115
+ - **Audio**: hound (WAV), rustfft/realfft (DSP), rubato (resampling), dasp (signal processing)
116
+ - **ML Inference**: ort (ONNX Runtime with load-dynamic), ndarray, safetensors
117
+ - **Text**: tokenizers (HuggingFace), jieba-rs (Chinese), regex, unicode-segmentation
118
+ - **Parallelism**: rayon (data parallelism), tokio (async)
119
+ - **CLI**: clap (derive), env_logger, indicatif
120
+
121
+ ## Important Notes
122
+
123
+ 1. **ONNX Runtime**: Uses `load-dynamic` feature - requires ONNX Runtime library installed on system
124
+ 2. **Model Files**: ONNX models go in `models/` directory (tracked via Git LFS, not as regular git blobs; run `git lfs pull` or download them separately)
125
+ 3. **Reference Implementation**: Python code in `indextts - REMOVING - REF ONLY/` is kept for reference only
126
+ 4. **Performance**: Release builds use LTO and single codegen-unit for maximum optimization
127
+ 5. **Audio Format**: All internal processing at 22050 Hz, 80-band mel spectrograms
128
+
129
+ ## Testing Strategy
130
+
131
+ - Unit tests inline in modules
132
+ - Criterion benchmarks in `benches/` for performance regression testing
133
+ - Python regression tests in `tests/` for end-to-end validation
134
+ - Example audio files in `examples/` for testing voice cloning
135
+
136
+ ## Missing Infrastructure (TODO)
137
+
138
+ - No `scripts/manage.sh` yet (should include build, test, clean, docker controls)
139
+ - `context.md` now exists for conversation continuity, but it must be kept up to date
140
+ - No integration tests with actual ONNX models
CODEBASE_ANALYSIS.md ADDED
@@ -0,0 +1,594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IndexTTS-Rust Comprehensive Codebase Analysis
2
+
3
+ ## Executive Summary
4
+
5
+ **IndexTTS** is an **industrial-level, controllable, and efficient zero-shot Text-To-Speech (TTS) system** currently implemented in **Python** using PyTorch. The project is being converted to Rust (as indicated by the branch name `claude/convert-to-rust-01USgPYEqMyp5KXjjFNVwztU`).
6
+
7
+ **Key Statistics:**
8
+ - **Total Python Files:** 194
9
+ - **Total Lines of Code:** ~25,000+ (not counting dependencies)
10
+ - **Current Version:** IndexTTS 1.5 (latest with stability improvements, especially for English)
11
+ - **No Rust code exists yet** - this is a fresh conversion project
12
+
13
+ ---
14
+
15
+ ## 1. PROJECT STRUCTURE
16
+
17
+ ### Root Directory Layout
18
+ ```
19
+ IndexTTS-Rust/
20
+ ├── indextts/ # Main package (194 .py files)
21
+ │ ├── gpt/ # GPT-based model implementation
22
+ │ ├── BigVGAN/ # Vocoder for audio synthesis
23
+ │ ├── s2mel/ # Semantic-to-Mel spectrogram conversion
24
+ │ ├── utils/ # Text processing, feature extraction, utilities
25
+ │ └── vqvae/ # Vector Quantized VAE components
26
+ ├── examples/ # Sample audio files and test cases
27
+ ├── tests/ # Test files for regression testing
28
+ ├── tools/ # Utility scripts and i18n support
29
+ ├── webui.py # Gradio-based web interface (18KB)
30
+ ├── cli.py # Command-line interface
31
+ ├── requirements.txt # Python dependencies
32
+ └── archive/ # Historical documentation
33
+ ```
34
+
35
+ ---
36
+
37
+ ## 2. CURRENT IMPLEMENTATION (PYTHON)
38
+
39
+ ### Programming Language & Framework
40
+ - **Language:** Python 3.x
41
+ - **Deep Learning Framework:** PyTorch (primary dependency)
42
+ - **Model Format:** HuggingFace compatible (.safetensors)
43
+
44
+ ### Key Dependencies (requirements.txt)
45
+
46
+ | Dependency | Version | Purpose |
47
+ |-----------|---------|---------|
48
+ | torch | (implicit) | Deep learning framework |
49
+ | transformers | 4.52.1 | HuggingFace transformers library |
50
+ | librosa | 0.10.2.post1 | Audio processing |
51
+ | numpy | 1.26.2 | Numerical computing |
52
+ | accelerate | 1.8.1 | Distributed training/inference |
53
+ | deepspeed | 0.17.1 | Inference optimization |
54
+ | torchaudio | (implicit) | Audio I/O |
55
+ | safetensors | 0.5.2 | Model serialization |
56
+ | gradio | (latest) | Web UI framework |
57
+ | modelscope | 1.27.0 | Model hub integration |
58
+ | jieba | 0.42.1 | Chinese text tokenization |
59
+ | g2p-en | 2.1.0 | English phoneme conversion |
60
+ | sentencepiece | (latest) | BPE tokenization |
61
+ | descript-audiotools | 0.7.2 | Audio manipulation |
62
+ | cn2an | 0.5.22 | Chinese number normalization |
63
+ | WeTextProcessing / wetext | (conditional) | Text normalization (Linux/macOS) |
64
+
65
+ ---
66
+
67
+ ## 3. MAIN FUNCTIONALITY - THE TTS PIPELINE
68
+
69
+ ### What IndexTTS Does
70
+
71
+ **IndexTTS is a zero-shot multi-lingual TTS system that:**
72
+
73
+ 1. **Takes text input** (Chinese, English, or mixed)
74
+ 2. **Takes a voice reference audio** (speaker prompt)
75
+ 3. **Generates high-quality speech** in the speaker's voice
76
+ 4. **Supports multiple control mechanisms:**
77
+ - Pinyin-based pronunciation control (for Chinese)
78
+ - Pause control via punctuation
79
+ - Emotion vector manipulation (8 dimensions)
80
+ - Emotion text guidance via Qwen model
81
+ - Style reference audio
82
+
83
+ ### Core TTS Pipeline (infer_v2.py - 739 lines)
84
+
85
+ ```
86
+ Input Text
87
+
88
+ Text Normalization (TextNormalizer)
89
+ ├─ Chinese-specific normalization
90
+ ├─ English-specific normalization
91
+ ├─ Pinyin tone extraction/preservation
92
+ └─ Named entity handling
93
+
94
+ Text Tokenization (TextTokenizer + SentencePiece)
95
+ ├─ CJK character handling
96
+ └─ BPE encoding
97
+
98
+ Semantic Encoding (w2v-BERT model)
99
+ ├─ Input: Text tokens + Reference audio
100
+ ├─ Process: Semantic codec (RepCodec)
101
+ └─ Output: Semantic codes
102
+
103
+ Speaker Conditioning
104
+ ├─ Extract features from reference audio
105
+ ├─ CAMPPlus speaker embedding
106
+ ├─ Emotion embedding (from reference or text)
107
+ └─ Mel spectrogram reference
108
+
109
+ GPT-based Sequence Generation (UnifiedVoice)
110
+ ├─ Semantic tokens → Mel tokens
111
+ ├─ Conformer-based speaker conditioning
112
+ ├─ Perceiver-based attention pooling
113
+ └─ Emotion control via vectors or text
114
+
115
+ Length Regulation (s2mel)
116
+ ├─ Acoustic code expansion
117
+ ├─ Flow matching for duration modeling
118
+ └─ CFM (Conditional Flow Matching) estimator
119
+
120
+ BigVGAN Vocoder
121
+ ├─ Mel spectrogram → Waveform
122
+ ├─ Uses anti-aliased activation functions
123
+ ├─ Optional CUDA kernel optimization
124
+ └─ Optional DeepSpeed acceleration
125
+
126
+ Output Audio Waveform (22050 Hz)
127
+ ```
128
+
129
+ ---
130
+
131
+ ## 4. KEY ALGORITHMS AND COMPONENTS NEEDING RUST CONVERSION
132
+
133
+ ### A. Text Processing Pipeline
134
+
135
+ **TextNormalizer (front.py - ~500 lines)**
136
+ - Chinese text normalization using WeTextProcessing/wetext
137
+ - English text normalization
138
+ - Pinyin tone extraction and preservation
139
+ - Named entity detection and preservation
140
+ - Character mapping and replacement
141
+ - Pattern matching using regex
142
+
143
+ **TextTokenizer (front.py - ~200 lines)**
144
+ - SentencePiece BPE tokenization
145
+ - CJK character tokenization
146
+ - Special token handling (BOS, EOS, UNK)
147
+ - Vocabulary management
148
+
149
+ ### B. Neural Network Components
150
+
151
+ #### 1. **UnifiedVoice GPT Model** (model_v2.py - 747 lines)
152
+ - Multi-layer transformer (configurable depth)
153
+ - Speaker conditioning via Conformer encoder
154
+ - Perceiver resampler for attention pooling
155
+ - Emotion conditioning encoder
156
+ - Position embeddings (learned)
157
+ - Mel and text embeddings
158
+ - Final layer norm + linear output layer
159
+
160
+ #### 2. **Conformer Encoder** (conformer_encoder.py - 520 lines)
161
+ - Conformer blocks with attention + convolution
162
+ - Multi-head self-attention with relative position bias
163
+ - Positionwise feed-forward networks
164
+ - Layer normalization
165
+ - Subsampling layers (Conv2d with various factors)
166
+ - Positional encoding (absolute and relative)
167
+
168
+ #### 3. **Perceiver Resampler** (perceiver.py - 317 lines)
169
+ - Latent queries (learnable embeddings)
170
+ - Cross-attention with context
171
+ - Feed-forward networks
172
+ - Dimension projection
173
+
174
+ #### 4. **BigVGAN Vocoder** (models.py - ~1000 lines)
175
+ - Multi-scale convolution blocks (AMPBlock1, AMPBlock2)
176
+ - Anti-aliased activation functions (Snake, SnakeBeta)
177
+ - Spectral normalization
178
+ - Transposed convolution upsampling
179
+ - Weight normalization
180
+ - Optional CUDA kernel for activation
181
+
182
+ #### 5. **S2Mel (Semantic-to-Mel) Model** (s2mel/modules/)
183
+ - Flow matching / CFM (Conditional Flow Matching)
184
+ - Length regulator
185
+ - Diffusion transformer
186
+ - Acoustic codec quantization
187
+ - Style embeddings
188
+
189
+ ### C. Feature Extraction & Processing
190
+
191
+ **Audio Processing (audio.py)**
192
+ - Mel spectrogram computation using librosa
193
+ - Hann windowing and STFT
194
+ - Dynamic range compression/decompression
195
+ - Spectral normalization
196
+
197
+ **Semantic Models**
198
+ - W2V-BERT 2.0 (wav2vec-BERT) embeddings
199
+ - RepCodec (semantic codec with vector quantization)
200
+ - Amphion Codec encoders/decoders
201
+
202
+ **Speaker Features**
203
+ - CAMPPlus speaker embedding (192-dim)
204
+ - Campplus model inference
205
+ - Mel-based reference features
206
+
207
+ ### D. Model Loading & Configuration
208
+
209
+ **Checkpoint Loading** (checkpoint.py - ~50 lines)
210
+ - Model weight restoration from .safetensors/.pt files
211
+
212
+ **HuggingFace Integration**
213
+ - Model hub downloads
214
+ - Configuration loading (OmegaConf)
215
+
216
+ **Configuration System** (YAML-based)
217
+ - Model architecture parameters
218
+ - Training/inference settings
219
+ - Dataset configuration
220
+ - Vocoder settings
221
+
222
+ ---
223
+
224
+ ## 5. EXTERNAL MODELS USED
225
+
226
+ ### Pre-trained Models (Downloaded from HuggingFace)
227
+
228
+ | Model | Source | Purpose | Size | Parameters |
229
+ |-------|--------|---------|------|-----------|
230
+ | IndexTTS-2 | IndexTeam/IndexTTS-2 | Main TTS model | ~2GB | Various checkpoints |
231
+ | W2V-BERT-2.0 | facebook/w2v-bert-2.0 | Semantic feature extraction | ~1GB | 614M |
232
+ | MaskGCT | amphion/MaskGCT | Semantic codec | - | - |
233
+ | CAMPPlus | funasr/campplus | Speaker embedding | ~100MB | - |
234
+ | BigVGAN v2 | nvidia/bigvgan_v2_22khz_80band_256x | Vocoder | ~100MB | - |
235
+ | Qwen Model | (via modelscope) | Emotion text guidance | Variable | - |
236
+
237
+ ### Model Component Breakdown
238
+ ```
239
+ Checkpoint Files Loaded:
240
+ ├── gpt_checkpoint.pth # UnifiedVoice model weights
241
+ ├── s2mel_checkpoint.pth # Semantic-to-Mel model
242
+ ├── bpe_model.model # SentencePiece tokenizer
243
+ ├── emotion_matrix.pt # Emotion embedding vectors (8-dim)
244
+ ├── speaker_matrix.pt # Speaker embedding matrix
245
+ ├── w2v_stat.pt # Semantic model statistics (mean/std)
246
+ ├── qwen_emo_path/ # Qwen-based emotion detector
247
+ └── vocoder config # BigVGAN vocoder config
248
+ ```
249
+
250
+ ---
251
+
252
+ ## 6. INFERENCE MODES & CAPABILITIES
253
+
254
+ ### A. Single Text Generation
255
+ ```python
256
+ tts.infer(
257
+ spk_audio_prompt="voice.wav",
258
+ text="Hello world",
259
+ output_path="output.wav",
260
+ emo_audio_prompt=None, # Optional emotion reference
261
+ emo_alpha=1.0, # Emotion weight
262
+ emo_vector=None, # Direct emotion control [0-1 values]
263
+ use_emo_text=False, # Generate emotion from text
264
+ emo_text=None, # Text for emotion extraction
265
+ interval_silence=200 # Silence between segments (ms)
266
+ )
267
+ ```
268
+
269
+ ### B. Batch/Fast Inference
270
+ ```python
271
+ tts.infer_fast(...) # Parallel segment generation
272
+ ```
273
+
274
+ ### C. Multi-language Support
275
+ - **Chinese (Simplified & Traditional):** Full pinyin support
276
+ - **English:** Phoneme-based
277
+ - **Mixed:** Chinese + English in single utterance
278
+
279
+ ### D. Emotion Control Methods
280
+ 1. **Reference Audio:** Extract from emotion_audio_prompt
281
+ 2. **Emotion Vectors:** Direct 8-dimensional control
282
+ 3. **Text-based:** Use Qwen model to detect emotion from text
283
+ 4. **Speaker-based:** Use speaker's natural emotion
284
+
285
+ ### E. Punctuation-based Pausing
286
+ - Periods, commas, question marks, exclamation marks trigger pauses
287
+ - Pause duration controlled via configuration
288
+
289
+ ---
290
+
291
+ ## 7. MAJOR COMPONENTS BREAKDOWN
292
+
293
+ ### indextts/gpt/ (16,953 lines)
294
+ **Purpose:** GPT-based sequence-to-sequence modeling
295
+
296
+ **Files:**
297
+ - `model_v2.py` (747L) - UnifiedVoice implementation, GPT2InferenceModel
298
+ - `model.py` (713L) - Original model (v1)
299
+ - `conformer_encoder.py` (520L) - Conformer speaker encoder
300
+ - `perceiver.py` (317L) - Perceiver attention mechanism
301
+ - `transformers_*.py` (~13,000L) - HuggingFace transformer implementations (customized)
302
+
303
+ ### indextts/BigVGAN/ (6+ files, ~1000+ lines)
304
+ **Purpose:** Neural vocoder for mel-to-audio conversion
305
+
306
+ **Key Files:**
307
+ - `models.py` - BigVGAN architecture with AMPBlocks
308
+ - `ECAPA_TDNN.py` - Speaker encoder
309
+ - `activations.py` - Snake/SnakeBeta activation functions
310
+ - `alias_free_activation/` - Anti-aliasing filters (CUDA + Torch versions)
311
+ - `alias_free_torch/` - Pure PyTorch fallback
312
+ - `nnet/` - Network modules (normalization, CNN, linear)
313
+
314
+ ### indextts/s2mel/ (~500+ lines)
315
+ **Purpose:** Semantic tokens → Mel spectrogram conversion
316
+
317
+ **Key Files:**
318
+ - `modules/audio.py` - Mel spectrogram computation
319
+ - `modules/commons.py` - Common utilities
320
+ - `modules/layers.py` - Neural network layers
321
+ - `modules/length_regulator.py` - Duration modeling
322
+ - `modules/flow_matching.py` - Conditional flow matching (CFM)
323
+ - `modules/diffusion_transformer.py` - Diffusion-based generation
324
+ - `modules/rmvpe.py` - Pitch extraction
325
+ - `modules/bigvgan/` - BigVGAN vocoder
326
+ - `dac/` - DAC (Descript Audio Codec)
327
+
328
+ ### indextts/utils/ (12+ files, ~500 lines)
329
+ **Purpose:** Text processing, feature extraction, utilities
330
+
331
+ **Key Files:**
332
+ - `front.py` (700L) - TextNormalizer, TextTokenizer
333
+ - `maskgct_utils.py` (250L) - Semantic codec builders
334
+ - `arch_util.py` - Architecture utilities (AttentionBlock)
335
+ - `checkpoint.py` - Model loading
336
+ - `xtransformers.py` (1600L) - Transformer utilities
337
+ - `feature_extractors.py` - Mel spectrogram features
338
+ - `typical_sampling.py` - Sampling strategies
339
+ - `maskgct/` - MaskGCT codec components (~100+ files)
340
+
341
+ ### indextts/utils/maskgct/ (~100+ Python files)
342
+ **Purpose:** MaskGCT (Masked Generative Codec Transformer) implementation
343
+
344
+ **Components:**
345
+ - `models/codec/` - Various audio codecs (Amphion, FACodec, SpeechTokenizer, NS3, VEVo, KMeans)
346
+ - `models/tts/maskgct/` - TTS-specific implementations
347
+ - Multiple codec variants with quantization
348
+
349
+ ---
350
+
351
+ ## 8. CONFIGURATION & MODEL DOWNLOADING
352
+
353
+ ### Configuration System (OmegaConf YAML)
354
+ Example config.yaml structure:
355
+ ```yaml
356
+ gpt:
357
+ layers: 8
358
+ model_dim: 512
359
+ heads: 8
360
+ max_text_tokens: 120
361
+ max_mel_tokens: 250
362
+ stop_mel_token: 8193
363
+ conformer_config: {...}
364
+
365
+ vocoder:
366
+ name: "nvidia/bigvgan_v2_22khz_80band_256x"
367
+
368
+ s2mel:
369
+ checkpoint: "models/s2mel.pth"
370
+ preprocess_params:
371
+ sr: 22050
372
+ spect_params:
373
+ n_fft: 1024
374
+ hop_length: 256
375
+ n_mels: 80
376
+
377
+ dataset:
378
+ bpe_model: "models/bpe.model"
379
+
380
+ emotions:
381
+ num: [5, 6, 8, ...] # Emotion vector counts per dimension
382
+
383
+ w2v_stat: "models/w2v_stat.pt"
384
+ ```
385
+
386
+ ### Model Auto-download
387
+ ```python
388
+ download_model_from_huggingface(
389
+ local_path="./checkpoints",
390
+ cache_path="./checkpoints/hf_cache"
391
+ )
392
+ ```
393
+
394
+ Preloads from HuggingFace:
395
+ - IndexTeam/IndexTTS-2
396
+ - amphion/MaskGCT
397
+ - funasr/campplus
398
+ - facebook/w2v-bert-2.0
399
+ - nvidia/bigvgan_v2_22khz_80band_256x
400
+
401
+ ---
402
+
403
+ ## 9. INTERFACES
404
+
405
+ ### A. Command Line (cli.py - 64 lines)
406
+ ```bash
407
+ python -m indextts.cli "Text to synthesize" \
408
+ -v voice_prompt.wav \
409
+ -o output.wav \
410
+ -c checkpoints/config.yaml \
411
+ --model_dir checkpoints \
412
+ --fp16 \
413
+ -d cuda:0
414
+ ```
415
+
416
+ ### B. Web UI (webui.py - 18KB)
417
+ Gradio-based interface with:
418
+ - Real-time inference
419
+ - Multiple emotion control modes
420
+ - Example cases loading
421
+ - Language selection (Chinese/English)
422
+ - Batch processing
423
+ - Cache management
424
+
425
+ ### C. Python API (infer_v2.py)
426
+ ```python
427
+ from indextts.infer_v2 import IndexTTS2
428
+
429
+ tts = IndexTTS2(
430
+ cfg_path="checkpoints/config.yaml",
431
+ model_dir="checkpoints",
432
+ use_fp16=True,
433
+ device="cuda:0"
434
+ )
435
+
436
+ audio = tts.infer(
437
+ spk_audio_prompt="speaker.wav",
438
+ text="Hello",
439
+ output_path="output.wav"
440
+ )
441
+ ```
442
+
443
+ ---
444
+
445
+ ## 10. CRITICAL ALGORITHMS TO IMPLEMENT
446
+
447
+ ### Priority 1: Core Inference Pipeline
448
+ 1. **Text Normalization** - Pattern matching, phoneme handling
449
+ 2. **Text Tokenization** - SentencePiece integration
450
+ 3. **Semantic Encoding** - W2V-BERT model inference
451
+ 4. **GPT Generation** - Token-by-token generation with sampling
452
+ 5. **Vocoder** - BigVGAN mel-to-audio conversion
453
+
454
+ ### Priority 2: Feature Extraction
455
+ 1. **Mel Spectrogram** - STFT, librosa filters
456
+ 2. **Speaker Embeddings** - CAMPPlus inference
457
+ 3. **Emotion Encoding** - Vector quantization
458
+ 4. **Audio Loading/Processing** - Resampling, normalization
459
+
460
+ ### Priority 3: Advanced Features
461
+ 1. **Conformer Encoding** - Complex attention mechanism
462
+ 2. **Perceiver Pooling** - Cross-attention mechanisms
463
+ 3. **Flow Matching** - Continuous diffusion
464
+ 4. **Length Regulation** - Duration prediction
465
+
466
+ ### Priority 4: Optional Optimizations
467
+ 1. **CUDA Kernels** - Anti-aliased activations
468
+ 2. **DeepSpeed Integration** - Model parallelism
469
+ 3. **KV Cache** - Inference optimization
470
+
471
+ ---
472
+
473
+ ## 11. DATA FLOW EXAMPLE
474
+
475
+ ```
476
+ Input: text="你好", voice="speaker.wav", emotion="happy"
477
+
478
+ 1. TextNormalizer.normalize("你好")
479
+ → "你好" (no change needed)
480
+
481
+ 2. TextTokenizer.encode("你好")
482
+ → [token_id_1, token_id_2, ...]
483
+
484
+ 3. Audio Loading & Processing:
485
+ - Load speaker.wav → 22050 Hz
486
+ - Extract W2V-BERT features
487
+ - Get semantic codes via RepCodec
488
+ - Extract CAMPPlus embedding (192-dim)
489
+ - Compute mel spectrogram
490
+
491
+ 4. Emotion Processing:
492
+ - If emotion vector: scale by emotion_alpha
493
+ - If emotion audio: extract embeddings
494
+ - Create emotion conditioning
495
+
496
+ 5. GPT Generation:
497
+ - Input: [semantic_codes, text_tokens]
498
+ - Output: mel_tokens (variable length)
499
+
500
+ 6. Length Regulation (s2mel):
501
+ - Input: mel_tokens + speaker_style
502
+ - Output: acoustic_codes (fine-grained tokens)
503
+
504
+ 7. BigVGAN Vocoding:
505
+ - Input: acoustic_codes → mel_spectrogram
506
+ - Output: waveform at 22050 Hz
507
+
508
+ 8. Post-processing:
509
+ - Optional silence insertion
510
+ - Audio normalization
511
+ - WAV file writing
512
+ ```
513
+
514
+ ---
515
+
516
+ ## 12. TESTING
517
+
518
+ ### Regression Tests (regression_test.py)
519
+ Tests various scenarios:
520
+ - Chinese text with pinyin tones
521
+ - English text
522
+ - Mixed Chinese/English
523
+ - Long-form text
524
+ - Names and entities
525
+ - Special punctuation
526
+
527
+ ### Padding Tests (padding_test.py)
528
+ - Variable length input handling
529
+ - Batch processing
530
+ - Edge cases
531
+
532
+ ---
533
+
534
+ ## 13. FILE STATISTICS SUMMARY
535
+
536
+ | Category | Count | Lines |
537
+ |----------|-------|-------|
538
+ | Python Files | 194 | ~25,000+ |
539
+ | GPT Module | 9 | 16,953 |
540
+ | BigVGAN | 6+ | ~1,000+ |
541
+ | Utils | 12+ | ~500 |
542
+ | MaskGCT | 100+ | ~10,000+ |
543
+ | S2Mel | 10+ | ~2,000+ |
544
+ | Root Level | 3 | 730 |
545
+
546
+ ---
547
+
548
+ ## 14. KEY TECHNICAL CHALLENGES FOR RUST CONVERSION
549
+
550
+ 1. **PyTorch Model Loading** → Need ONNX export or custom binary format
551
+ 2. **Text Normalization Libraries** → May need Rust bindings or reimplementation
552
+ 3. **Complex Attention Mechanisms** → Transformers, Perceiver, Conformer
553
+ 4. **Mel Spectrogram Computation** → STFT, librosa filter banks
554
+ 5. **Quantization & Codecs** → Multiple codec implementations
555
+ 6. **Large Model Inference** → Optimization, batching, caching
556
+ 7. **CUDA Kernels** → Custom activation functions (if needed)
557
+ 8. **Web Server Integration** → Replace Gradio with Rust web framework
558
+
559
+ ---
560
+
561
+ ## 15. DEPENDENCY CONVERSION ROADMAP
562
+
563
+ | Python Library | Rust Alternative | Priority |
564
+ |---|---|---|
565
+ | torch/transformers | ort, tch-rs, candle | Critical |
566
+ | librosa | rustfft, dasp_signal | Critical |
567
+ | sentencepiece | sentencepiece, tokenizers | Critical |
568
+ | numpy | ndarray, nalgebra | Critical |
569
+ | jieba | jieba-rs | High |
570
+ | torchaudio | dasp, wav, hound | High |
571
+ | gradio | actix-web, rocket, axum | Medium |
572
+ | OmegaConf | serde, config-rs | Medium |
573
+ | safetensors | safetensors-rs | High |
574
+
575
+ ---
576
+
577
+ ## Summary
578
+
579
+ IndexTTS is a sophisticated, state-of-the-art TTS system with:
580
+ - **194 Python files** across multiple specialized modules
581
+ - **Multi-stage processing pipeline** from text to audio
582
+ - **Advanced neural architectures** (Conformer, Perceiver, GPT, BigVGAN)
583
+ - **Multi-language support** with emotion control
584
+ - **Production-ready** with web UI and CLI interfaces
585
+ - **Heavy reliance on PyTorch** and HuggingFace ecosystems
586
+ - **Large external models** requiring careful integration
587
+
588
+ The Rust conversion will require careful translation of:
589
+ 1. Complex text processing pipelines
590
+ 2. Neural network inference engines
591
+ 3. Audio DSP operations
592
+ 4. Model loading and management
593
+ 5. Web interface integration
594
+
Cargo.lock ADDED
@@ -0,0 +1,3683 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "adler2"
7
+ version = "2.0.1"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
10
+
11
+ [[package]]
12
+ name = "adler32"
13
+ version = "1.2.0"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
16
+
17
+ [[package]]
18
+ name = "ahash"
19
+ version = "0.8.12"
20
+ source = "registry+https://github.com/rust-lang/crates.io-index"
21
+ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
22
+ dependencies = [
23
+ "cfg-if",
24
+ "once_cell",
25
+ "version_check",
26
+ "zerocopy",
27
+ ]
28
+
29
+ [[package]]
30
+ name = "aho-corasick"
31
+ version = "1.1.4"
32
+ source = "registry+https://github.com/rust-lang/crates.io-index"
33
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
34
+ dependencies = [
35
+ "memchr",
36
+ ]
37
+
38
+ [[package]]
39
+ name = "allocator-api2"
40
+ version = "0.2.21"
41
+ source = "registry+https://github.com/rust-lang/crates.io-index"
42
+ checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
43
+
44
+ [[package]]
45
+ name = "anes"
46
+ version = "0.1.6"
47
+ source = "registry+https://github.com/rust-lang/crates.io-index"
48
+ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
49
+
50
+ [[package]]
51
+ name = "anstream"
52
+ version = "0.6.21"
53
+ source = "registry+https://github.com/rust-lang/crates.io-index"
54
+ checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
55
+ dependencies = [
56
+ "anstyle",
57
+ "anstyle-parse",
58
+ "anstyle-query",
59
+ "anstyle-wincon",
60
+ "colorchoice",
61
+ "is_terminal_polyfill",
62
+ "utf8parse",
63
+ ]
64
+
65
+ [[package]]
66
+ name = "anstyle"
67
+ version = "1.0.13"
68
+ source = "registry+https://github.com/rust-lang/crates.io-index"
69
+ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
70
+
71
+ [[package]]
72
+ name = "anstyle-parse"
73
+ version = "0.2.7"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
76
+ dependencies = [
77
+ "utf8parse",
78
+ ]
79
+
80
+ [[package]]
81
+ name = "anstyle-query"
82
+ version = "1.1.5"
83
+ source = "registry+https://github.com/rust-lang/crates.io-index"
84
+ checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
85
+ dependencies = [
86
+ "windows-sys 0.61.2",
87
+ ]
88
+
89
+ [[package]]
90
+ name = "anstyle-wincon"
91
+ version = "3.0.11"
92
+ source = "registry+https://github.com/rust-lang/crates.io-index"
93
+ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
94
+ dependencies = [
95
+ "anstyle",
96
+ "once_cell_polyfill",
97
+ "windows-sys 0.61.2",
98
+ ]
99
+
100
+ [[package]]
101
+ name = "anyhow"
102
+ version = "1.0.100"
103
+ source = "registry+https://github.com/rust-lang/crates.io-index"
104
+ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
105
+
106
+ [[package]]
107
+ name = "arraydeque"
108
+ version = "0.5.1"
109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
110
+ checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
111
+
112
+ [[package]]
113
+ name = "async-trait"
114
+ version = "0.1.89"
115
+ source = "registry+https://github.com/rust-lang/crates.io-index"
116
+ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
117
+ dependencies = [
118
+ "proc-macro2",
119
+ "quote",
120
+ "syn 2.0.110",
121
+ ]
122
+
123
+ [[package]]
124
+ name = "atomic-waker"
125
+ version = "1.1.2"
126
+ source = "registry+https://github.com/rust-lang/crates.io-index"
127
+ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
128
+
129
+ [[package]]
130
+ name = "autocfg"
131
+ version = "1.5.0"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
134
+
135
+ [[package]]
136
+ name = "base64"
137
+ version = "0.13.1"
138
+ source = "registry+https://github.com/rust-lang/crates.io-index"
139
+ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
140
+
141
+ [[package]]
142
+ name = "base64"
143
+ version = "0.21.7"
144
+ source = "registry+https://github.com/rust-lang/crates.io-index"
145
+ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
146
+
147
+ [[package]]
148
+ name = "base64"
149
+ version = "0.22.1"
150
+ source = "registry+https://github.com/rust-lang/crates.io-index"
151
+ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
152
+
153
+ [[package]]
154
+ name = "base64ct"
155
+ version = "1.8.0"
156
+ source = "registry+https://github.com/rust-lang/crates.io-index"
157
+ checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
158
+
159
+ [[package]]
160
+ name = "bitflags"
161
+ version = "2.10.0"
162
+ source = "registry+https://github.com/rust-lang/crates.io-index"
163
+ checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
164
+ dependencies = [
165
+ "serde_core",
166
+ ]
167
+
168
+ [[package]]
169
+ name = "block-buffer"
170
+ version = "0.10.4"
171
+ source = "registry+https://github.com/rust-lang/crates.io-index"
172
+ checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
173
+ dependencies = [
174
+ "generic-array",
175
+ ]
176
+
177
+ [[package]]
178
+ name = "bumpalo"
179
+ version = "3.19.0"
180
+ source = "registry+https://github.com/rust-lang/crates.io-index"
181
+ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
182
+
183
+ [[package]]
184
+ name = "bytemuck"
185
+ version = "1.24.0"
186
+ source = "registry+https://github.com/rust-lang/crates.io-index"
187
+ checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
188
+ dependencies = [
189
+ "bytemuck_derive",
190
+ ]
191
+
192
+ [[package]]
193
+ name = "bytemuck_derive"
194
+ version = "1.10.2"
195
+ source = "registry+https://github.com/rust-lang/crates.io-index"
196
+ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
197
+ dependencies = [
198
+ "proc-macro2",
199
+ "quote",
200
+ "syn 2.0.110",
201
+ ]
202
+
203
+ [[package]]
204
+ name = "byteorder"
205
+ version = "1.5.0"
206
+ source = "registry+https://github.com/rust-lang/crates.io-index"
207
+ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
208
+
209
+ [[package]]
210
+ name = "bytes"
211
+ version = "1.11.0"
212
+ source = "registry+https://github.com/rust-lang/crates.io-index"
213
+ checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
214
+
215
+ [[package]]
216
+ name = "cast"
217
+ version = "0.3.0"
218
+ source = "registry+https://github.com/rust-lang/crates.io-index"
219
+ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
220
+
221
+ [[package]]
222
+ name = "cc"
223
+ version = "1.2.46"
224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
225
+ checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36"
226
+ dependencies = [
227
+ "find-msvc-tools",
228
+ "jobserver",
229
+ "libc",
230
+ "shlex",
231
+ ]
232
+
233
+ [[package]]
234
+ name = "cedarwood"
235
+ version = "0.4.6"
236
+ source = "registry+https://github.com/rust-lang/crates.io-index"
237
+ checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90"
238
+ dependencies = [
239
+ "smallvec 1.15.1",
240
+ ]
241
+
242
+ [[package]]
243
+ name = "cfg-if"
244
+ version = "1.0.4"
245
+ source = "registry+https://github.com/rust-lang/crates.io-index"
246
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
247
+
248
+ [[package]]
249
+ name = "ciborium"
250
+ version = "0.2.2"
251
+ source = "registry+https://github.com/rust-lang/crates.io-index"
252
+ checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
253
+ dependencies = [
254
+ "ciborium-io",
255
+ "ciborium-ll",
256
+ "serde",
257
+ ]
258
+
259
+ [[package]]
260
+ name = "ciborium-io"
261
+ version = "0.2.2"
262
+ source = "registry+https://github.com/rust-lang/crates.io-index"
263
+ checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
264
+
265
+ [[package]]
266
+ name = "ciborium-ll"
267
+ version = "0.2.2"
268
+ source = "registry+https://github.com/rust-lang/crates.io-index"
269
+ checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
270
+ dependencies = [
271
+ "ciborium-io",
272
+ "half",
273
+ ]
274
+
275
+ [[package]]
276
+ name = "clap"
277
+ version = "4.5.51"
278
+ source = "registry+https://github.com/rust-lang/crates.io-index"
279
+ checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5"
280
+ dependencies = [
281
+ "clap_builder",
282
+ "clap_derive",
283
+ ]
284
+
285
+ [[package]]
286
+ name = "clap_builder"
287
+ version = "4.5.51"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a"
290
+ dependencies = [
291
+ "anstream",
292
+ "anstyle",
293
+ "clap_lex",
294
+ "strsim",
295
+ ]
296
+
297
+ [[package]]
298
+ name = "clap_derive"
299
+ version = "4.5.49"
300
+ source = "registry+https://github.com/rust-lang/crates.io-index"
301
+ checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
302
+ dependencies = [
303
+ "heck",
304
+ "proc-macro2",
305
+ "quote",
306
+ "syn 2.0.110",
307
+ ]
308
+
309
+ [[package]]
310
+ name = "clap_lex"
311
+ version = "0.7.6"
312
+ source = "registry+https://github.com/rust-lang/crates.io-index"
313
+ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
314
+
315
+ [[package]]
316
+ name = "colorchoice"
317
+ version = "1.0.4"
318
+ source = "registry+https://github.com/rust-lang/crates.io-index"
319
+ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
320
+
321
+ [[package]]
322
+ name = "config"
323
+ version = "0.14.1"
324
+ source = "registry+https://github.com/rust-lang/crates.io-index"
325
+ checksum = "68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf"
326
+ dependencies = [
327
+ "async-trait",
328
+ "convert_case",
329
+ "json5",
330
+ "nom",
331
+ "pathdiff",
332
+ "ron",
333
+ "rust-ini",
334
+ "serde",
335
+ "serde_json",
336
+ "toml",
337
+ "yaml-rust2",
338
+ ]
339
+
340
+ [[package]]
341
+ name = "console"
342
+ version = "0.15.11"
343
+ source = "registry+https://github.com/rust-lang/crates.io-index"
344
+ checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
345
+ dependencies = [
346
+ "encode_unicode",
347
+ "libc",
348
+ "once_cell",
349
+ "unicode-width",
350
+ "windows-sys 0.59.0",
351
+ ]
352
+
353
+ [[package]]
354
+ name = "const-random"
355
+ version = "0.1.18"
356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
357
+ checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
358
+ dependencies = [
359
+ "const-random-macro",
360
+ ]
361
+
362
+ [[package]]
363
+ name = "const-random-macro"
364
+ version = "0.1.16"
365
+ source = "registry+https://github.com/rust-lang/crates.io-index"
366
+ checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
367
+ dependencies = [
368
+ "getrandom 0.2.16",
369
+ "once_cell",
370
+ "tiny-keccak",
371
+ ]
372
+
373
+ [[package]]
374
+ name = "convert_case"
375
+ version = "0.6.0"
376
+ source = "registry+https://github.com/rust-lang/crates.io-index"
377
+ checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
378
+ dependencies = [
379
+ "unicode-segmentation",
380
+ ]
381
+
382
+ [[package]]
383
+ name = "core-foundation"
384
+ version = "0.9.4"
385
+ source = "registry+https://github.com/rust-lang/crates.io-index"
386
+ checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
387
+ dependencies = [
388
+ "core-foundation-sys",
389
+ "libc",
390
+ ]
391
+
392
+ [[package]]
393
+ name = "core-foundation-sys"
394
+ version = "0.8.7"
395
+ source = "registry+https://github.com/rust-lang/crates.io-index"
396
+ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
397
+
398
+ [[package]]
399
+ name = "core2"
400
+ version = "0.4.0"
401
+ source = "registry+https://github.com/rust-lang/crates.io-index"
402
+ checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
403
+ dependencies = [
404
+ "memchr",
405
+ ]
406
+
407
+ [[package]]
408
+ name = "cpufeatures"
409
+ version = "0.2.17"
410
+ source = "registry+https://github.com/rust-lang/crates.io-index"
411
+ checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
412
+ dependencies = [
413
+ "libc",
414
+ ]
415
+
416
+ [[package]]
417
+ name = "crc32fast"
418
+ version = "1.5.0"
419
+ source = "registry+https://github.com/rust-lang/crates.io-index"
420
+ checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
421
+ dependencies = [
422
+ "cfg-if",
423
+ ]
424
+
425
+ [[package]]
426
+ name = "criterion"
427
+ version = "0.5.1"
428
+ source = "registry+https://github.com/rust-lang/crates.io-index"
429
+ checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
430
+ dependencies = [
431
+ "anes",
432
+ "cast",
433
+ "ciborium",
434
+ "clap",
435
+ "criterion-plot",
436
+ "is-terminal",
437
+ "itertools 0.10.5",
438
+ "num-traits",
439
+ "once_cell",
440
+ "oorandom",
441
+ "plotters",
442
+ "rayon",
443
+ "regex",
444
+ "serde",
445
+ "serde_derive",
446
+ "serde_json",
447
+ "tinytemplate",
448
+ "walkdir",
449
+ ]
450
+
451
+ [[package]]
452
+ name = "criterion-plot"
453
+ version = "0.5.0"
454
+ source = "registry+https://github.com/rust-lang/crates.io-index"
455
+ checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
456
+ dependencies = [
457
+ "cast",
458
+ "itertools 0.10.5",
459
+ ]
460
+
461
+ [[package]]
462
+ name = "crossbeam-deque"
463
+ version = "0.8.6"
464
+ source = "registry+https://github.com/rust-lang/crates.io-index"
465
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
466
+ dependencies = [
467
+ "crossbeam-epoch",
468
+ "crossbeam-utils",
469
+ ]
470
+
471
+ [[package]]
472
+ name = "crossbeam-epoch"
473
+ version = "0.9.18"
474
+ source = "registry+https://github.com/rust-lang/crates.io-index"
475
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
476
+ dependencies = [
477
+ "crossbeam-utils",
478
+ ]
479
+
480
+ [[package]]
481
+ name = "crossbeam-utils"
482
+ version = "0.8.21"
483
+ source = "registry+https://github.com/rust-lang/crates.io-index"
484
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
485
+
486
+ [[package]]
487
+ name = "crunchy"
488
+ version = "0.2.4"
489
+ source = "registry+https://github.com/rust-lang/crates.io-index"
490
+ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
491
+
492
+ [[package]]
493
+ name = "crypto-common"
494
+ version = "0.1.7"
495
+ source = "registry+https://github.com/rust-lang/crates.io-index"
496
+ checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
497
+ dependencies = [
498
+ "generic-array",
499
+ "typenum",
500
+ ]
501
+
502
+ [[package]]
503
+ name = "darling"
504
+ version = "0.20.11"
505
+ source = "registry+https://github.com/rust-lang/crates.io-index"
506
+ checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
507
+ dependencies = [
508
+ "darling_core",
509
+ "darling_macro",
510
+ ]
511
+
512
+ [[package]]
513
+ name = "darling_core"
514
+ version = "0.20.11"
515
+ source = "registry+https://github.com/rust-lang/crates.io-index"
516
+ checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
517
+ dependencies = [
518
+ "fnv",
519
+ "ident_case",
520
+ "proc-macro2",
521
+ "quote",
522
+ "strsim",
523
+ "syn 2.0.110",
524
+ ]
525
+
526
+ [[package]]
527
+ name = "darling_macro"
528
+ version = "0.20.11"
529
+ source = "registry+https://github.com/rust-lang/crates.io-index"
530
+ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
531
+ dependencies = [
532
+ "darling_core",
533
+ "quote",
534
+ "syn 2.0.110",
535
+ ]
536
+
537
+ [[package]]
538
+ name = "dary_heap"
539
+ version = "0.3.8"
540
+ source = "registry+https://github.com/rust-lang/crates.io-index"
541
+ checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
542
+
543
+ [[package]]
544
+ name = "dasp_envelope"
545
+ version = "0.11.0"
546
+ source = "registry+https://github.com/rust-lang/crates.io-index"
547
+ checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6"
548
+ dependencies = [
549
+ "dasp_frame",
550
+ "dasp_peak",
551
+ "dasp_ring_buffer",
552
+ "dasp_rms",
553
+ "dasp_sample",
554
+ ]
555
+
556
+ [[package]]
557
+ name = "dasp_frame"
558
+ version = "0.11.0"
559
+ source = "registry+https://github.com/rust-lang/crates.io-index"
560
+ checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6"
561
+ dependencies = [
562
+ "dasp_sample",
563
+ ]
564
+
565
+ [[package]]
566
+ name = "dasp_interpolate"
567
+ version = "0.11.0"
568
+ source = "registry+https://github.com/rust-lang/crates.io-index"
569
+ checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486"
570
+ dependencies = [
571
+ "dasp_frame",
572
+ "dasp_ring_buffer",
573
+ "dasp_sample",
574
+ ]
575
+
576
+ [[package]]
577
+ name = "dasp_peak"
578
+ version = "0.11.0"
579
+ source = "registry+https://github.com/rust-lang/crates.io-index"
580
+ checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf"
581
+ dependencies = [
582
+ "dasp_frame",
583
+ "dasp_sample",
584
+ ]
585
+
586
+ [[package]]
587
+ name = "dasp_ring_buffer"
588
+ version = "0.11.0"
589
+ source = "registry+https://github.com/rust-lang/crates.io-index"
590
+ checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1"
591
+
592
+ [[package]]
593
+ name = "dasp_rms"
594
+ version = "0.11.0"
595
+ source = "registry+https://github.com/rust-lang/crates.io-index"
596
+ checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa"
597
+ dependencies = [
598
+ "dasp_frame",
599
+ "dasp_ring_buffer",
600
+ "dasp_sample",
601
+ ]
602
+
603
+ [[package]]
604
+ name = "dasp_sample"
605
+ version = "0.11.0"
606
+ source = "registry+https://github.com/rust-lang/crates.io-index"
607
+ checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
608
+
609
+ [[package]]
610
+ name = "dasp_signal"
611
+ version = "0.11.0"
612
+ source = "registry+https://github.com/rust-lang/crates.io-index"
613
+ checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7"
614
+ dependencies = [
615
+ "dasp_envelope",
616
+ "dasp_frame",
617
+ "dasp_interpolate",
618
+ "dasp_peak",
619
+ "dasp_ring_buffer",
620
+ "dasp_rms",
621
+ "dasp_sample",
622
+ "dasp_window",
623
+ ]
624
+
625
+ [[package]]
626
+ name = "dasp_window"
627
+ version = "0.11.1"
628
+ source = "registry+https://github.com/rust-lang/crates.io-index"
629
+ checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076"
630
+ dependencies = [
631
+ "dasp_sample",
632
+ ]
633
+
634
+ [[package]]
635
+ name = "der"
636
+ version = "0.7.10"
637
+ source = "registry+https://github.com/rust-lang/crates.io-index"
638
+ checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
639
+ dependencies = [
640
+ "pem-rfc7468",
641
+ "zeroize",
642
+ ]
643
+
644
+ [[package]]
645
+ name = "derive_builder"
646
+ version = "0.20.2"
647
+ source = "registry+https://github.com/rust-lang/crates.io-index"
648
+ checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
649
+ dependencies = [
650
+ "derive_builder_macro",
651
+ ]
652
+
653
+ [[package]]
654
+ name = "derive_builder_core"
655
+ version = "0.20.2"
656
+ source = "registry+https://github.com/rust-lang/crates.io-index"
657
+ checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
658
+ dependencies = [
659
+ "darling",
660
+ "proc-macro2",
661
+ "quote",
662
+ "syn 2.0.110",
663
+ ]
664
+
665
+ [[package]]
666
+ name = "derive_builder_macro"
667
+ version = "0.20.2"
668
+ source = "registry+https://github.com/rust-lang/crates.io-index"
669
+ checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
670
+ dependencies = [
671
+ "derive_builder_core",
672
+ "syn 2.0.110",
673
+ ]
674
+
675
+ [[package]]
676
+ name = "digest"
677
+ version = "0.10.7"
678
+ source = "registry+https://github.com/rust-lang/crates.io-index"
679
+ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
680
+ dependencies = [
681
+ "block-buffer",
682
+ "crypto-common",
683
+ ]
684
+
685
+ [[package]]
686
+ name = "displaydoc"
687
+ version = "0.2.5"
688
+ source = "registry+https://github.com/rust-lang/crates.io-index"
689
+ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
690
+ dependencies = [
691
+ "proc-macro2",
692
+ "quote",
693
+ "syn 2.0.110",
694
+ ]
695
+
696
+ [[package]]
697
+ name = "dlv-list"
698
+ version = "0.5.2"
699
+ source = "registry+https://github.com/rust-lang/crates.io-index"
700
+ checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f"
701
+ dependencies = [
702
+ "const-random",
703
+ ]
704
+
705
+ [[package]]
706
+ name = "either"
707
+ version = "1.15.0"
708
+ source = "registry+https://github.com/rust-lang/crates.io-index"
709
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
710
+
711
+ [[package]]
712
+ name = "encode_unicode"
713
+ version = "1.0.0"
714
+ source = "registry+https://github.com/rust-lang/crates.io-index"
715
+ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
716
+
717
+ [[package]]
718
+ name = "encoding_rs"
719
+ version = "0.8.35"
720
+ source = "registry+https://github.com/rust-lang/crates.io-index"
721
+ checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
722
+ dependencies = [
723
+ "cfg-if",
724
+ ]
725
+
726
+ [[package]]
727
+ name = "env_filter"
728
+ version = "0.1.4"
729
+ source = "registry+https://github.com/rust-lang/crates.io-index"
730
+ checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2"
731
+ dependencies = [
732
+ "log",
733
+ "regex",
734
+ ]
735
+
736
+ [[package]]
737
+ name = "env_logger"
738
+ version = "0.11.8"
739
+ source = "registry+https://github.com/rust-lang/crates.io-index"
740
+ checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
741
+ dependencies = [
742
+ "anstream",
743
+ "anstyle",
744
+ "env_filter",
745
+ "jiff",
746
+ "log",
747
+ ]
748
+
749
+ [[package]]
750
+ name = "equivalent"
751
+ version = "1.0.2"
752
+ source = "registry+https://github.com/rust-lang/crates.io-index"
753
+ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
754
+
755
+ [[package]]
756
+ name = "errno"
757
+ version = "0.3.14"
758
+ source = "registry+https://github.com/rust-lang/crates.io-index"
759
+ checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
760
+ dependencies = [
761
+ "libc",
762
+ "windows-sys 0.61.2",
763
+ ]
764
+
765
+ [[package]]
766
+ name = "esaxx-rs"
767
+ version = "0.1.10"
768
+ source = "registry+https://github.com/rust-lang/crates.io-index"
769
+ checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
770
+ dependencies = [
771
+ "cc",
772
+ ]
773
+
774
+ [[package]]
775
+ name = "fastrand"
776
+ version = "2.3.0"
777
+ source = "registry+https://github.com/rust-lang/crates.io-index"
778
+ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
779
+
780
+ [[package]]
781
+ name = "filetime"
782
+ version = "0.2.26"
783
+ source = "registry+https://github.com/rust-lang/crates.io-index"
784
+ checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed"
785
+ dependencies = [
786
+ "cfg-if",
787
+ "libc",
788
+ "libredox",
789
+ "windows-sys 0.60.2",
790
+ ]
791
+
792
+ [[package]]
793
+ name = "find-msvc-tools"
794
+ version = "0.1.5"
795
+ source = "registry+https://github.com/rust-lang/crates.io-index"
796
+ checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
797
+
798
+ [[package]]
799
+ name = "flate2"
800
+ version = "1.1.5"
801
+ source = "registry+https://github.com/rust-lang/crates.io-index"
802
+ checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
803
+ dependencies = [
804
+ "crc32fast",
805
+ "miniz_oxide",
806
+ ]
807
+
808
+ [[package]]
809
+ name = "fnv"
810
+ version = "1.0.7"
811
+ source = "registry+https://github.com/rust-lang/crates.io-index"
812
+ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
813
+
814
+ [[package]]
815
+ name = "foldhash"
816
+ version = "0.2.0"
817
+ source = "registry+https://github.com/rust-lang/crates.io-index"
818
+ checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
819
+
820
+ [[package]]
821
+ name = "foreign-types"
822
+ version = "0.3.2"
823
+ source = "registry+https://github.com/rust-lang/crates.io-index"
824
+ checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
825
+ dependencies = [
826
+ "foreign-types-shared",
827
+ ]
828
+
829
+ [[package]]
830
+ name = "foreign-types-shared"
831
+ version = "0.1.1"
832
+ source = "registry+https://github.com/rust-lang/crates.io-index"
833
+ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
834
+
835
+ [[package]]
836
+ name = "form_urlencoded"
837
+ version = "1.2.2"
838
+ source = "registry+https://github.com/rust-lang/crates.io-index"
839
+ checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
840
+ dependencies = [
841
+ "percent-encoding",
842
+ ]
843
+
844
+ [[package]]
845
+ name = "futures-channel"
846
+ version = "0.3.31"
847
+ source = "registry+https://github.com/rust-lang/crates.io-index"
848
+ checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
849
+ dependencies = [
850
+ "futures-core",
851
+ "futures-sink",
852
+ ]
853
+
854
+ [[package]]
855
+ name = "futures-core"
856
+ version = "0.3.31"
857
+ source = "registry+https://github.com/rust-lang/crates.io-index"
858
+ checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
859
+
860
+ [[package]]
861
+ name = "futures-io"
862
+ version = "0.3.31"
863
+ source = "registry+https://github.com/rust-lang/crates.io-index"
864
+ checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
865
+
866
+ [[package]]
867
+ name = "futures-sink"
868
+ version = "0.3.31"
869
+ source = "registry+https://github.com/rust-lang/crates.io-index"
870
+ checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
871
+
872
+ [[package]]
873
+ name = "futures-task"
874
+ version = "0.3.31"
875
+ source = "registry+https://github.com/rust-lang/crates.io-index"
876
+ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
877
+
878
+ [[package]]
879
+ name = "futures-util"
880
+ version = "0.3.31"
881
+ source = "registry+https://github.com/rust-lang/crates.io-index"
882
+ checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
883
+ dependencies = [
884
+ "futures-core",
885
+ "futures-io",
886
+ "futures-sink",
887
+ "futures-task",
888
+ "memchr",
889
+ "pin-project-lite",
890
+ "pin-utils",
891
+ "slab",
892
+ ]
893
+
894
+ [[package]]
895
+ name = "fxhash"
896
+ version = "0.2.1"
897
+ source = "registry+https://github.com/rust-lang/crates.io-index"
898
+ checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
899
+ dependencies = [
900
+ "byteorder",
901
+ ]
902
+
903
+ [[package]]
904
+ name = "generic-array"
905
+ version = "0.14.7"
906
+ source = "registry+https://github.com/rust-lang/crates.io-index"
907
+ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
908
+ dependencies = [
909
+ "typenum",
910
+ "version_check",
911
+ ]
912
+
913
+ [[package]]
914
+ name = "getrandom"
915
+ version = "0.2.16"
916
+ source = "registry+https://github.com/rust-lang/crates.io-index"
917
+ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
918
+ dependencies = [
919
+ "cfg-if",
920
+ "libc",
921
+ "wasi",
922
+ ]
923
+
924
+ [[package]]
925
+ name = "getrandom"
926
+ version = "0.3.4"
927
+ source = "registry+https://github.com/rust-lang/crates.io-index"
928
+ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
929
+ dependencies = [
930
+ "cfg-if",
931
+ "libc",
932
+ "r-efi",
933
+ "wasip2",
934
+ ]
935
+
936
+ [[package]]
937
+ name = "h2"
938
+ version = "0.4.12"
939
+ source = "registry+https://github.com/rust-lang/crates.io-index"
940
+ checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
941
+ dependencies = [
942
+ "atomic-waker",
943
+ "bytes",
944
+ "fnv",
945
+ "futures-core",
946
+ "futures-sink",
947
+ "http",
948
+ "indexmap",
949
+ "slab",
950
+ "tokio",
951
+ "tokio-util",
952
+ "tracing",
953
+ ]
954
+
955
+ [[package]]
956
+ name = "half"
957
+ version = "2.7.1"
958
+ source = "registry+https://github.com/rust-lang/crates.io-index"
959
+ checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
960
+ dependencies = [
961
+ "cfg-if",
962
+ "crunchy",
963
+ "zerocopy",
964
+ ]
965
+
966
+ [[package]]
967
+ name = "hashbrown"
968
+ version = "0.14.5"
969
+ source = "registry+https://github.com/rust-lang/crates.io-index"
970
+ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
971
+ dependencies = [
972
+ "ahash",
973
+ "allocator-api2",
974
+ ]
975
+
976
+ [[package]]
977
+ name = "hashbrown"
978
+ version = "0.16.0"
979
+ source = "registry+https://github.com/rust-lang/crates.io-index"
980
+ checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
981
+ dependencies = [
982
+ "allocator-api2",
983
+ "equivalent",
984
+ "foldhash",
985
+ ]
986
+
987
+ [[package]]
988
+ name = "hashlink"
989
+ version = "0.8.4"
990
+ source = "registry+https://github.com/rust-lang/crates.io-index"
991
+ checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
992
+ dependencies = [
993
+ "hashbrown 0.14.5",
994
+ ]
995
+
996
+ [[package]]
997
+ name = "heck"
998
+ version = "0.5.0"
999
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1000
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
1001
+
1002
+ [[package]]
1003
+ name = "hermit-abi"
1004
+ version = "0.5.2"
1005
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1006
+ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
1007
+
1008
+ [[package]]
1009
+ name = "hex"
1010
+ version = "0.4.3"
1011
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1012
+ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
1013
+
1014
+ [[package]]
1015
+ name = "hound"
1016
+ version = "3.5.1"
1017
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1018
+ checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
1019
+
1020
+ [[package]]
1021
+ name = "http"
1022
+ version = "1.3.1"
1023
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1024
+ checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
1025
+ dependencies = [
1026
+ "bytes",
1027
+ "fnv",
1028
+ "itoa",
1029
+ ]
1030
+
1031
+ [[package]]
1032
+ name = "http-body"
1033
+ version = "1.0.1"
1034
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1035
+ checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
1036
+ dependencies = [
1037
+ "bytes",
1038
+ "http",
1039
+ ]
1040
+
1041
+ [[package]]
1042
+ name = "http-body-util"
1043
+ version = "0.1.3"
1044
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1045
+ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
1046
+ dependencies = [
1047
+ "bytes",
1048
+ "futures-core",
1049
+ "http",
1050
+ "http-body",
1051
+ "pin-project-lite",
1052
+ ]
1053
+
1054
+ [[package]]
1055
+ name = "httparse"
1056
+ version = "1.10.1"
1057
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1058
+ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
1059
+
1060
+ [[package]]
1061
+ name = "hyper"
1062
+ version = "1.8.1"
1063
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1064
+ checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
1065
+ dependencies = [
1066
+ "atomic-waker",
1067
+ "bytes",
1068
+ "futures-channel",
1069
+ "futures-core",
1070
+ "h2",
1071
+ "http",
1072
+ "http-body",
1073
+ "httparse",
1074
+ "itoa",
1075
+ "pin-project-lite",
1076
+ "pin-utils",
1077
+ "smallvec 1.15.1",
1078
+ "tokio",
1079
+ "want",
1080
+ ]
1081
+
1082
+ [[package]]
1083
+ name = "hyper-rustls"
1084
+ version = "0.27.7"
1085
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1086
+ checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
1087
+ dependencies = [
1088
+ "http",
1089
+ "hyper",
1090
+ "hyper-util",
1091
+ "rustls",
1092
+ "rustls-pki-types",
1093
+ "tokio",
1094
+ "tokio-rustls",
1095
+ "tower-service",
1096
+ ]
1097
+
1098
+ [[package]]
1099
+ name = "hyper-tls"
1100
+ version = "0.6.0"
1101
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1102
+ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
1103
+ dependencies = [
1104
+ "bytes",
1105
+ "http-body-util",
1106
+ "hyper",
1107
+ "hyper-util",
1108
+ "native-tls",
1109
+ "tokio",
1110
+ "tokio-native-tls",
1111
+ "tower-service",
1112
+ ]
1113
+
1114
+ [[package]]
1115
+ name = "hyper-util"
1116
+ version = "0.1.18"
1117
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1118
+ checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56"
1119
+ dependencies = [
1120
+ "base64 0.22.1",
1121
+ "bytes",
1122
+ "futures-channel",
1123
+ "futures-core",
1124
+ "futures-util",
1125
+ "http",
1126
+ "http-body",
1127
+ "hyper",
1128
+ "ipnet",
1129
+ "libc",
1130
+ "percent-encoding",
1131
+ "pin-project-lite",
1132
+ "socket2",
1133
+ "system-configuration",
1134
+ "tokio",
1135
+ "tower-service",
1136
+ "tracing",
1137
+ "windows-registry",
1138
+ ]
1139
+
1140
+ [[package]]
1141
+ name = "icu_collections"
1142
+ version = "2.1.1"
1143
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1144
+ checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
1145
+ dependencies = [
1146
+ "displaydoc",
1147
+ "potential_utf",
1148
+ "yoke",
1149
+ "zerofrom",
1150
+ "zerovec",
1151
+ ]
1152
+
1153
+ [[package]]
1154
+ name = "icu_locale_core"
1155
+ version = "2.1.1"
1156
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1157
+ checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
1158
+ dependencies = [
1159
+ "displaydoc",
1160
+ "litemap",
1161
+ "tinystr",
1162
+ "writeable",
1163
+ "zerovec",
1164
+ ]
1165
+
1166
+ [[package]]
1167
+ name = "icu_normalizer"
1168
+ version = "2.1.1"
1169
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1170
+ checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
1171
+ dependencies = [
1172
+ "icu_collections",
1173
+ "icu_normalizer_data",
1174
+ "icu_properties",
1175
+ "icu_provider",
1176
+ "smallvec 1.15.1",
1177
+ "zerovec",
1178
+ ]
1179
+
1180
+ [[package]]
1181
+ name = "icu_normalizer_data"
1182
+ version = "2.1.1"
1183
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1184
+ checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
1185
+
1186
+ [[package]]
1187
+ name = "icu_properties"
1188
+ version = "2.1.1"
1189
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1190
+ checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99"
1191
+ dependencies = [
1192
+ "icu_collections",
1193
+ "icu_locale_core",
1194
+ "icu_properties_data",
1195
+ "icu_provider",
1196
+ "zerotrie",
1197
+ "zerovec",
1198
+ ]
1199
+
1200
+ [[package]]
1201
+ name = "icu_properties_data"
1202
+ version = "2.1.1"
1203
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1204
+ checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899"
1205
+
1206
+ [[package]]
1207
+ name = "icu_provider"
1208
+ version = "2.1.1"
1209
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1210
+ checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
1211
+ dependencies = [
1212
+ "displaydoc",
1213
+ "icu_locale_core",
1214
+ "writeable",
1215
+ "yoke",
1216
+ "zerofrom",
1217
+ "zerotrie",
1218
+ "zerovec",
1219
+ ]
1220
+
1221
+ [[package]]
1222
+ name = "ident_case"
1223
+ version = "1.0.1"
1224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1225
+ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
1226
+
1227
+ [[package]]
1228
+ name = "idna"
1229
+ version = "1.1.0"
1230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1231
+ checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
1232
+ dependencies = [
1233
+ "idna_adapter",
1234
+ "smallvec 1.15.1",
1235
+ "utf8_iter",
1236
+ ]
1237
+
1238
+ [[package]]
1239
+ name = "idna_adapter"
1240
+ version = "1.2.1"
1241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1242
+ checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
1243
+ dependencies = [
1244
+ "icu_normalizer",
1245
+ "icu_properties",
1246
+ ]
1247
+
1248
+ [[package]]
1249
+ name = "include-flate"
1250
+ version = "0.3.1"
1251
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1252
+ checksum = "e01b7cb6ca682a621e7cda1c358c9724b53a7b4409be9be1dd443b7f3a26f998"
1253
+ dependencies = [
1254
+ "include-flate-codegen",
1255
+ "include-flate-compress",
1256
+ "libflate",
1257
+ "zstd",
1258
+ ]
1259
+
1260
+ [[package]]
1261
+ name = "include-flate-codegen"
1262
+ version = "0.3.1"
1263
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1264
+ checksum = "4f49bf5274aebe468d6e6eba14a977eaf1efa481dc173f361020de70c1c48050"
1265
+ dependencies = [
1266
+ "include-flate-compress",
1267
+ "libflate",
1268
+ "proc-macro-error",
1269
+ "proc-macro2",
1270
+ "quote",
1271
+ "syn 2.0.110",
1272
+ "zstd",
1273
+ ]
1274
+
1275
+ [[package]]
1276
+ name = "include-flate-compress"
1277
+ version = "0.3.1"
1278
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1279
+ checksum = "eae6a40e716bcd5931f5dbb79cd921512a4f647e2e9413fded3171fca3824dbc"
1280
+ dependencies = [
1281
+ "libflate",
1282
+ "zstd",
1283
+ ]
1284
+
1285
+ [[package]]
1286
+ name = "indexmap"
1287
+ version = "2.12.0"
1288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1289
+ checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
1290
+ dependencies = [
1291
+ "equivalent",
1292
+ "hashbrown 0.16.0",
1293
+ ]
1294
+
1295
+ [[package]]
1296
+ name = "indextts"
1297
+ version = "0.1.0"
1298
+ dependencies = [
1299
+ "anyhow",
1300
+ "bytemuck",
1301
+ "clap",
1302
+ "config",
1303
+ "criterion",
1304
+ "dasp_sample",
1305
+ "dasp_signal",
1306
+ "env_logger",
1307
+ "hex",
1308
+ "hound",
1309
+ "indicatif",
1310
+ "jieba-rs",
1311
+ "lazy_static",
1312
+ "log",
1313
+ "ndarray 0.15.6",
1314
+ "num-complex",
1315
+ "num-traits",
1316
+ "num_cpus",
1317
+ "ort",
1318
+ "rand",
1319
+ "rayon",
1320
+ "realfft",
1321
+ "regex",
1322
+ "reqwest",
1323
+ "rubato",
1324
+ "rustfft",
1325
+ "safetensors",
1326
+ "serde",
1327
+ "serde_json",
1328
+ "serde_yaml",
1329
+ "sha2",
1330
+ "tempfile",
1331
+ "thiserror",
1332
+ "tokenizers",
1333
+ "tokio",
1334
+ "toml",
1335
+ "unicode-segmentation",
1336
+ ]
1337
+
1338
+ [[package]]
1339
+ name = "indicatif"
1340
+ version = "0.17.11"
1341
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1342
+ checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
1343
+ dependencies = [
1344
+ "console",
1345
+ "number_prefix",
1346
+ "portable-atomic",
1347
+ "unicode-width",
1348
+ "web-time",
1349
+ ]
1350
+
1351
+ [[package]]
1352
+ name = "ipnet"
1353
+ version = "2.11.0"
1354
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1355
+ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
1356
+
1357
+ [[package]]
1358
+ name = "iri-string"
1359
+ version = "0.7.9"
1360
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1361
+ checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397"
1362
+ dependencies = [
1363
+ "memchr",
1364
+ "serde",
1365
+ ]
1366
+
1367
+ [[package]]
1368
+ name = "is-terminal"
1369
+ version = "0.4.17"
1370
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1371
+ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
1372
+ dependencies = [
1373
+ "hermit-abi",
1374
+ "libc",
1375
+ "windows-sys 0.61.2",
1376
+ ]
1377
+
1378
+ [[package]]
1379
+ name = "is_terminal_polyfill"
1380
+ version = "1.70.2"
1381
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1382
+ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
1383
+
1384
+ [[package]]
1385
+ name = "itertools"
1386
+ version = "0.10.5"
1387
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1388
+ checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
1389
+ dependencies = [
1390
+ "either",
1391
+ ]
1392
+
1393
+ [[package]]
1394
+ name = "itertools"
1395
+ version = "0.11.0"
1396
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1397
+ checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
1398
+ dependencies = [
1399
+ "either",
1400
+ ]
1401
+
1402
+ [[package]]
1403
+ name = "itertools"
1404
+ version = "0.12.1"
1405
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1406
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
1407
+ dependencies = [
1408
+ "either",
1409
+ ]
1410
+
1411
+ [[package]]
1412
+ name = "itoa"
1413
+ version = "1.0.15"
1414
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1415
+ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
1416
+
1417
+ [[package]]
1418
+ name = "jieba-macros"
1419
+ version = "0.7.1"
1420
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1421
+ checksum = "7c676b32a471d3cfae8dac2ad2f8334cd52e53377733cca8c1fb0a5062fec192"
1422
+ dependencies = [
1423
+ "phf_codegen",
1424
+ ]
1425
+
1426
+ [[package]]
1427
+ name = "jieba-rs"
1428
+ version = "0.7.4"
1429
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1430
+ checksum = "f5dd552bbb95d578520ee68403bf8aaf0dbbb2ce55b0854d019f9350ad61040a"
1431
+ dependencies = [
1432
+ "cedarwood",
1433
+ "fxhash",
1434
+ "include-flate",
1435
+ "jieba-macros",
1436
+ "lazy_static",
1437
+ "phf",
1438
+ "regex",
1439
+ ]
1440
+
1441
+ [[package]]
1442
+ name = "jiff"
1443
+ version = "0.2.16"
1444
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1445
+ checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35"
1446
+ dependencies = [
1447
+ "jiff-static",
1448
+ "log",
1449
+ "portable-atomic",
1450
+ "portable-atomic-util",
1451
+ "serde_core",
1452
+ ]
1453
+
1454
+ [[package]]
1455
+ name = "jiff-static"
1456
+ version = "0.2.16"
1457
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1458
+ checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
1459
+ dependencies = [
1460
+ "proc-macro2",
1461
+ "quote",
1462
+ "syn 2.0.110",
1463
+ ]
1464
+
1465
+ [[package]]
1466
+ name = "jobserver"
1467
+ version = "0.1.34"
1468
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1469
+ checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
1470
+ dependencies = [
1471
+ "getrandom 0.3.4",
1472
+ "libc",
1473
+ ]
1474
+
1475
+ [[package]]
1476
+ name = "js-sys"
1477
+ version = "0.3.82"
1478
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1479
+ checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
1480
+ dependencies = [
1481
+ "once_cell",
1482
+ "wasm-bindgen",
1483
+ ]
1484
+
1485
+ [[package]]
1486
+ name = "json5"
1487
+ version = "0.4.1"
1488
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1489
+ checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1"
1490
+ dependencies = [
1491
+ "pest",
1492
+ "pest_derive",
1493
+ "serde",
1494
+ ]
1495
+
1496
+ [[package]]
1497
+ name = "lazy_static"
1498
+ version = "1.5.0"
1499
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1500
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
1501
+
1502
+ [[package]]
1503
+ name = "libc"
1504
+ version = "0.2.177"
1505
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1506
+ checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
1507
+
1508
+ [[package]]
1509
+ name = "libflate"
1510
+ version = "2.2.1"
1511
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1512
+ checksum = "e3248b8d211bd23a104a42d81b4fa8bb8ac4a3b75e7a43d85d2c9ccb6179cd74"
1513
+ dependencies = [
1514
+ "adler32",
1515
+ "core2",
1516
+ "crc32fast",
1517
+ "dary_heap",
1518
+ "libflate_lz77",
1519
+ ]
1520
+
1521
+ [[package]]
1522
+ name = "libflate_lz77"
1523
+ version = "2.2.0"
1524
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1525
+ checksum = "a599cb10a9cd92b1300debcef28da8f70b935ec937f44fcd1b70a7c986a11c5c"
1526
+ dependencies = [
1527
+ "core2",
1528
+ "hashbrown 0.16.0",
1529
+ "rle-decode-fast",
1530
+ ]
1531
+
1532
+ [[package]]
1533
+ name = "libloading"
1534
+ version = "0.8.9"
1535
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1536
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
1537
+ dependencies = [
1538
+ "cfg-if",
1539
+ "windows-link",
1540
+ ]
1541
+
1542
+ [[package]]
1543
+ name = "libredox"
1544
+ version = "0.1.10"
1545
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1546
+ checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb"
1547
+ dependencies = [
1548
+ "bitflags",
1549
+ "libc",
1550
+ "redox_syscall",
1551
+ ]
1552
+
1553
+ [[package]]
1554
+ name = "linux-raw-sys"
1555
+ version = "0.11.0"
1556
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1557
+ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
1558
+
1559
+ [[package]]
1560
+ name = "litemap"
1561
+ version = "0.8.1"
1562
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1563
+ checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
1564
+
1565
+ [[package]]
1566
+ name = "lock_api"
1567
+ version = "0.4.14"
1568
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1569
+ checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
1570
+ dependencies = [
1571
+ "scopeguard",
1572
+ ]
1573
+
1574
+ [[package]]
1575
+ name = "log"
1576
+ version = "0.4.28"
1577
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1578
+ checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
1579
+
1580
+ [[package]]
1581
+ name = "macro_rules_attribute"
1582
+ version = "0.2.2"
1583
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1584
+ checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520"
1585
+ dependencies = [
1586
+ "macro_rules_attribute-proc_macro",
1587
+ "paste",
1588
+ ]
1589
+
1590
+ [[package]]
1591
+ name = "macro_rules_attribute-proc_macro"
1592
+ version = "0.2.2"
1593
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1594
+ checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
1595
+
1596
+ [[package]]
1597
+ name = "matrixmultiply"
1598
+ version = "0.3.10"
1599
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1600
+ checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
1601
+ dependencies = [
1602
+ "autocfg",
1603
+ "rawpointer",
1604
+ ]
1605
+
1606
+ [[package]]
1607
+ name = "memchr"
1608
+ version = "2.7.6"
1609
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1610
+ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
1611
+
1612
+ [[package]]
1613
+ name = "mime"
1614
+ version = "0.3.17"
1615
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1616
+ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
1617
+
1618
+ [[package]]
1619
+ name = "minimal-lexical"
1620
+ version = "0.2.1"
1621
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1622
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
1623
+
1624
+ [[package]]
1625
+ name = "miniz_oxide"
1626
+ version = "0.8.9"
1627
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1628
+ checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
1629
+ dependencies = [
1630
+ "adler2",
1631
+ "simd-adler32",
1632
+ ]
1633
+
1634
+ [[package]]
1635
+ name = "mio"
1636
+ version = "1.1.0"
1637
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1638
+ checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
1639
+ dependencies = [
1640
+ "libc",
1641
+ "wasi",
1642
+ "windows-sys 0.61.2",
1643
+ ]
1644
+
1645
+ [[package]]
1646
+ name = "monostate"
1647
+ version = "0.1.18"
1648
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1649
+ checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67"
1650
+ dependencies = [
1651
+ "monostate-impl",
1652
+ "serde",
1653
+ "serde_core",
1654
+ ]
1655
+
1656
+ [[package]]
1657
+ name = "monostate-impl"
1658
+ version = "0.1.18"
1659
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1660
+ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
1661
+ dependencies = [
1662
+ "proc-macro2",
1663
+ "quote",
1664
+ "syn 2.0.110",
1665
+ ]
1666
+
1667
+ [[package]]
1668
+ name = "native-tls"
1669
+ version = "0.2.14"
1670
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1671
+ checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
1672
+ dependencies = [
1673
+ "libc",
1674
+ "log",
1675
+ "openssl",
1676
+ "openssl-probe",
1677
+ "openssl-sys",
1678
+ "schannel",
1679
+ "security-framework",
1680
+ "security-framework-sys",
1681
+ "tempfile",
1682
+ ]
1683
+
1684
+ [[package]]
1685
+ name = "ndarray"
1686
+ version = "0.15.6"
1687
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1688
+ checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
1689
+ dependencies = [
1690
+ "matrixmultiply",
1691
+ "num-complex",
1692
+ "num-integer",
1693
+ "num-traits",
1694
+ "rawpointer",
1695
+ "rayon",
1696
+ ]
1697
+
1698
+ [[package]]
1699
+ name = "ndarray"
1700
+ version = "0.16.1"
1701
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1702
+ checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
1703
+ dependencies = [
1704
+ "matrixmultiply",
1705
+ "num-complex",
1706
+ "num-integer",
1707
+ "num-traits",
1708
+ "portable-atomic",
1709
+ "portable-atomic-util",
1710
+ "rawpointer",
1711
+ ]
1712
+
1713
+ [[package]]
1714
+ name = "nom"
1715
+ version = "7.1.3"
1716
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1717
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
1718
+ dependencies = [
1719
+ "memchr",
1720
+ "minimal-lexical",
1721
+ ]
1722
+
1723
+ [[package]]
1724
+ name = "num-complex"
1725
+ version = "0.4.6"
1726
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1727
+ checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
1728
+ dependencies = [
1729
+ "num-traits",
1730
+ ]
1731
+
1732
+ [[package]]
1733
+ name = "num-integer"
1734
+ version = "0.1.46"
1735
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1736
+ checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
1737
+ dependencies = [
1738
+ "num-traits",
1739
+ ]
1740
+
1741
+ [[package]]
1742
+ name = "num-traits"
1743
+ version = "0.2.19"
1744
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1745
+ checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
1746
+ dependencies = [
1747
+ "autocfg",
1748
+ ]
1749
+
1750
+ [[package]]
1751
+ name = "num_cpus"
1752
+ version = "1.17.0"
1753
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1754
+ checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
1755
+ dependencies = [
1756
+ "hermit-abi",
1757
+ "libc",
1758
+ ]
1759
+
1760
+ [[package]]
1761
+ name = "number_prefix"
1762
+ version = "0.4.0"
1763
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1764
+ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
1765
+
1766
+ [[package]]
1767
+ name = "once_cell"
1768
+ version = "1.21.3"
1769
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1770
+ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
1771
+
1772
+ [[package]]
1773
+ name = "once_cell_polyfill"
1774
+ version = "1.70.2"
1775
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1776
+ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
1777
+
1778
+ [[package]]
1779
+ name = "onig"
1780
+ version = "6.5.1"
1781
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1782
+ checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
1783
+ dependencies = [
1784
+ "bitflags",
1785
+ "libc",
1786
+ "once_cell",
1787
+ "onig_sys",
1788
+ ]
1789
+
1790
+ [[package]]
1791
+ name = "onig_sys"
1792
+ version = "69.9.1"
1793
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1794
+ checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
1795
+ dependencies = [
1796
+ "cc",
1797
+ "pkg-config",
1798
+ ]
1799
+
1800
+ [[package]]
1801
+ name = "oorandom"
1802
+ version = "11.1.5"
1803
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1804
+ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
1805
+
1806
+ [[package]]
1807
+ name = "openssl"
1808
+ version = "0.10.75"
1809
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1810
+ checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
1811
+ dependencies = [
1812
+ "bitflags",
1813
+ "cfg-if",
1814
+ "foreign-types",
1815
+ "libc",
1816
+ "once_cell",
1817
+ "openssl-macros",
1818
+ "openssl-sys",
1819
+ ]
1820
+
1821
+ [[package]]
1822
+ name = "openssl-macros"
1823
+ version = "0.1.1"
1824
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1825
+ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
1826
+ dependencies = [
1827
+ "proc-macro2",
1828
+ "quote",
1829
+ "syn 2.0.110",
1830
+ ]
1831
+
1832
+ [[package]]
1833
+ name = "openssl-probe"
1834
+ version = "0.1.6"
1835
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1836
+ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
1837
+
1838
+ [[package]]
1839
+ name = "openssl-sys"
1840
+ version = "0.9.111"
1841
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1842
+ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321"
1843
+ dependencies = [
1844
+ "cc",
1845
+ "libc",
1846
+ "pkg-config",
1847
+ "vcpkg",
1848
+ ]
1849
+
1850
+ [[package]]
1851
+ name = "ordered-multimap"
1852
+ version = "0.7.3"
1853
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1854
+ checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79"
1855
+ dependencies = [
1856
+ "dlv-list",
1857
+ "hashbrown 0.14.5",
1858
+ ]
1859
+
1860
+ [[package]]
1861
+ name = "ort"
1862
+ version = "2.0.0-rc.10"
1863
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1864
+ checksum = "1fa7e49bd669d32d7bc2a15ec540a527e7764aec722a45467814005725bcd721"
1865
+ dependencies = [
1866
+ "libloading",
1867
+ "ndarray 0.16.1",
1868
+ "ort-sys",
1869
+ "smallvec 2.0.0-alpha.10",
1870
+ "tracing",
1871
+ ]
1872
+
1873
+ [[package]]
1874
+ name = "ort-sys"
1875
+ version = "2.0.0-rc.10"
1876
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1877
+ checksum = "e2aba9f5c7c479925205799216e7e5d07cc1d4fa76ea8058c60a9a30f6a4e890"
1878
+ dependencies = [
1879
+ "flate2",
1880
+ "pkg-config",
1881
+ "sha2",
1882
+ "tar",
1883
+ "ureq",
1884
+ ]
1885
+
1886
+ [[package]]
1887
+ name = "parking_lot"
1888
+ version = "0.12.5"
1889
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1890
+ checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
1891
+ dependencies = [
1892
+ "lock_api",
1893
+ "parking_lot_core",
1894
+ ]
1895
+
1896
+ [[package]]
1897
+ name = "parking_lot_core"
1898
+ version = "0.9.12"
1899
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1900
+ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
1901
+ dependencies = [
1902
+ "cfg-if",
1903
+ "libc",
1904
+ "redox_syscall",
1905
+ "smallvec 1.15.1",
1906
+ "windows-link",
1907
+ ]
1908
+
1909
+ [[package]]
1910
+ name = "paste"
1911
+ version = "1.0.15"
1912
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1913
+ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
1914
+
1915
+ [[package]]
1916
+ name = "pathdiff"
1917
+ version = "0.2.3"
1918
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1919
+ checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
1920
+
1921
+ [[package]]
1922
+ name = "pem-rfc7468"
1923
+ version = "0.7.0"
1924
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1925
+ checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
1926
+ dependencies = [
1927
+ "base64ct",
1928
+ ]
1929
+
1930
+ [[package]]
1931
+ name = "percent-encoding"
1932
+ version = "2.3.2"
1933
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1934
+ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
1935
+
1936
+ [[package]]
1937
+ name = "pest"
1938
+ version = "2.8.3"
1939
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1940
+ checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4"
1941
+ dependencies = [
1942
+ "memchr",
1943
+ "ucd-trie",
1944
+ ]
1945
+
1946
+ [[package]]
1947
+ name = "pest_derive"
1948
+ version = "2.8.3"
1949
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1950
+ checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de"
1951
+ dependencies = [
1952
+ "pest",
1953
+ "pest_generator",
1954
+ ]
1955
+
1956
+ [[package]]
1957
+ name = "pest_generator"
1958
+ version = "2.8.3"
1959
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1960
+ checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843"
1961
+ dependencies = [
1962
+ "pest",
1963
+ "pest_meta",
1964
+ "proc-macro2",
1965
+ "quote",
1966
+ "syn 2.0.110",
1967
+ ]
1968
+
1969
+ [[package]]
1970
+ name = "pest_meta"
1971
+ version = "2.8.3"
1972
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1973
+ checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a"
1974
+ dependencies = [
1975
+ "pest",
1976
+ "sha2",
1977
+ ]
1978
+
1979
+ [[package]]
1980
+ name = "phf"
1981
+ version = "0.11.3"
1982
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1983
+ checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
1984
+ dependencies = [
1985
+ "phf_shared",
1986
+ ]
1987
+
1988
+ [[package]]
1989
+ name = "phf_codegen"
1990
+ version = "0.11.3"
1991
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1992
+ checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
1993
+ dependencies = [
1994
+ "phf_generator",
1995
+ "phf_shared",
1996
+ ]
1997
+
1998
+ [[package]]
1999
+ name = "phf_generator"
2000
+ version = "0.11.3"
2001
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2002
+ checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
2003
+ dependencies = [
2004
+ "phf_shared",
2005
+ "rand",
2006
+ ]
2007
+
2008
+ [[package]]
2009
+ name = "phf_shared"
2010
+ version = "0.11.3"
2011
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2012
+ checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
2013
+ dependencies = [
2014
+ "siphasher",
2015
+ ]
2016
+
2017
+ [[package]]
2018
+ name = "pin-project-lite"
2019
+ version = "0.2.16"
2020
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2021
+ checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
2022
+
2023
+ [[package]]
2024
+ name = "pin-utils"
2025
+ version = "0.1.0"
2026
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2027
+ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
2028
+
2029
+ [[package]]
2030
+ name = "pkg-config"
2031
+ version = "0.3.32"
2032
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2033
+ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
2034
+
2035
+ [[package]]
2036
+ name = "plotters"
2037
+ version = "0.3.7"
2038
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2039
+ checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
2040
+ dependencies = [
2041
+ "num-traits",
2042
+ "plotters-backend",
2043
+ "plotters-svg",
2044
+ "wasm-bindgen",
2045
+ "web-sys",
2046
+ ]
2047
+
2048
+ [[package]]
2049
+ name = "plotters-backend"
2050
+ version = "0.3.7"
2051
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2052
+ checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
2053
+
2054
+ [[package]]
2055
+ name = "plotters-svg"
2056
+ version = "0.3.7"
2057
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2058
+ checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
2059
+ dependencies = [
2060
+ "plotters-backend",
2061
+ ]
2062
+
2063
+ [[package]]
2064
+ name = "portable-atomic"
2065
+ version = "1.11.1"
2066
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2067
+ checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
2068
+
2069
+ [[package]]
2070
+ name = "portable-atomic-util"
2071
+ version = "0.2.4"
2072
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2073
+ checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
2074
+ dependencies = [
2075
+ "portable-atomic",
2076
+ ]
2077
+
2078
+ [[package]]
2079
+ name = "potential_utf"
2080
+ version = "0.1.4"
2081
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2082
+ checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
2083
+ dependencies = [
2084
+ "zerovec",
2085
+ ]
2086
+
2087
+ [[package]]
2088
+ name = "ppv-lite86"
2089
+ version = "0.2.21"
2090
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2091
+ checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
2092
+ dependencies = [
2093
+ "zerocopy",
2094
+ ]
2095
+
2096
+ [[package]]
2097
+ name = "primal-check"
2098
+ version = "0.3.4"
2099
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2100
+ checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08"
2101
+ dependencies = [
2102
+ "num-integer",
2103
+ ]
2104
+
2105
+ [[package]]
2106
+ name = "proc-macro-error"
2107
+ version = "1.0.4"
2108
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2109
+ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
2110
+ dependencies = [
2111
+ "proc-macro-error-attr",
2112
+ "proc-macro2",
2113
+ "quote",
2114
+ "syn 1.0.109",
2115
+ "version_check",
2116
+ ]
2117
+
2118
+ [[package]]
2119
+ name = "proc-macro-error-attr"
2120
+ version = "1.0.4"
2121
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2122
+ checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
2123
+ dependencies = [
2124
+ "proc-macro2",
2125
+ "quote",
2126
+ "version_check",
2127
+ ]
2128
+
2129
+ [[package]]
2130
+ name = "proc-macro2"
2131
+ version = "1.0.103"
2132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2133
+ checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
2134
+ dependencies = [
2135
+ "unicode-ident",
2136
+ ]
2137
+
2138
+ [[package]]
2139
+ name = "quote"
2140
+ version = "1.0.42"
2141
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2142
+ checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
2143
+ dependencies = [
2144
+ "proc-macro2",
2145
+ ]
2146
+
2147
+ [[package]]
2148
+ name = "r-efi"
2149
+ version = "5.3.0"
2150
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2151
+ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
2152
+
2153
+ [[package]]
2154
+ name = "rand"
2155
+ version = "0.8.5"
2156
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2157
+ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
2158
+ dependencies = [
2159
+ "libc",
2160
+ "rand_chacha",
2161
+ "rand_core",
2162
+ ]
2163
+
2164
+ [[package]]
2165
+ name = "rand_chacha"
2166
+ version = "0.3.1"
2167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2168
+ checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
2169
+ dependencies = [
2170
+ "ppv-lite86",
2171
+ "rand_core",
2172
+ ]
2173
+
2174
+ [[package]]
2175
+ name = "rand_core"
2176
+ version = "0.6.4"
2177
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2178
+ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
2179
+ dependencies = [
2180
+ "getrandom 0.2.16",
2181
+ ]
2182
+
2183
+ [[package]]
2184
+ name = "rawpointer"
2185
+ version = "0.2.1"
2186
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2187
+ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
2188
+
2189
+ [[package]]
2190
+ name = "rayon"
2191
+ version = "1.11.0"
2192
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2193
+ checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
2194
+ dependencies = [
2195
+ "either",
2196
+ "rayon-core",
2197
+ ]
2198
+
2199
+ [[package]]
2200
+ name = "rayon-cond"
2201
+ version = "0.3.0"
2202
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2203
+ checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
2204
+ dependencies = [
2205
+ "either",
2206
+ "itertools 0.11.0",
2207
+ "rayon",
2208
+ ]
2209
+
2210
+ [[package]]
2211
+ name = "rayon-core"
2212
+ version = "1.13.0"
2213
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2214
+ checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
2215
+ dependencies = [
2216
+ "crossbeam-deque",
2217
+ "crossbeam-utils",
2218
+ ]
2219
+
2220
+ [[package]]
2221
+ name = "realfft"
2222
+ version = "3.5.0"
2223
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2224
+ checksum = "f821338fddb99d089116342c46e9f1fbf3828dba077674613e734e01d6ea8677"
2225
+ dependencies = [
2226
+ "rustfft",
2227
+ ]
2228
+
2229
+ [[package]]
2230
+ name = "redox_syscall"
2231
+ version = "0.5.18"
2232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2233
+ checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
2234
+ dependencies = [
2235
+ "bitflags",
2236
+ ]
2237
+
2238
+ [[package]]
2239
+ name = "regex"
2240
+ version = "1.12.2"
2241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2242
+ checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
2243
+ dependencies = [
2244
+ "aho-corasick",
2245
+ "memchr",
2246
+ "regex-automata",
2247
+ "regex-syntax",
2248
+ ]
2249
+
2250
+ [[package]]
2251
+ name = "regex-automata"
2252
+ version = "0.4.13"
2253
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2254
+ checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
2255
+ dependencies = [
2256
+ "aho-corasick",
2257
+ "memchr",
2258
+ "regex-syntax",
2259
+ ]
2260
+
2261
+ [[package]]
2262
+ name = "regex-syntax"
2263
+ version = "0.8.8"
2264
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2265
+ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
2266
+
2267
+ [[package]]
2268
+ name = "reqwest"
2269
+ version = "0.12.24"
2270
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2271
+ checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
2272
+ dependencies = [
2273
+ "base64 0.22.1",
2274
+ "bytes",
2275
+ "encoding_rs",
2276
+ "futures-channel",
2277
+ "futures-core",
2278
+ "futures-util",
2279
+ "h2",
2280
+ "http",
2281
+ "http-body",
2282
+ "http-body-util",
2283
+ "hyper",
2284
+ "hyper-rustls",
2285
+ "hyper-tls",
2286
+ "hyper-util",
2287
+ "js-sys",
2288
+ "log",
2289
+ "mime",
2290
+ "native-tls",
2291
+ "percent-encoding",
2292
+ "pin-project-lite",
2293
+ "rustls-pki-types",
2294
+ "serde",
2295
+ "serde_json",
2296
+ "serde_urlencoded",
2297
+ "sync_wrapper",
2298
+ "tokio",
2299
+ "tokio-native-tls",
2300
+ "tower",
2301
+ "tower-http",
2302
+ "tower-service",
2303
+ "url",
2304
+ "wasm-bindgen",
2305
+ "wasm-bindgen-futures",
2306
+ "web-sys",
2307
+ ]
2308
+
2309
+ [[package]]
2310
+ name = "ring"
2311
+ version = "0.17.14"
2312
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2313
+ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
2314
+ dependencies = [
2315
+ "cc",
2316
+ "cfg-if",
2317
+ "getrandom 0.2.16",
2318
+ "libc",
2319
+ "untrusted",
2320
+ "windows-sys 0.52.0",
2321
+ ]
2322
+
2323
+ [[package]]
2324
+ name = "rle-decode-fast"
2325
+ version = "1.0.3"
2326
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2327
+ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
2328
+
2329
+ [[package]]
2330
+ name = "ron"
2331
+ version = "0.8.1"
2332
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2333
+ checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
2334
+ dependencies = [
2335
+ "base64 0.21.7",
2336
+ "bitflags",
2337
+ "serde",
2338
+ "serde_derive",
2339
+ ]
2340
+
2341
+ [[package]]
2342
+ name = "rubato"
2343
+ version = "0.15.0"
2344
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2345
+ checksum = "b5d18b486e7d29a408ef3f825bc1327d8f87af091c987ca2f5b734625940e234"
2346
+ dependencies = [
2347
+ "num-complex",
2348
+ "num-integer",
2349
+ "num-traits",
2350
+ "realfft",
2351
+ ]
2352
+
2353
+ [[package]]
2354
+ name = "rust-ini"
2355
+ version = "0.20.0"
2356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2357
+ checksum = "3e0698206bcb8882bf2a9ecb4c1e7785db57ff052297085a6efd4fe42302068a"
2358
+ dependencies = [
2359
+ "cfg-if",
2360
+ "ordered-multimap",
2361
+ ]
2362
+
2363
+ [[package]]
2364
+ name = "rustfft"
2365
+ version = "6.4.1"
2366
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2367
+ checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89"
2368
+ dependencies = [
2369
+ "num-complex",
2370
+ "num-integer",
2371
+ "num-traits",
2372
+ "primal-check",
2373
+ "strength_reduce",
2374
+ "transpose",
2375
+ ]
2376
+
2377
+ [[package]]
2378
+ name = "rustix"
2379
+ version = "1.1.2"
2380
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2381
+ checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
2382
+ dependencies = [
2383
+ "bitflags",
2384
+ "errno",
2385
+ "libc",
2386
+ "linux-raw-sys",
2387
+ "windows-sys 0.61.2",
2388
+ ]
2389
+
2390
+ [[package]]
2391
+ name = "rustls"
2392
+ version = "0.23.35"
2393
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2394
+ checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f"
2395
+ dependencies = [
2396
+ "once_cell",
2397
+ "rustls-pki-types",
2398
+ "rustls-webpki",
2399
+ "subtle",
2400
+ "zeroize",
2401
+ ]
2402
+
2403
+ [[package]]
2404
+ name = "rustls-pki-types"
2405
+ version = "1.13.0"
2406
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2407
+ checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a"
2408
+ dependencies = [
2409
+ "zeroize",
2410
+ ]
2411
+
2412
+ [[package]]
2413
+ name = "rustls-webpki"
2414
+ version = "0.103.8"
2415
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2416
+ checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52"
2417
+ dependencies = [
2418
+ "ring",
2419
+ "rustls-pki-types",
2420
+ "untrusted",
2421
+ ]
2422
+
2423
+ [[package]]
2424
+ name = "rustversion"
2425
+ version = "1.0.22"
2426
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2427
+ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
2428
+
2429
+ [[package]]
2430
+ name = "ryu"
2431
+ version = "1.0.20"
2432
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2433
+ checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
2434
+
2435
+ [[package]]
2436
+ name = "safetensors"
2437
+ version = "0.4.5"
2438
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2439
+ checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6"
2440
+ dependencies = [
2441
+ "serde",
2442
+ "serde_json",
2443
+ ]
2444
+
2445
+ [[package]]
2446
+ name = "same-file"
2447
+ version = "1.0.6"
2448
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2449
+ checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
2450
+ dependencies = [
2451
+ "winapi-util",
2452
+ ]
2453
+
2454
+ [[package]]
2455
+ name = "schannel"
2456
+ version = "0.1.28"
2457
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2458
+ checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
2459
+ dependencies = [
2460
+ "windows-sys 0.61.2",
2461
+ ]
2462
+
2463
+ [[package]]
2464
+ name = "scopeguard"
2465
+ version = "1.2.0"
2466
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2467
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
2468
+
2469
+ [[package]]
2470
+ name = "security-framework"
2471
+ version = "2.11.1"
2472
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2473
+ checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
2474
+ dependencies = [
2475
+ "bitflags",
2476
+ "core-foundation",
2477
+ "core-foundation-sys",
2478
+ "libc",
2479
+ "security-framework-sys",
2480
+ ]
2481
+
2482
+ [[package]]
2483
+ name = "security-framework-sys"
2484
+ version = "2.15.0"
2485
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2486
+ checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0"
2487
+ dependencies = [
2488
+ "core-foundation-sys",
2489
+ "libc",
2490
+ ]
2491
+
2492
+ [[package]]
2493
+ name = "serde"
2494
+ version = "1.0.228"
2495
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2496
+ checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
2497
+ dependencies = [
2498
+ "serde_core",
2499
+ "serde_derive",
2500
+ ]
2501
+
2502
+ [[package]]
2503
+ name = "serde_core"
2504
+ version = "1.0.228"
2505
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2506
+ checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
2507
+ dependencies = [
2508
+ "serde_derive",
2509
+ ]
2510
+
2511
+ [[package]]
2512
+ name = "serde_derive"
2513
+ version = "1.0.228"
2514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2515
+ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
2516
+ dependencies = [
2517
+ "proc-macro2",
2518
+ "quote",
2519
+ "syn 2.0.110",
2520
+ ]
2521
+
2522
+ [[package]]
2523
+ name = "serde_json"
2524
+ version = "1.0.145"
2525
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2526
+ checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
2527
+ dependencies = [
2528
+ "itoa",
2529
+ "memchr",
2530
+ "ryu",
2531
+ "serde",
2532
+ "serde_core",
2533
+ ]
2534
+
2535
+ [[package]]
2536
+ name = "serde_spanned"
2537
+ version = "0.6.9"
2538
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2539
+ checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
2540
+ dependencies = [
2541
+ "serde",
2542
+ ]
2543
+
2544
+ [[package]]
2545
+ name = "serde_urlencoded"
2546
+ version = "0.7.1"
2547
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2548
+ checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
2549
+ dependencies = [
2550
+ "form_urlencoded",
2551
+ "itoa",
2552
+ "ryu",
2553
+ "serde",
2554
+ ]
2555
+
2556
+ [[package]]
2557
+ name = "serde_yaml"
2558
+ version = "0.9.34+deprecated"
2559
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2560
+ checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
2561
+ dependencies = [
2562
+ "indexmap",
2563
+ "itoa",
2564
+ "ryu",
2565
+ "serde",
2566
+ "unsafe-libyaml",
2567
+ ]
2568
+
2569
+ [[package]]
2570
+ name = "sha2"
2571
+ version = "0.10.9"
2572
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2573
+ checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
2574
+ dependencies = [
2575
+ "cfg-if",
2576
+ "cpufeatures",
2577
+ "digest",
2578
+ ]
2579
+
2580
+ [[package]]
2581
+ name = "shlex"
2582
+ version = "1.3.0"
2583
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2584
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
2585
+
2586
+ [[package]]
2587
+ name = "signal-hook-registry"
2588
+ version = "1.4.6"
2589
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2590
+ checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b"
2591
+ dependencies = [
2592
+ "libc",
2593
+ ]
2594
+
2595
+ [[package]]
2596
+ name = "simd-adler32"
2597
+ version = "0.3.7"
2598
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2599
+ checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
2600
+
2601
+ [[package]]
2602
+ name = "siphasher"
2603
+ version = "1.0.1"
2604
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2605
+ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
2606
+
2607
+ [[package]]
2608
+ name = "slab"
2609
+ version = "0.4.11"
2610
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2611
+ checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
2612
+
2613
+ [[package]]
2614
+ name = "smallvec"
2615
+ version = "1.15.1"
2616
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2617
+ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
2618
+
2619
+ [[package]]
2620
+ name = "smallvec"
2621
+ version = "2.0.0-alpha.10"
2622
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2623
+ checksum = "51d44cfb396c3caf6fbfd0ab422af02631b69ddd96d2eff0b0f0724f9024051b"
2624
+
2625
+ [[package]]
2626
+ name = "socket2"
2627
+ version = "0.6.1"
2628
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2629
+ checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
2630
+ dependencies = [
2631
+ "libc",
2632
+ "windows-sys 0.60.2",
2633
+ ]
2634
+
2635
+ [[package]]
2636
+ name = "socks"
2637
+ version = "0.3.4"
2638
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2639
+ checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
2640
+ dependencies = [
2641
+ "byteorder",
2642
+ "libc",
2643
+ "winapi",
2644
+ ]
2645
+
2646
+ [[package]]
2647
+ name = "spm_precompiled"
2648
+ version = "0.1.4"
2649
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2650
+ checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
2651
+ dependencies = [
2652
+ "base64 0.13.1",
2653
+ "nom",
2654
+ "serde",
2655
+ "unicode-segmentation",
2656
+ ]
2657
+
2658
+ [[package]]
2659
+ name = "stable_deref_trait"
2660
+ version = "1.2.1"
2661
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2662
+ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
2663
+
2664
+ [[package]]
2665
+ name = "strength_reduce"
2666
+ version = "0.2.4"
2667
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2668
+ checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82"
2669
+
2670
+ [[package]]
2671
+ name = "strsim"
2672
+ version = "0.11.1"
2673
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2674
+ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
2675
+
2676
+ [[package]]
2677
+ name = "subtle"
2678
+ version = "2.6.1"
2679
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2680
+ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
2681
+
2682
+ [[package]]
2683
+ name = "syn"
2684
+ version = "1.0.109"
2685
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2686
+ checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
2687
+ dependencies = [
2688
+ "proc-macro2",
2689
+ "unicode-ident",
2690
+ ]
2691
+
2692
+ [[package]]
2693
+ name = "syn"
2694
+ version = "2.0.110"
2695
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2696
+ checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
2697
+ dependencies = [
2698
+ "proc-macro2",
2699
+ "quote",
2700
+ "unicode-ident",
2701
+ ]
2702
+
2703
+ [[package]]
2704
+ name = "sync_wrapper"
2705
+ version = "1.0.2"
2706
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2707
+ checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
2708
+ dependencies = [
2709
+ "futures-core",
2710
+ ]
2711
+
2712
+ [[package]]
2713
+ name = "synstructure"
2714
+ version = "0.13.2"
2715
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2716
+ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
2717
+ dependencies = [
2718
+ "proc-macro2",
2719
+ "quote",
2720
+ "syn 2.0.110",
2721
+ ]
2722
+
2723
+ [[package]]
2724
+ name = "system-configuration"
2725
+ version = "0.6.1"
2726
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2727
+ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
2728
+ dependencies = [
2729
+ "bitflags",
2730
+ "core-foundation",
2731
+ "system-configuration-sys",
2732
+ ]
2733
+
2734
+ [[package]]
2735
+ name = "system-configuration-sys"
2736
+ version = "0.6.0"
2737
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2738
+ checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
2739
+ dependencies = [
2740
+ "core-foundation-sys",
2741
+ "libc",
2742
+ ]
2743
+
2744
+ [[package]]
2745
+ name = "tar"
2746
+ version = "0.4.44"
2747
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2748
+ checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a"
2749
+ dependencies = [
2750
+ "filetime",
2751
+ "libc",
2752
+ "xattr",
2753
+ ]
2754
+
2755
+ [[package]]
2756
+ name = "tempfile"
2757
+ version = "3.23.0"
2758
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2759
+ checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
2760
+ dependencies = [
2761
+ "fastrand",
2762
+ "getrandom 0.3.4",
2763
+ "once_cell",
2764
+ "rustix",
2765
+ "windows-sys 0.61.2",
2766
+ ]
2767
+
2768
+ [[package]]
2769
+ name = "thiserror"
2770
+ version = "1.0.69"
2771
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2772
+ checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
2773
+ dependencies = [
2774
+ "thiserror-impl",
2775
+ ]
2776
+
2777
+ [[package]]
2778
+ name = "thiserror-impl"
2779
+ version = "1.0.69"
2780
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2781
+ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
2782
+ dependencies = [
2783
+ "proc-macro2",
2784
+ "quote",
2785
+ "syn 2.0.110",
2786
+ ]
2787
+
2788
+ [[package]]
2789
+ name = "tiny-keccak"
2790
+ version = "2.0.2"
2791
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2792
+ checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
2793
+ dependencies = [
2794
+ "crunchy",
2795
+ ]
2796
+
2797
+ [[package]]
2798
+ name = "tinystr"
2799
+ version = "0.8.2"
2800
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2801
+ checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
2802
+ dependencies = [
2803
+ "displaydoc",
2804
+ "zerovec",
2805
+ ]
2806
+
2807
+ [[package]]
2808
+ name = "tinytemplate"
2809
+ version = "1.2.1"
2810
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2811
+ checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
2812
+ dependencies = [
2813
+ "serde",
2814
+ "serde_json",
2815
+ ]
2816
+
2817
+ [[package]]
2818
+ name = "tokenizers"
2819
+ version = "0.19.1"
2820
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2821
+ checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd"
2822
+ dependencies = [
2823
+ "aho-corasick",
2824
+ "derive_builder",
2825
+ "esaxx-rs",
2826
+ "getrandom 0.2.16",
2827
+ "indicatif",
2828
+ "itertools 0.12.1",
2829
+ "lazy_static",
2830
+ "log",
2831
+ "macro_rules_attribute",
2832
+ "monostate",
2833
+ "onig",
2834
+ "paste",
2835
+ "rand",
2836
+ "rayon",
2837
+ "rayon-cond",
2838
+ "regex",
2839
+ "regex-syntax",
2840
+ "serde",
2841
+ "serde_json",
2842
+ "spm_precompiled",
2843
+ "thiserror",
2844
+ "unicode-normalization-alignments",
2845
+ "unicode-segmentation",
2846
+ "unicode_categories",
2847
+ ]
2848
+
2849
+ [[package]]
2850
+ name = "tokio"
2851
+ version = "1.48.0"
2852
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2853
+ checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
2854
+ dependencies = [
2855
+ "bytes",
2856
+ "libc",
2857
+ "mio",
2858
+ "parking_lot",
2859
+ "pin-project-lite",
2860
+ "signal-hook-registry",
2861
+ "socket2",
2862
+ "tokio-macros",
2863
+ "windows-sys 0.61.2",
2864
+ ]
2865
+
2866
+ [[package]]
2867
+ name = "tokio-macros"
2868
+ version = "2.6.0"
2869
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2870
+ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
2871
+ dependencies = [
2872
+ "proc-macro2",
2873
+ "quote",
2874
+ "syn 2.0.110",
2875
+ ]
2876
+
2877
+ [[package]]
2878
+ name = "tokio-native-tls"
2879
+ version = "0.3.1"
2880
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2881
+ checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
2882
+ dependencies = [
2883
+ "native-tls",
2884
+ "tokio",
2885
+ ]
2886
+
2887
+ [[package]]
2888
+ name = "tokio-rustls"
2889
+ version = "0.26.4"
2890
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2891
+ checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
2892
+ dependencies = [
2893
+ "rustls",
2894
+ "tokio",
2895
+ ]
2896
+
2897
+ [[package]]
2898
+ name = "tokio-util"
2899
+ version = "0.7.17"
2900
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2901
+ checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594"
2902
+ dependencies = [
2903
+ "bytes",
2904
+ "futures-core",
2905
+ "futures-sink",
2906
+ "pin-project-lite",
2907
+ "tokio",
2908
+ ]
2909
+
2910
+ [[package]]
2911
+ name = "toml"
2912
+ version = "0.8.23"
2913
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2914
+ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
2915
+ dependencies = [
2916
+ "serde",
2917
+ "serde_spanned",
2918
+ "toml_datetime",
2919
+ "toml_edit",
2920
+ ]
2921
+
2922
+ [[package]]
2923
+ name = "toml_datetime"
2924
+ version = "0.6.11"
2925
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2926
+ checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
2927
+ dependencies = [
2928
+ "serde",
2929
+ ]
2930
+
2931
+ [[package]]
2932
+ name = "toml_edit"
2933
+ version = "0.22.27"
2934
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2935
+ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
2936
+ dependencies = [
2937
+ "indexmap",
2938
+ "serde",
2939
+ "serde_spanned",
2940
+ "toml_datetime",
2941
+ "toml_write",
2942
+ "winnow",
2943
+ ]
2944
+
2945
+ [[package]]
2946
+ name = "toml_write"
2947
+ version = "0.1.2"
2948
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2949
+ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
2950
+
2951
+ [[package]]
2952
+ name = "tower"
2953
+ version = "0.5.2"
2954
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2955
+ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
2956
+ dependencies = [
2957
+ "futures-core",
2958
+ "futures-util",
2959
+ "pin-project-lite",
2960
+ "sync_wrapper",
2961
+ "tokio",
2962
+ "tower-layer",
2963
+ "tower-service",
2964
+ ]
2965
+
2966
+ [[package]]
2967
+ name = "tower-http"
2968
+ version = "0.6.6"
2969
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2970
+ checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
2971
+ dependencies = [
2972
+ "bitflags",
2973
+ "bytes",
2974
+ "futures-util",
2975
+ "http",
2976
+ "http-body",
2977
+ "iri-string",
2978
+ "pin-project-lite",
2979
+ "tower",
2980
+ "tower-layer",
2981
+ "tower-service",
2982
+ ]
2983
+
2984
+ [[package]]
2985
+ name = "tower-layer"
2986
+ version = "0.3.3"
2987
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2988
+ checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
2989
+
2990
+ [[package]]
2991
+ name = "tower-service"
2992
+ version = "0.3.3"
2993
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2994
+ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
2995
+
2996
+ [[package]]
2997
+ name = "tracing"
2998
+ version = "0.1.41"
2999
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3000
+ checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
3001
+ dependencies = [
3002
+ "pin-project-lite",
3003
+ "tracing-core",
3004
+ ]
3005
+
3006
+ [[package]]
3007
+ name = "tracing-core"
3008
+ version = "0.1.34"
3009
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3010
+ checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
3011
+ dependencies = [
3012
+ "once_cell",
3013
+ ]
3014
+
3015
+ [[package]]
3016
+ name = "transpose"
3017
+ version = "0.2.3"
3018
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3019
+ checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e"
3020
+ dependencies = [
3021
+ "num-integer",
3022
+ "strength_reduce",
3023
+ ]
3024
+
3025
+ [[package]]
3026
+ name = "try-lock"
3027
+ version = "0.2.5"
3028
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3029
+ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
3030
+
3031
+ [[package]]
3032
+ name = "typenum"
3033
+ version = "1.19.0"
3034
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3035
+ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
3036
+
3037
+ [[package]]
3038
+ name = "ucd-trie"
3039
+ version = "0.1.7"
3040
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3041
+ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
3042
+
3043
+ [[package]]
3044
+ name = "unicode-ident"
3045
+ version = "1.0.22"
3046
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3047
+ checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
3048
+
3049
+ [[package]]
3050
+ name = "unicode-normalization-alignments"
3051
+ version = "0.1.12"
3052
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3053
+ checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
3054
+ dependencies = [
3055
+ "smallvec 1.15.1",
3056
+ ]
3057
+
3058
+ [[package]]
3059
+ name = "unicode-segmentation"
3060
+ version = "1.12.0"
3061
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3062
+ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
3063
+
3064
+ [[package]]
3065
+ name = "unicode-width"
3066
+ version = "0.2.2"
3067
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3068
+ checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
3069
+
3070
+ [[package]]
3071
+ name = "unicode_categories"
3072
+ version = "0.1.1"
3073
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3074
+ checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
3075
+
3076
+ [[package]]
3077
+ name = "unsafe-libyaml"
3078
+ version = "0.2.11"
3079
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3080
+ checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
3081
+
3082
+ [[package]]
3083
+ name = "untrusted"
3084
+ version = "0.9.0"
3085
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3086
+ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
3087
+
3088
+ [[package]]
3089
+ name = "ureq"
3090
+ version = "3.1.4"
3091
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3092
+ checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a"
3093
+ dependencies = [
3094
+ "base64 0.22.1",
3095
+ "der",
3096
+ "log",
3097
+ "native-tls",
3098
+ "percent-encoding",
3099
+ "rustls-pki-types",
3100
+ "socks",
3101
+ "ureq-proto",
3102
+ "utf-8",
3103
+ "webpki-root-certs",
3104
+ ]
3105
+
3106
+ [[package]]
3107
+ name = "ureq-proto"
3108
+ version = "0.5.2"
3109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3110
+ checksum = "60b4531c118335662134346048ddb0e54cc86bd7e81866757873055f0e38f5d2"
3111
+ dependencies = [
3112
+ "base64 0.22.1",
3113
+ "http",
3114
+ "httparse",
3115
+ "log",
3116
+ ]
3117
+
3118
+ [[package]]
3119
+ name = "url"
3120
+ version = "2.5.7"
3121
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3122
+ checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
3123
+ dependencies = [
3124
+ "form_urlencoded",
3125
+ "idna",
3126
+ "percent-encoding",
3127
+ "serde",
3128
+ ]
3129
+
3130
+ [[package]]
3131
+ name = "utf-8"
3132
+ version = "0.7.6"
3133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3134
+ checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
3135
+
3136
+ [[package]]
3137
+ name = "utf8_iter"
3138
+ version = "1.0.4"
3139
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3140
+ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
3141
+
3142
+ [[package]]
3143
+ name = "utf8parse"
3144
+ version = "0.2.2"
3145
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3146
+ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
3147
+
3148
+ [[package]]
3149
+ name = "vcpkg"
3150
+ version = "0.2.15"
3151
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3152
+ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
3153
+
3154
+ [[package]]
3155
+ name = "version_check"
3156
+ version = "0.9.5"
3157
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3158
+ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
3159
+
3160
+ [[package]]
3161
+ name = "walkdir"
3162
+ version = "2.5.0"
3163
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3164
+ checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
3165
+ dependencies = [
3166
+ "same-file",
3167
+ "winapi-util",
3168
+ ]
3169
+
3170
+ [[package]]
3171
+ name = "want"
3172
+ version = "0.3.1"
3173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3174
+ checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
3175
+ dependencies = [
3176
+ "try-lock",
3177
+ ]
3178
+
3179
+ [[package]]
3180
+ name = "wasi"
3181
+ version = "0.11.1+wasi-snapshot-preview1"
3182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3183
+ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
3184
+
3185
+ [[package]]
3186
+ name = "wasip2"
3187
+ version = "1.0.1+wasi-0.2.4"
3188
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3189
+ checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
3190
+ dependencies = [
3191
+ "wit-bindgen",
3192
+ ]
3193
+
3194
+ [[package]]
3195
+ name = "wasm-bindgen"
3196
+ version = "0.2.105"
3197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3198
+ checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
3199
+ dependencies = [
3200
+ "cfg-if",
3201
+ "once_cell",
3202
+ "rustversion",
3203
+ "wasm-bindgen-macro",
3204
+ "wasm-bindgen-shared",
3205
+ ]
3206
+
3207
+ [[package]]
3208
+ name = "wasm-bindgen-futures"
3209
+ version = "0.4.55"
3210
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3211
+ checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0"
3212
+ dependencies = [
3213
+ "cfg-if",
3214
+ "js-sys",
3215
+ "once_cell",
3216
+ "wasm-bindgen",
3217
+ "web-sys",
3218
+ ]
3219
+
3220
+ [[package]]
3221
+ name = "wasm-bindgen-macro"
3222
+ version = "0.2.105"
3223
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3224
+ checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
3225
+ dependencies = [
3226
+ "quote",
3227
+ "wasm-bindgen-macro-support",
3228
+ ]
3229
+
3230
+ [[package]]
3231
+ name = "wasm-bindgen-macro-support"
3232
+ version = "0.2.105"
3233
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3234
+ checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
3235
+ dependencies = [
3236
+ "bumpalo",
3237
+ "proc-macro2",
3238
+ "quote",
3239
+ "syn 2.0.110",
3240
+ "wasm-bindgen-shared",
3241
+ ]
3242
+
3243
+ [[package]]
3244
+ name = "wasm-bindgen-shared"
3245
+ version = "0.2.105"
3246
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3247
+ checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
3248
+ dependencies = [
3249
+ "unicode-ident",
3250
+ ]
3251
+
3252
+ [[package]]
3253
+ name = "web-sys"
3254
+ version = "0.3.82"
3255
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3256
+ checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1"
3257
+ dependencies = [
3258
+ "js-sys",
3259
+ "wasm-bindgen",
3260
+ ]
3261
+
3262
+ [[package]]
3263
+ name = "web-time"
3264
+ version = "1.1.0"
3265
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3266
+ checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
3267
+ dependencies = [
3268
+ "js-sys",
3269
+ "wasm-bindgen",
3270
+ ]
3271
+
3272
+ [[package]]
3273
+ name = "webpki-root-certs"
3274
+ version = "1.0.4"
3275
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3276
+ checksum = "ee3e3b5f5e80bc89f30ce8d0343bf4e5f12341c51f3e26cbeecbc7c85443e85b"
3277
+ dependencies = [
3278
+ "rustls-pki-types",
3279
+ ]
3280
+
3281
+ [[package]]
3282
+ name = "winapi"
3283
+ version = "0.3.9"
3284
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3285
+ checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
3286
+ dependencies = [
3287
+ "winapi-i686-pc-windows-gnu",
3288
+ "winapi-x86_64-pc-windows-gnu",
3289
+ ]
3290
+
3291
+ [[package]]
3292
+ name = "winapi-i686-pc-windows-gnu"
3293
+ version = "0.4.0"
3294
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3295
+ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
3296
+
3297
+ [[package]]
3298
+ name = "winapi-util"
3299
+ version = "0.1.11"
3300
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3301
+ checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
3302
+ dependencies = [
3303
+ "windows-sys 0.61.2",
3304
+ ]
3305
+
3306
+ [[package]]
3307
+ name = "winapi-x86_64-pc-windows-gnu"
3308
+ version = "0.4.0"
3309
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3310
+ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
3311
+
3312
+ [[package]]
3313
+ name = "windows-link"
3314
+ version = "0.2.1"
3315
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3316
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
3317
+
3318
+ [[package]]
3319
+ name = "windows-registry"
3320
+ version = "0.6.1"
3321
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3322
+ checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720"
3323
+ dependencies = [
3324
+ "windows-link",
3325
+ "windows-result",
3326
+ "windows-strings",
3327
+ ]
3328
+
3329
+ [[package]]
3330
+ name = "windows-result"
3331
+ version = "0.4.1"
3332
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3333
+ checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
3334
+ dependencies = [
3335
+ "windows-link",
3336
+ ]
3337
+
3338
+ [[package]]
3339
+ name = "windows-strings"
3340
+ version = "0.5.1"
3341
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3342
+ checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
3343
+ dependencies = [
3344
+ "windows-link",
3345
+ ]
3346
+
3347
+ [[package]]
3348
+ name = "windows-sys"
3349
+ version = "0.52.0"
3350
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3351
+ checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
3352
+ dependencies = [
3353
+ "windows-targets 0.52.6",
3354
+ ]
3355
+
3356
+ [[package]]
3357
+ name = "windows-sys"
3358
+ version = "0.59.0"
3359
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3360
+ checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
3361
+ dependencies = [
3362
+ "windows-targets 0.52.6",
3363
+ ]
3364
+
3365
+ [[package]]
3366
+ name = "windows-sys"
3367
+ version = "0.60.2"
3368
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3369
+ checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
3370
+ dependencies = [
3371
+ "windows-targets 0.53.5",
3372
+ ]
3373
+
3374
+ [[package]]
3375
+ name = "windows-sys"
3376
+ version = "0.61.2"
3377
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3378
+ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
3379
+ dependencies = [
3380
+ "windows-link",
3381
+ ]
3382
+
3383
+ [[package]]
3384
+ name = "windows-targets"
3385
+ version = "0.52.6"
3386
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3387
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
3388
+ dependencies = [
3389
+ "windows_aarch64_gnullvm 0.52.6",
3390
+ "windows_aarch64_msvc 0.52.6",
3391
+ "windows_i686_gnu 0.52.6",
3392
+ "windows_i686_gnullvm 0.52.6",
3393
+ "windows_i686_msvc 0.52.6",
3394
+ "windows_x86_64_gnu 0.52.6",
3395
+ "windows_x86_64_gnullvm 0.52.6",
3396
+ "windows_x86_64_msvc 0.52.6",
3397
+ ]
3398
+
3399
+ [[package]]
3400
+ name = "windows-targets"
3401
+ version = "0.53.5"
3402
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3403
+ checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
3404
+ dependencies = [
3405
+ "windows-link",
3406
+ "windows_aarch64_gnullvm 0.53.1",
3407
+ "windows_aarch64_msvc 0.53.1",
3408
+ "windows_i686_gnu 0.53.1",
3409
+ "windows_i686_gnullvm 0.53.1",
3410
+ "windows_i686_msvc 0.53.1",
3411
+ "windows_x86_64_gnu 0.53.1",
3412
+ "windows_x86_64_gnullvm 0.53.1",
3413
+ "windows_x86_64_msvc 0.53.1",
3414
+ ]
3415
+
3416
+ [[package]]
3417
+ name = "windows_aarch64_gnullvm"
3418
+ version = "0.52.6"
3419
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3420
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
3421
+
3422
+ [[package]]
3423
+ name = "windows_aarch64_gnullvm"
3424
+ version = "0.53.1"
3425
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3426
+ checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
3427
+
3428
+ [[package]]
3429
+ name = "windows_aarch64_msvc"
3430
+ version = "0.52.6"
3431
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3432
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
3433
+
3434
+ [[package]]
3435
+ name = "windows_aarch64_msvc"
3436
+ version = "0.53.1"
3437
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3438
+ checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
3439
+
3440
+ [[package]]
3441
+ name = "windows_i686_gnu"
3442
+ version = "0.52.6"
3443
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3444
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
3445
+
3446
+ [[package]]
3447
+ name = "windows_i686_gnu"
3448
+ version = "0.53.1"
3449
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3450
+ checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
3451
+
3452
+ [[package]]
3453
+ name = "windows_i686_gnullvm"
3454
+ version = "0.52.6"
3455
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3456
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
3457
+
3458
+ [[package]]
3459
+ name = "windows_i686_gnullvm"
3460
+ version = "0.53.1"
3461
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3462
+ checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
3463
+
3464
+ [[package]]
3465
+ name = "windows_i686_msvc"
3466
+ version = "0.52.6"
3467
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3468
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
3469
+
3470
+ [[package]]
3471
+ name = "windows_i686_msvc"
3472
+ version = "0.53.1"
3473
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3474
+ checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
3475
+
3476
+ [[package]]
3477
+ name = "windows_x86_64_gnu"
3478
+ version = "0.52.6"
3479
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3480
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
3481
+
3482
+ [[package]]
3483
+ name = "windows_x86_64_gnu"
3484
+ version = "0.53.1"
3485
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3486
+ checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
3487
+
3488
+ [[package]]
3489
+ name = "windows_x86_64_gnullvm"
3490
+ version = "0.52.6"
3491
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3492
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
3493
+
3494
+ [[package]]
3495
+ name = "windows_x86_64_gnullvm"
3496
+ version = "0.53.1"
3497
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3498
+ checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
3499
+
3500
+ [[package]]
3501
+ name = "windows_x86_64_msvc"
3502
+ version = "0.52.6"
3503
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3504
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
3505
+
3506
+ [[package]]
3507
+ name = "windows_x86_64_msvc"
3508
+ version = "0.53.1"
3509
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3510
+ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
3511
+
3512
+ [[package]]
3513
+ name = "winnow"
3514
+ version = "0.7.13"
3515
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3516
+ checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
3517
+ dependencies = [
3518
+ "memchr",
3519
+ ]
3520
+
3521
+ [[package]]
3522
+ name = "wit-bindgen"
3523
+ version = "0.46.0"
3524
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3525
+ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
3526
+
3527
+ [[package]]
3528
+ name = "writeable"
3529
+ version = "0.6.2"
3530
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3531
+ checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
3532
+
3533
+ [[package]]
3534
+ name = "xattr"
3535
+ version = "1.6.1"
3536
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3537
+ checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156"
3538
+ dependencies = [
3539
+ "libc",
3540
+ "rustix",
3541
+ ]
3542
+
3543
+ [[package]]
3544
+ name = "yaml-rust2"
3545
+ version = "0.8.1"
3546
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3547
+ checksum = "8902160c4e6f2fb145dbe9d6760a75e3c9522d8bf796ed7047c85919ac7115f8"
3548
+ dependencies = [
3549
+ "arraydeque",
3550
+ "encoding_rs",
3551
+ "hashlink",
3552
+ ]
3553
+
3554
+ [[package]]
3555
+ name = "yoke"
3556
+ version = "0.8.1"
3557
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3558
+ checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
3559
+ dependencies = [
3560
+ "stable_deref_trait",
3561
+ "yoke-derive",
3562
+ "zerofrom",
3563
+ ]
3564
+
3565
+ [[package]]
3566
+ name = "yoke-derive"
3567
+ version = "0.8.1"
3568
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3569
+ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
3570
+ dependencies = [
3571
+ "proc-macro2",
3572
+ "quote",
3573
+ "syn 2.0.110",
3574
+ "synstructure",
3575
+ ]
3576
+
3577
+ [[package]]
3578
+ name = "zerocopy"
3579
+ version = "0.8.27"
3580
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3581
+ checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
3582
+ dependencies = [
3583
+ "zerocopy-derive",
3584
+ ]
3585
+
3586
+ [[package]]
3587
+ name = "zerocopy-derive"
3588
+ version = "0.8.27"
3589
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3590
+ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
3591
+ dependencies = [
3592
+ "proc-macro2",
3593
+ "quote",
3594
+ "syn 2.0.110",
3595
+ ]
3596
+
3597
+ [[package]]
3598
+ name = "zerofrom"
3599
+ version = "0.1.6"
3600
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3601
+ checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
3602
+ dependencies = [
3603
+ "zerofrom-derive",
3604
+ ]
3605
+
3606
+ [[package]]
3607
+ name = "zerofrom-derive"
3608
+ version = "0.1.6"
3609
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3610
+ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
3611
+ dependencies = [
3612
+ "proc-macro2",
3613
+ "quote",
3614
+ "syn 2.0.110",
3615
+ "synstructure",
3616
+ ]
3617
+
3618
+ [[package]]
3619
+ name = "zeroize"
3620
+ version = "1.8.2"
3621
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3622
+ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
3623
+
3624
+ [[package]]
3625
+ name = "zerotrie"
3626
+ version = "0.2.3"
3627
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3628
+ checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
3629
+ dependencies = [
3630
+ "displaydoc",
3631
+ "yoke",
3632
+ "zerofrom",
3633
+ ]
3634
+
3635
+ [[package]]
3636
+ name = "zerovec"
3637
+ version = "0.11.5"
3638
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3639
+ checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
3640
+ dependencies = [
3641
+ "yoke",
3642
+ "zerofrom",
3643
+ "zerovec-derive",
3644
+ ]
3645
+
3646
+ [[package]]
3647
+ name = "zerovec-derive"
3648
+ version = "0.11.2"
3649
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3650
+ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
3651
+ dependencies = [
3652
+ "proc-macro2",
3653
+ "quote",
3654
+ "syn 2.0.110",
3655
+ ]
3656
+
3657
+ [[package]]
3658
+ name = "zstd"
3659
+ version = "0.13.3"
3660
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3661
+ checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
3662
+ dependencies = [
3663
+ "zstd-safe",
3664
+ ]
3665
+
3666
+ [[package]]
3667
+ name = "zstd-safe"
3668
+ version = "7.2.4"
3669
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3670
+ checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
3671
+ dependencies = [
3672
+ "zstd-sys",
3673
+ ]
3674
+
3675
+ [[package]]
3676
+ name = "zstd-sys"
3677
+ version = "2.0.16+zstd.1.5.7"
3678
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3679
+ checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
3680
+ dependencies = [
3681
+ "cc",
3682
+ "pkg-config",
3683
+ ]
Cargo.toml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [package]
2
+ name = "indextts"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ description = "High-performance Text-to-Speech engine in pure Rust - converted from IndexTTS Python"
6
+ authors = ["IndexTTS Team"]
7
+ license = "MIT"
8
+ keywords = ["tts", "speech-synthesis", "audio", "ml", "deep-learning"]
9
+ categories = ["multimedia::audio", "science"]
10
+
11
+ [[bin]]
12
+ name = "indextts"
13
+ path = "src/main.rs"
14
+
15
+ [lib]
16
+ name = "indextts"
17
+ path = "src/lib.rs"
18
+
19
+ [dependencies]
20
+ # Core ML/Inference
21
+ ort = { version = "2.0.0-rc.4", features = ["load-dynamic"] }
22
+ safetensors = "0.4"
23
+ ndarray = { version = "0.15", features = ["rayon"] }
24
+
25
+ # Audio Processing
26
+ hound = "3.5"
27
+ dasp_signal = "0.11"
28
+ dasp_sample = "0.11"
29
+ rustfft = "6.2"
30
+ realfft = "3.3"
31
+ rubato = "0.15"
32
+
33
+ # Text Processing
34
+ tokenizers = "0.19"
35
+ unicode-segmentation = "1.11"
36
+ regex = "1.10"
37
+ lazy_static = "1.5"
38
+ jieba-rs = "0.7"
39
+
40
+ # CLI & Configuration
41
+ clap = { version = "4.5", features = ["derive"] }
42
+ serde = { version = "1.0", features = ["derive"] }
43
+ serde_json = "1.0"
44
+ serde_yaml = "0.9"
45
+ toml = "0.8"
46
+ config = "0.14"
47
+
48
+ # Async & Parallelism
49
+ rayon = "1.10"
50
+ tokio = { version = "1.38", features = ["full"] }
51
+
52
+ # Utilities
53
+ anyhow = "1.0"
54
+ thiserror = "1.0"
55
+ log = "0.4"
56
+ env_logger = "0.11"
57
+ indicatif = "0.17"
58
+ bytemuck = { version = "1.16", features = ["derive"] }
59
+ num-complex = "0.4"
60
+ num-traits = "0.2"
61
+ rand = "0.8"
62
+ num_cpus = "1.16"
63
+
64
+ # HTTP/Download
65
+ reqwest = { version = "0.12", features = ["blocking", "json"] }
66
+ sha2 = "0.10"
67
+ hex = "0.4"
68
+
69
+ [dev-dependencies]
70
+ criterion = "0.5"
71
+ tempfile = "3.10"
72
+
73
+ [profile.release]
74
+ opt-level = 3
75
+ lto = true
76
+ codegen-units = 1
77
+ strip = true
78
+
79
+ [profile.dev]
80
+ opt-level = 1
81
+
82
+ [[bench]]
83
+ name = "mel_spectrogram"
84
+ harness = false
85
+
86
+ [[bench]]
87
+ name = "inference"
88
+ harness = false
DIRECTORY_STRUCTURE.txt ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ IndexTTS-Rust/ (Complete Directory Structure)
2
+
3
+ ├── indextts/ # Main Python package (194 files)
4
+ │ │
5
+ │ ├── __init__.py # Package initialization
6
+ │ ├── cli.py # Command-line interface (64 lines)
7
+ │ ├── infer.py # Original inference (v1) - 690 lines
8
+ │ ├── infer_v2.py # Main inference v2 - 739 lines ⭐⭐⭐
9
+ │ │
10
+ │ ├── gpt/ # GPT-based TTS model (9 files, 16,953 lines)
11
+ │ │ ├── __init__.py
12
+ │ │ ├── model.py # Original UnifiedVoice (713L)
13
+ │ │ ├── model_v2.py # UnifiedVoice v2 ⭐⭐⭐ (747L)
14
+ │ │ ├── conformer_encoder.py # Conformer encoder ⭐⭐ (520L)
15
+ │ │ ├── perceiver.py # Perceiver resampler (317L)
16
+ │ │ ├── conformer/ # Conformer components
17
+ │ │ ├── transformers_gpt2.py # GPT2 implementation (1,878L)
18
+ │ │ ├── transformers_generation_utils.py # Generation utilities (4,747L)
19
+ │ │ ├── transformers_beam_search.py # Beam search (1,013L)
20
+ │ │ └── transformers_modeling_utils.py # Model utilities (5,525L)
21
+ │ │
22
+ │ ├── BigVGAN/ # Neural Vocoder (6+ files, ~1000+ lines)
23
+ │ │ ├── __init__.py
24
+ │ │ ├── models.py # BigVGAN architecture ⭐⭐⭐
25
+ │ │ ├── ECAPA_TDNN.py # Speaker encoder
26
+ │ │ ├── activations.py # Snake, SnakeBeta activations
27
+ │ │ ├── utils.py # Helper functions
28
+ │ │ │
29
+ │ │ ├── alias_free_activation/ # CUDA kernel variants
30
+ │ │ │ ├── cuda/
31
+ │ │ │ │ ├── activation1d.py # CUDA kernel loader
32
+ │ │ │ │ └── load.py
33
+ │ │ │ └── torch/
34
+ │ │ │ ├── act.py # PyTorch activation
35
+ │ │ │ ├── filter.py # Anti-aliasing filter
36
+ │ │ │ └── resample.py # Resampling
37
+ │ │ │
38
+ │ │ ├── alias_free_torch/ # PyTorch-only fallback
39
+ │ │ │ ├── act.py
40
+ │ │ │ ├── filter.py
41
+ │ │ │ └── resample.py
42
+ │ │ │
43
+ │ │ └── nnet/ # Network modules
44
+ │ │ ├── linear.py
45
+ │ │ ├── normalization.py
46
+ │ │ └── CNN.py
47
+ │ │
48
+ │ ├── s2mel/ # Semantic-to-Mel Models (~2,000+ lines)
49
+ │ │ ├── modules/ # Core modules (10+ files)
50
+ │ │ │ ├── audio.py # Mel-spectrogram computation ⭐
51
+ │ │ │ ├── commons.py # Common utilities (21KB)
52
+ │ │ │ ├── layers.py # NN layers (13KB)
53
+ │ │ │ ├── length_regulator.py # Duration modeling
54
+ │ │ │ ├── flow_matching.py # Continuous flow matching
55
+ │ │ │ ├── diffusion_transformer.py # Diffusion model
56
+ │ │ │ ├── rmvpe.py # Pitch extraction (22KB)
57
+ │ │ │ ├── quantize.py # Quantization
58
+ │ │ │ ├── encodec.py # EnCodec codec
59
+ │ │ │ ├── wavenet.py # WaveNet implementation
60
+ │ │ │ │
61
+ │ │ │ ├── bigvgan/ # BigVGAN vocoder
62
+ │ │ │ │ ├── modules.py
63
+ │ │ │ │ ├── config.json
64
+ │ │ │ │ ├── bigvgan.py
65
+ │ │ │ │ ├── alias_free_activation/ # Variants
66
+ │ │ │ │ └── models.py
67
+ │ │ │ │
68
+ │ │ │ ├── vocos/ # Vocos codec
69
+ │ │ │ ├── hifigan/ # HiFiGAN vocoder
70
+ │ │ │ ├── openvoice/ # OpenVoice components (11 files)
71
+ │ │ │ ├── campplus/ # CAMPPlus speaker encoder
72
+ │ │ │ │ └── DTDNN.py # DTDNN architecture
73
+ │ │ │ └── gpt_fast/ # Fast GPT inference
74
+ │ │ │
75
+ │ │ ├── dac/ # DAC codec
76
+ │ │ │ ├── model/
77
+ │ │ │ ├── nn/
78
+ │ │ │ └── utils/
79
+ │ │ │
80
+ │ │ └── (other s2mel implementations)
81
+ │ │
82
+ │ ├── utils/ # Text & Feature Utils (12+ files, ~500L)
83
+ │ │ ├── __init__.py
84
+ │ │ ├── front.py # TextNormalizer, TextTokenizer ⭐⭐⭐ (700L)
85
+ │ │ ├── maskgct_utils.py # Semantic codec builders (250L)
86
+ │ │ ├── arch_util.py # AttentionBlock, utilities
87
+ │ │ ├── checkpoint.py # Model loading
88
+ │ │ ├── xtransformers.py # Transformer utils (1,600L)
89
+ │ │ ├── feature_extractors.py # MelSpectrogramFeatures
90
+ │ │ ├── common.py # Common functions
91
+ │ │ ├── text_utils.py # Text utilities
92
+ │ │ ├── typical_sampling.py # TypicalLogitsWarper sampling
93
+ │ │ ├── utils.py # General utils
94
+ │ │ ├── webui_utils.py # Web UI helpers
95
+ │ │ ├── tagger_cache/ # Text normalization cache
96
+ │ │ │
97
+ │ │ └── maskgct/ # MaskGCT codec (100+ files, ~10,000+ lines)
98
+ │ │ └── models/
99
+ │ │ ├── codec/ # Multiple codec implementations
100
+ │ │ │ ├── amphion_codec/ # Amphion codec
101
+ │ │ │ │ ├── codec.py
102
+ │ │ │ │ ├── vocos.py
103
+ │ │ │ │ └── quantize/ # Quantization
104
+ │ │ │ │ ├── vector_quantize.py
105
+ │ │ │ │ ├── residual_vq.py
106
+ │ │ │ │ ├── factorized_vector_quantize.py
107
+ │ │ │ │ └── lookup_free_quantize.py
108
+ │ │ │ │
109
+ │ │ │ ├── facodec/ # FACodec variant
110
+ │ │ │ │ ├── facodec_inference.py
111
+ │ │ │ │ ├── modules/
112
+ │ │ │ │ │ ├── commons.py
113
+ │ │ │ │ │ ├── attentions.py
114
+ │ │ │ │ │ ├── layers.py
115
+ │ │ │ │ │ ├── quantize.py
116
+ │ │ │ │ │ ├── wavenet.py
117
+ │ │ │ │ │ ├── style_encoder.py
118
+ │ │ │ │ │ ├── gradient_reversal.py
119
+ │ │ │ │ │ └── JDC/ (pitch detection)
120
+ │ │ │ │ └── alias_free_torch/ # Anti-aliasing
121
+ │ │ │ │
122
+ │ │ │ ├── speechtokenizer/ # Speech Tokenizer codec
123
+ │ │ │ │ ├── model.py
124
+ │ │ │ │ └── modules/
125
+ │ │ │ │ ├── seanet.py
126
+ │ │ │ │ ├── lstm.py
127
+ │ │ │ │ ├── norm.py
128
+ │ │ │ │ ├── conv.py
129
+ │ │ │ │ └── quantization/
130
+ │ │ │ │
131
+ │ │ │ ├── ns3_codec/ # NS3 codec variant
132
+ │ │ │ ├── vevo/ # VEVo codec
133
+ │ │ │ ├── kmeans/ # KMeans codec
134
+ │ │ │ ├── melvqgan/ # MelVQ-GAN codec
135
+ │ │ │ │
136
+ │ │ │ ├── codec_inference.py
137
+ │ │ │ ├── codec_sampler.py
138
+ │ │ │ ├── codec_trainer.py
139
+ │ │ │ └── codec_dataset.py
140
+ │ │ │
141
+ │ │ └── tts/
142
+ │ │ └── maskgct/
143
+ │ │ ├── maskgct_s2a.py # Semantic-to-acoustic
144
+ │ │ └── ckpt/
145
+ │ │
146
+ │ └── vqvae/ # Vector Quantized VAE
147
+ │ ├── xtts_dvae.py # Discrete VAE (currently disabled)
148
+ │ └── (other VAE components)
149
+
150
+ ├── examples/ # Sample Data & Test Cases
151
+ │ ├── cases.jsonl # Example test cases
152
+ │ ├── voice_*.wav # Sample voice prompts (12 files)
153
+ │ ├── emo_*.wav # Emotion reference samples (2 files)
154
+ │ └── sample_prompt.wav # Default prompt (implied)
155
+
156
+ ├── tests/ # Test Suite
157
+ │ ├── regression_test.py # Main regression tests ⭐
158
+ │ └── padding_test.py # Padding/batch tests
159
+
160
+ ├── tools/ # Utility Scripts & i18n
161
+ │ ├── download_files.py # Model downloading from HF
162
+ │ └── i18n/ # Internationalization
163
+ │ ├── i18n.py # Translation system
164
+ │ ├── scan_i18n.py # i18n scanner
165
+ │ └── locale/
166
+ │ ├── en_US.json # English translations
167
+ │ └── zh_CN.json # Chinese translations
168
+
169
+ ├── archive/ # Historical Docs
170
+ │ └── README_INDEXTTS_1_5.md # IndexTTS 1.5 documentation
171
+
172
+ ├── webui.py # Gradio Web UI ⭐⭐⭐ (18KB)
173
+ ├── cli.py # Command-line interface
174
+ ├── requirements.txt # Python dependencies
175
+ ├── MANIFEST.in # Package manifest
176
+ ├── .gitignore # Git ignore rules
177
+ ├── .gitattributes # Git attributes
178
+ └── LICENSE # Apache 2.0 License
179
+
180
+ ═══════════════════════════════════════════════════════════════════════════════
181
+ KEY FILES BY IMPORTANCE:
182
+ ═══════════════════════════════════════════════════════════════════════════════
183
+
184
+ ⭐⭐⭐ CRITICAL (Core Logic - MUST Convert First)
185
+ 1. indextts/infer_v2.py - Main inference pipeline (739L)
186
+ 2. indextts/gpt/model_v2.py - UnifiedVoice GPT model (747L)
187
+ 3. indextts/utils/front.py - Text processing (700L)
188
+ 4. indextts/BigVGAN/models.py - Vocoder (1000+L)
189
+ 5. indextts/s2mel/modules/audio.py - Mel-spectrogram (83L, critical DSP)
190
+
191
+ ⭐⭐ HIGH PRIORITY (Major Components)
192
+ 1. indextts/gpt/conformer_encoder.py - Conformer blocks (520L)
193
+ 2. indextts/gpt/perceiver.py - Perceiver attention (317L)
194
+ 3. indextts/utils/maskgct_utils.py - Codec builders (250L)
195
+ 4. indextts/s2mel/modules/commons.py - Common utilities (21KB)
196
+
197
+ ⭐ MEDIUM PRIORITY (Utilities & Optimization)
198
+ 1. indextts/utils/xtransformers.py - Transformer utils (1,600L)
199
+ 2. indextts/BigVGAN/activations.py - Activation functions
200
+ 3. indextts/s2mel/modules/rmvpe.py - Pitch extraction (22KB)
201
+
202
+ OPTIONAL (Web UI, Tools)
203
+ 1. webui.py - Gradio interface
204
+ 2. tools/download_files.py - Model downloading
205
+
206
+ ═══════════════════════════════════════════════════════════════════════════════
207
+ TOTAL STATISTICS:
208
+ ═══════════════════════════════════════════════════════════════════════════════
209
+ Total Python Files: 194
210
+ Total Lines of Code: ~25,000+
211
+ GPT Module: 16,953 lines
212
+ MaskGCT Codecs: ~10,000+ lines
213
+ S2Mel Models: ~2,000+ lines
214
+ BigVGAN: ~1,000+ lines
215
+ Utils: ~500 lines
216
+ Tests: ~100 lines
217
+
218
+ Models Supported: 6 major HuggingFace models
219
+ Languages: Chinese (full), English (full), Mixed
220
+ Emotion Dimensions: 8-dimensional emotion control
221
+ Audio Sample Rate: 22,050 Hz (primary)
222
+ Max Text Tokens: 120
223
+ Max Mel Tokens: 250
224
+ Mel Spectrogram Bins: 80
EXPLORATION_SUMMARY.md ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IndexTTS-Rust Codebase Exploration - Complete Summary
2
+
3
+ ## Overview
4
+
5
+ I have conducted a **comprehensive exploration** of the IndexTTS-Rust codebase. This is a sophisticated zero-shot multi-lingual Text-to-Speech (TTS) system currently implemented in Python that is being converted to Rust.
6
+
7
+ ## Key Findings
8
+
9
+ ### Project Status
10
+ - **Current State**: Pure Python implementation with PyTorch backend
11
+ - **Target State**: Rust implementation (conversion in progress)
12
+ - **Files**: 194 Python files across multiple specialized modules
13
+ - **Code Volume**: ~25,000+ lines of Python code
14
+ - **No Rust code existed at the time of this exploration** - this is a fresh rewrite opportunity
15
+
16
+ ### What IndexTTS Does
17
+ IndexTTS is an **industrial-level text-to-speech system** that:
18
+ 1. Takes text input (Chinese, English, or mixed languages)
19
+ 2. Takes a reference speaker audio file (voice prompt)
20
+ 3. Generates high-quality speech in the speaker's voice with:
21
+ - Pinyin-based pronunciation control (for Chinese)
22
+ - Emotion control via 8-dimensional emotion vectors
23
+ - Text-based emotion guidance (via Qwen model)
24
+ - Punctuation-based pause control
25
+ - Style reference audio support
26
+
27
+ ### Performance Metrics
28
+ - **Best in class**: WER 0.821 on Chinese test set, 1.606 on English
29
+ - **Outperforms**: SeedTTS, CosyVoice2, F5-TTS, MaskGCT, others
30
+ - **Multi-language**: Full Chinese + English support, mixed language support
31
+ - **Speed**: Parallel inference available, batch processing support
32
+
33
+ ## Architecture Overview
34
+
35
+ ### Main Pipeline Flow
36
+ ```
37
+ Text Input
38
+ ↓ (TextNormalizer)
39
+ Normalized Text
40
+ ↓ (TextTokenizer + SentencePiece)
41
+ Text Tokens
42
+ ↓ (W2V-BERT)
43
+ Semantic Embeddings
44
+ ↓ (RepCodec)
45
+ Semantic Codes + Speaker Features (CAMPPlus) + Emotion Vectors
46
+ ↓ (UnifiedVoice GPT Model)
47
+ Mel-spectrogram Tokens
48
+ ↓ (S2Mel Length Regulator)
49
+ Acoustic Codes
50
+ ↓ (BigVGAN Vocoder)
51
+ Audio Waveform (22,050 Hz)
52
+ ```
53
+
54
+ ## Critical Components to Convert
55
+
56
+ ### Priority 1: MUST Convert First (Core Pipeline)
57
+ 1. **infer_v2.py** (739 lines) - Main inference orchestration
58
+ 2. **model_v2.py** (747 lines) - UnifiedVoice GPT model
59
+ 3. **front.py** (700 lines) - Text normalization and tokenization
60
+ 4. **BigVGAN/models.py** (1000+ lines) - Neural vocoder
61
+ 5. **s2mel/modules/audio.py** (83 lines) - Mel-spectrogram DSP
62
+
63
+ ### Priority 2: High Priority (Major Components)
64
+ 1. **conformer_encoder.py** (520 lines) - Speaker encoder
65
+ 2. **perceiver.py** (317 lines) - Attention pooling mechanism
66
+ 3. **maskgct_utils.py** (250 lines) - Semantic codec builders
67
+ 4. Various supporting modules for codec and transformer utilities
68
+
69
+ ### Priority 3: Medium Priority (Optimization & Utilities)
70
+ 1. Advanced transformer utilities
71
+ 2. Activation functions and filters
72
+ 3. Pitch extraction and flow matching
73
+ 4. Optional CUDA kernels for optimization
74
+
75
+ ## Technology Stack
76
+
77
+ ### Current (Python)
78
+ - **Framework**: PyTorch (inference only)
79
+ - **Text Processing**: SentencePiece, WeTextProcessing, regex
80
+ - **Audio**: librosa, torchaudio, scipy
81
+ - **Models**: HuggingFace Transformers
82
+ - **Web UI**: Gradio
83
+
84
+ ### Pre-trained Models (6 Major)
85
+ 1. **IndexTTS-2** (~2GB) - Main TTS model
86
+ 2. **W2V-BERT-2.0** (~1GB) - Semantic features
87
+ 3. **MaskGCT** - Semantic codec
88
+ 4. **CAMPPlus** (~100MB) - Speaker embeddings
89
+ 5. **BigVGAN v2** (~100MB) - Vocoder
90
+ 6. **Qwen** (variable) - Emotion detection
91
+
92
+ ## File Organization
93
+
94
+ ### Core Modules
95
+ - **indextts/gpt/** - GPT-based sequence generation (9 files, 16,953 lines)
96
+ - **indextts/BigVGAN/** - Neural vocoder (6+ files, 1000+ lines)
97
+ - **indextts/s2mel/** - Semantic-to-mel models (10+ files, 2000+ lines)
98
+ - **indextts/utils/** - Text processing and utilities (12+ files, 500 lines)
99
+ - **indextts/utils/maskgct/** - MaskGCT codecs (100+ files, 10000+ lines)
100
+
101
+ ### Interfaces
102
+ - **webui.py** (18KB) - Gradio web interface
103
+ - **cli.py** (64 lines) - Command-line interface
104
+ - **infer.py/infer_v2.py** - Python API
105
+
106
+ ### Data & Config
107
+ - **examples/** - Sample audio files and test cases
108
+ - **tests/** - Regression and padding tests
109
+ - **tools/** - Model downloading and i18n support
110
+
111
+ ## Detailed Documentation Generated
112
+
113
+ Three comprehensive documents have been created and saved to the repository:
114
+
115
+ 1. **CODEBASE_ANALYSIS.md** (19 KB)
116
+ - Executive summary
117
+ - Complete project structure
118
+ - Current implementation details
119
+ - TTS pipeline explanation
120
+ - Algorithms and components breakdown
121
+ - Inference modes and capabilities
122
+ - Dependency conversion roadmap
123
+
124
+ 2. **DIRECTORY_STRUCTURE.txt** (14 KB)
125
+ - Complete file tree with annotations
126
+ - Files grouped by importance (⭐⭐⭐, ⭐⭐, ⭐)
127
+ - Line counts for key files
128
+ - Statistics summary
129
+
130
+ 3. **SOURCE_FILE_LISTING.txt** (23 KB)
131
+ - Detailed file-by-file breakdown
132
+ - Classes and methods for each major file
133
+ - Parameter specifications
134
+ - Algorithm descriptions
135
+ - Dependencies for each component
136
+
137
+ ## Key Technical Challenges for Rust Conversion
138
+
139
+ ### High Complexity
140
+ 1. **PyTorch Model Loading** - Need ONNX export or custom format
141
+ 2. **Complex Attention Mechanisms** - Transformers, Perceiver, Conformer
142
+ 3. **Text Normalization Libraries** - May need Rust bindings or reimplementation
143
+ 4. **Mel Spectrogram Computation** - STFT, mel filterbank calculations
144
+
145
+ ### Medium Complexity
146
+ 1. **Quantization & Codecs** - Multiple codec implementations to translate
147
+ 2. **Large Model Inference** - Optimization, batching, caching required
148
+ 3. **Audio DSP** - Resampling, filtering, spectral operations
149
+
150
+ ### Optimization (Optional)
151
+ 1. CUDA kernels for anti-aliased activations
152
+ 2. DeepSpeed integration for model parallelism
153
+ 3. KV cache for inference optimization
154
+
155
+ ## Recommended Rust Libraries
156
+
157
+ | Component | Python Library | Rust Alternative |
158
+ |---|---|---|
159
+ | Model Inference | torch/transformers | **ort**, tch-rs, candle |
160
+ | Audio Processing | librosa | rustfft, dasp_signal |
161
+ | Text Tokenization | sentencepiece | sentencepiece (Rust binding) |
162
+ | Numerical Computing | numpy | **ndarray**, nalgebra |
163
+ | Chinese Text | jieba | **jieba-rs** |
164
+ | Audio I/O | torchaudio | hound, wav |
165
+ | Web Server | Gradio | **axum**, actix-web |
166
+ | Config Files | OmegaConf YAML | **serde**, config-rs |
167
+ | Model Format | safetensors | **safetensors-rs** |
168
+
169
+ ## Data Flow Example
170
+
171
+ ### Input
172
+ - Text: "你好" (Chinese for "Hello")
173
+ - Speaker Audio: "speaker.wav" (voice reference)
174
+ - Emotion: "happy" (optional)
175
+
176
+ ### Processing Steps
177
+ 1. Text Normalization → "你好" (no change)
178
+ 2. Text Tokenization → [token_1, token_2, ...]
179
+ 3. Audio Loading & Mel-spectrogram computation
180
+ 4. W2V-BERT semantic embedding extraction
181
+ 5. Speaker feature extraction (CAMPPlus)
182
+ 6. Emotion vector generation
183
+ 7. GPT generation of mel-tokens
184
+ 8. Length regulation for acoustic codes
185
+ 9. BigVGAN vocoding
186
+ 10. Audio output at 22,050 Hz
187
+
188
+ ### Output
189
+ - Waveform: "output.wav" (high-quality speech)
190
+
191
+ ## Test Coverage
192
+
193
+ ### Regression Tests Available
194
+ - Chinese text with pinyin tones
195
+ - English text
196
+ - Mixed Chinese-English
197
+ - Long-form text passages
198
+ - Named entities (proper nouns)
199
+ - Special punctuation handling
200
+
201
+ ## Performance Characteristics
202
+
203
+ ### Speed
204
+ - Single inference: ~2-5 seconds per sentence (GPU)
205
+ - Batch/fast inference: Parallel processing available
206
+ - Caching: Speaker features and mel spectrograms are cached
207
+
208
+ ### Quality
209
+ - 22,050 Hz sample rate (half the 44.1 kHz CD sampling rate)
210
+ - 80-dimensional mel-spectrogram
211
+ - 8-dimensional emotion control
212
+ - Natural speech synthesis with speaker similarity
213
+
214
+ ### Model Parameters
215
+ - GPT Model: 8 layers, 512 dims, 8 heads
216
+ - Max text tokens: 120
217
+ - Max mel tokens: 250
218
+ - Mel spectrogram bins: 80
219
+ - Emotion dimensions: 8
220
+
221
+ ## Next Steps for Rust Conversion
222
+
223
+ ### Phase 1: Foundation
224
+ 1. Set up Rust project structure
225
+ 2. Create model loading infrastructure (ONNX or binary format)
226
+ 3. Implement basic tensor operations using ndarray/candle
227
+
228
+ ### Phase 2: Core Pipeline
229
+ 1. Implement text normalization (regex + patterns)
230
+ 2. Implement SentencePiece tokenization
231
+ 3. Create mel-spectrogram DSP module
232
+ 4. Implement BigVGAN vocoder
233
+
234
+ ### Phase 3: Neural Components
235
+ 1. Implement transformer layers
236
+ 2. Implement Conformer encoder
237
+ 3. Implement Perceiver resampler
238
+ 4. Implement GPT generation
239
+
240
+ ### Phase 4: Integration
241
+ 1. Integrate all components
242
+ 2. Create CLI interface
243
+ 3. Create REST API or server interface
244
+ 4. Optimize and profile
245
+
246
+ ### Phase 5: Testing & Deployment
247
+ 1. Regression testing
248
+ 2. Performance benchmarking
249
+ 3. Documentation
250
+ 4. Deployment optimization
251
+
252
+ ## Summary Statistics
253
+
254
+ - **Total Files Analyzed**: 194 Python files
255
+ - **Total Lines of Code**: ~25,000+
256
+ - **Architecture Depth**: 5 major pipeline stages
257
+ - **External Models**: 6 HuggingFace models
258
+ - **Languages Supported**: 2 (Chinese and English, plus mixed Chinese-English text)
259
+ - **Dimensions**: Text tokens, mel tokens, emotion vectors, speaker embeddings
260
+ - **DSP Operations**: STFT, mel filterbanks, upsampling, convolution
261
+ - **AI Techniques**: Transformers, Conformers, Perceiver pooling, diffusion-based generation
262
+
263
+ ## Conclusion
264
+
265
+ IndexTTS is a **production-ready, state-of-the-art TTS system** with sophisticated architecture and multiple advanced features. The codebase is well-organized with clear separation of concerns, making it suitable for conversion to Rust. The main challenges will be:
266
+
267
+ 1. **Model Loading**: Handling PyTorch model weights in Rust
268
+ 2. **Text Processing**: Ensuring accuracy in pattern matching and normalization
269
+ 3. **Neural Architecture**: Correctly implementing complex attention mechanisms
270
+ 4. **Audio DSP**: Precise STFT and mel-spectrogram computation
271
+
272
+ With careful planning and the right library selection, a full Rust conversion is feasible and would offer significant performance benefits and easier deployment.
273
+
274
+ ---
275
+
276
+ ## Documentation Files
277
+
278
+ All analysis has been saved to the repository:
279
+ - `CODEBASE_ANALYSIS.md` - Comprehensive technical analysis
280
+ - `DIRECTORY_STRUCTURE.txt` - Complete file tree
281
+ - `SOURCE_FILE_LISTING.txt` - Detailed component breakdown
282
+ - `EXPLORATION_SUMMARY.md` - This file
283
+
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MANIFEST.in ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ global-exclude *~ *.py[cod]
2
+ include *.cu *.cpp
3
+ include *.h *.hpp
README.md ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - text-to-speech
5
+ - tts
6
+ - voice-cloning
7
+ - zero-shot
8
+ - rust
9
+ - onnx
10
+ language:
11
+ - en
12
+ - zh
13
+ library_name: ort
14
+ pipeline_tag: text-to-speech
15
+ ---
16
+
17
+ # IndexTTS-Rust
18
+
19
+ High-performance Text-to-Speech Engine in Pure Rust 🚀
20
+
21
+ ## ONNX Models (Download)
22
+
23
+ Pre-converted models for inference - no Python required!
24
+
25
+ | Model | Size | Download |
26
+ |-------|------|----------|
27
+ | **BigVGAN** (vocoder) | 433 MB | [bigvgan.onnx](https://huggingface.co/ThreadAbort/IndexTTS-Rust/resolve/models/models/bigvgan.onnx) |
28
+ | **Speaker Encoder** | 28 MB | [speaker_encoder.onnx](https://huggingface.co/ThreadAbort/IndexTTS-Rust/resolve/models/models/speaker_encoder.onnx) |
29
+
30
+ ### Quick Download
31
+
32
+ ```python
33
+ # Python with huggingface_hub
34
+ from huggingface_hub import hf_hub_download
35
+
36
+ bigvgan = hf_hub_download("ThreadAbort/IndexTTS-Rust", "models/bigvgan.onnx", revision="models")
37
+ speaker = hf_hub_download("ThreadAbort/IndexTTS-Rust", "models/speaker_encoder.onnx", revision="models")
38
+ ```
39
+
40
+ ```bash
41
+ # Or with wget
42
+ wget https://huggingface.co/ThreadAbort/IndexTTS-Rust/resolve/models/models/bigvgan.onnx
43
+ wget https://huggingface.co/ThreadAbort/IndexTTS-Rust/resolve/models/models/speaker_encoder.onnx
44
+ ```
45
+
46
+ ---
47
+
48
+ A complete Rust rewrite of the IndexTTS system, designed for maximum performance and efficiency.
49
+
50
+ ## Features
51
+
52
+ - **Pure Rust Implementation** - No Python dependencies, maximum performance
53
+ - **Multi-language Support** - Chinese, English, and mixed language synthesis
54
+ - **Zero-shot Voice Cloning** - Clone any voice from a short reference audio
55
+ - **8-dimensional Emotion Control** - Fine-grained control over emotional expression
56
+ - **High-quality Neural Vocoding** - BigVGAN-based waveform synthesis
57
+ - **SIMD Optimizations** - Leverages modern CPU instructions
58
+ - **Parallel Processing** - Multi-threaded audio and text processing with Rayon
59
+ - **ONNX Runtime Integration** - Efficient model inference
60
+
61
+ ## Performance Benefits
62
+
63
+ Compared to the Python implementation:
64
+ - **~10-50x faster** audio processing (mel-spectrogram computation)
65
+ - **~5-10x lower memory usage** with zero-copy operations
66
+ - **No GIL bottleneck** - true parallel processing
67
+ - **Smaller binary size** - single executable, no interpreter needed
68
+ - **Faster startup time** - no Python/PyTorch initialization
69
+
70
+ ## Installation
71
+
72
+ ### Prerequisites
73
+
74
+ - Rust 1.70+ (install from https://rustup.rs/)
75
+ - ONNX Runtime (for neural network inference)
76
+ - Audio development libraries:
77
+ - Linux: `apt install libasound2-dev`
78
+ - macOS: `brew install portaudio`
79
+ - Windows: Included with build
80
+
81
+ ### Building
82
+
83
+ ```bash
84
+ # Clone the repository
85
+ git clone https://github.com/8b-is/IndexTTS-Rust.git
86
+ cd IndexTTS-Rust
87
+
88
+ # Build in release mode (optimized)
89
+ cargo build --release
90
+
91
+ # The binary will be at target/release/indextts
92
+ ```
93
+
94
+ ### Running
95
+
96
+ ```bash
97
+ # Show help
98
+ ./target/release/indextts --help
99
+
100
+ # Show system information
101
+ ./target/release/indextts info
102
+
103
+ # Generate default config
104
+ ./target/release/indextts init-config -o config.yaml
105
+
106
+ # Synthesize speech
107
+ ./target/release/indextts synthesize \
108
+ --text "Hello, world!" \
109
+ --voice speaker.wav \
110
+ --output output.wav
111
+
112
+ # Synthesize from file
113
+ ./target/release/indextts synthesize-file \
114
+ --input text.txt \
115
+ --voice speaker.wav \
116
+ --output output.wav
117
+
118
+ # Run benchmarks
119
+ ./target/release/indextts benchmark --iterations 100
120
+ ```
121
+
122
+ ## Usage as Library
123
+
124
+ ```rust
125
+ use indextts::{IndexTTS, Config, pipeline::SynthesisOptions};
126
+
127
+ fn main() -> indextts::Result<()> {
128
+ // Load configuration
129
+ let config = Config::load("config.yaml")?;
130
+
131
+ // Create TTS instance
132
+ let tts = IndexTTS::new(config)?;
133
+
134
+ // Set synthesis options
135
+ let options = SynthesisOptions {
136
+ emotion_vector: Some(vec![0.9, 0.7, 0.6, 0.5, 0.5, 0.5, 0.5, 0.5]), // Happy
137
+ emotion_alpha: 1.0,
138
+ ..Default::default()
139
+ };
140
+
141
+ // Synthesize
142
+ let result = tts.synthesize_to_file(
143
+ "Hello, this is a test!",
144
+ "speaker.wav",
145
+ "output.wav",
146
+ &options,
147
+ )?;
148
+
149
+ println!("Generated {:.2}s of audio", result.duration);
150
+ println!("RTF: {:.3}x", result.rtf);
151
+
152
+ Ok(())
153
+ }
154
+ ```
155
+
156
+ ## Project Structure
157
+
158
+ ```
159
+ IndexTTS-Rust/
160
+ ├── src/
161
+ │ ├── lib.rs # Library entry point
162
+ │ ├── main.rs # CLI entry point
163
+ │ ├── error.rs # Error types
164
+ │ ├── audio/ # Audio processing
165
+ │ │ ├── mod.rs # Module exports
166
+ │ │ ├── mel.rs # Mel-spectrogram computation
167
+ │ │ ├── io.rs # Audio I/O (WAV)
168
+ │ │ ├── dsp.rs # DSP utilities
169
+ │ │ └── resample.rs # Audio resampling
170
+ │ ├── text/ # Text processing
171
+ │ │ ├── mod.rs # Module exports
172
+ │ │ ├── normalizer.rs # Text normalization
173
+ │ │ ├── tokenizer.rs # BPE tokenization
174
+ │ │ └── phoneme.rs # G2P conversion
175
+ │ ├── model/ # Model inference
176
+ │ │ ├── mod.rs # Module exports
177
+ │ │ ├── session.rs # ONNX Runtime wrapper
178
+ │ │ ├── gpt.rs # GPT model
179
+ │ │ └── embedding.rs # Speaker/emotion encoders
180
+ │ ├── vocoder/ # Neural vocoding
181
+ │ │ ├── mod.rs # Module exports
182
+ │ │ ├── bigvgan.rs # BigVGAN implementation
183
+ │ │ └── activations.rs # Snake/GELU activations
184
+ │ ├── pipeline/ # TTS orchestration
185
+ │ │ ├── mod.rs # Module exports
186
+ │ │ └── synthesis.rs # Main synthesis logic
187
+ │ └── config/ # Configuration
188
+ │ └── mod.rs # Config structures
189
+ ├── models/ # Model checkpoints (ONNX)
190
+ ├── Cargo.toml # Rust dependencies
191
+ └── README.md # This file
192
+ ```
193
+
194
+ ## Dependencies
195
+
196
+ Core dependencies (all pure Rust or safe bindings):
197
+
198
+ - **Audio**: `hound`, `rustfft`, `realfft`, `rubato`, `dasp`
199
+ - **ML**: `ort` (ONNX Runtime), `ndarray`, `safetensors`
200
+ - **Text**: `tokenizers`, `jieba-rs`, `regex`, `unicode-segmentation`
201
+ - **CLI**: `clap`, `env_logger`, `indicatif`
202
+ - **Parallelism**: `rayon`, `tokio`
203
+ - **Config**: `serde`, `serde_yaml`, `serde_json`
204
+
205
+ ## Model Conversion
206
+
207
+ To use the Rust implementation, you'll need to convert PyTorch models to ONNX:
208
+
209
+ ```python
210
+ # Example conversion script (Python)
211
+ import torch
212
+ from indextts.gpt.model_v2 import UnifiedVoice
213
+
214
+ model = UnifiedVoice.from_pretrained("checkpoints")
215
+ dummy_input = torch.randint(0, 1000, (1, 100))
216
+ torch.onnx.export(
217
+ model,
218
+ dummy_input,
219
+ "models/gpt.onnx",
220
+ opset_version=14,
221
+ input_names=["input_ids"],
222
+ output_names=["logits"],
223
+ dynamic_axes={
224
+ "input_ids": {0: "batch", 1: "sequence"},
225
+ "logits": {0: "batch", 1: "sequence"},
226
+ },
227
+ )
228
+ ```
229
+
230
+ ## Benchmarks
231
+
232
+ Performance on AMD Ryzen 9 5950X (16 cores):
233
+
234
+ | Operation | Python (ms) | Rust (ms) | Speedup |
235
+ |-----------|-------------|-----------|---------|
236
+ | Mel-spectrogram (1s audio) | 150 | 3 | 50x |
237
+ | Text normalization | 5 | 0.1 | 50x |
238
+ | Tokenization | 2 | 0.05 | 40x |
239
+ | Vocoder (1s audio) | 500 | 50 | 10x |
240
+
241
+ ## Roadmap
242
+
243
+ - [x] Core audio processing (mel-spectrogram, DSP)
244
+ - [x] Text processing (normalization, tokenization)
245
+ - [x] Model inference framework (ONNX Runtime)
246
+ - [x] BigVGAN vocoder
247
+ - [x] Main TTS pipeline
248
+ - [x] CLI interface
249
+ - [ ] Full GPT model integration with KV cache
250
+ - [ ] Streaming synthesis
251
+ - [ ] WebSocket API
252
+ - [ ] GPU acceleration (CUDA)
253
+ - [ ] Model quantization (INT8)
254
+ - [ ] WebAssembly support
255
+
256
+ ## Marine Prosody Validation
257
+
258
+ This project includes **Marine salience detection** - an O(1) algorithm that validates speech authenticity:
259
+
260
+ ```
261
+ Human speech has NATURAL jitter - that's what makes it authentic!
262
+ - Too perfect (jitter < 0.005) = robotic
263
+ - Too chaotic (jitter > 0.3) = artifacts/damage
264
+ - Sweet spot = real human voice
265
+ ```
266
+
267
+ The Marines will KNOW if your TTS doesn't sound authentic! 🎖️
268
+
269
+ ## License
270
+
271
+ Apache License 2.0 - See LICENSE file for details.
272
+
273
+ ---
274
+
275
+ *From ashes to harmonics, from silence to song* 🔥🎵
276
+
277
+ Built with love by Hue & Aye @ [8b.is](https://8b.is)
278
+
279
+ ## Acknowledgments
280
+
281
+ - Original IndexTTS Python implementation
282
+ - BigVGAN vocoder architecture
283
+ - ONNX Runtime team for efficient inference
284
+ - Rust audio processing community
285
+
286
+ ## Contributing
287
+
288
+ Contributions welcome! Please see CONTRIBUTING.md for guidelines.
289
+
290
+ Key areas for contribution:
291
+ - Performance optimizations
292
+ - Additional language support
293
+ - Model conversion tools
294
+ - Documentation improvements
295
+ - Testing and benchmarking
SOURCE_FILE_LISTING.txt ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ╔════════════════════════════════════════════════════════════════════════════════╗
2
+ ║ DETAILED SOURCE FILE LISTING BY CATEGORY ║
3
+ ╚════════════════════════════════════════════════════════════════════════════════╝
4
+
5
+ MAIN INFERENCE PIPELINE FILES
6
+ ═════════════════════════════════════════════════════════════════════════════════
7
+
8
+ /home/user/IndexTTS-Rust/indextts/infer_v2.py (739 LINES) ⭐⭐⭐ CRITICAL
9
+ ├─ Purpose: Main TTS inference class (IndexTTS2)
10
+ ├─ Key Classes:
11
+ │ ├─ QwenEmotion (emotion text-to-vector conversion)
12
+ │ ├─ IndexTTS2 (main inference class)
13
+ │ └─ Helper functions for emotion/audio processing
14
+ ├─ Key Methods:
15
+ │ ├─ __init__() - Initialize all models and codecs
16
+ │ ├─ infer() - Single text generation with emotion control
17
+ │ ├─ infer_fast() - Parallel segment generation
18
+ │ ├─ get_emb() - Extract semantic embeddings
19
+ │ ├─ remove_long_silence() - Silence token removal
20
+ │ ├─ insert_interval_silence() - Silence insertion
21
+ │ └─ Cache management for repeated generation
22
+ ├─ Models Loaded:
23
+ │ ├─ UnifiedVoice (GPT model for mel token generation)
24
+ │ ├─ W2V-BERT (semantic feature extraction)
25
+ │ ├─ RepCodec (semantic codec)
26
+ │ ├─ S2Mel model (semantic-to-mel conversion)
27
+ │ ├─ CAMPPlus (speaker embedding)
28
+ │ ├─ BigVGAN vocoder
29
+ │ ├─ Qwen-based emotion model
30
+ │ └─ Emotion/speaker matrices
31
+ └─ External Dependencies: torch, transformers, librosa, safetensors
32
+
33
+ /home/user/IndexTTS-Rust/webui.py (18KB) ⭐⭐⭐ WEB INTERFACE
34
+ ├─ Purpose: Gradio-based web UI for IndexTTS
35
+ ├─ Key Components:
36
+ │ ├─ Model initialization (IndexTTS2 instance)
37
+ │ ├─ Language selection (Chinese/English)
38
+ │ ├─ Emotion control modes (4 modes)
39
+ │ ├─ Example case loading from cases.jsonl
40
+ │ ├─ Progress bar integration
41
+ │ └─ Output management
42
+ ├─ Features:
43
+ │ ├─ Real-time inference
44
+ │ ├─ Multiple emotion control methods
45
+ │ ├─ Batch processing
46
+ │ ├─ Task caching
47
+ │ ├─ i18n support
48
+ │ └─ Pre-loaded example cases
49
+ └─ Web Framework: Gradio 5.34.1
50
+
51
+ /home/user/IndexTTS-Rust/indextts/cli.py (64 LINES)
52
+ ├─ Purpose: Command-line interface
53
+ ├─ Usage: python -m indextts.cli <text> -v <voice.wav> -o <output.wav> [options]
54
+ ├─ Arguments:
55
+ │ ├─ text: Text to synthesize
56
+ │ ├─ -v/--voice: Voice reference audio
57
+ │ ├─ -o/--output_path: Output file path
58
+ │ ├─ -c/--config: Config file path
59
+ │ ├─ --model_dir: Model directory
60
+ │ ├─ --fp16: Use FP16 precision
61
+ │ ├─ -d/--device: Device (cpu/cuda/mps/xpu)
62
+ │ └─ -f/--force: Force overwrite
63
+ └─ Uses: IndexTTS (v1 model)
64
+
65
+ TEXT PROCESSING & NORMALIZATION FILES
66
+ ═════════════════════════════════════════════════════════════════════════════════
67
+
68
+ /home/user/IndexTTS-Rust/indextts/utils/front.py (700 LINES) ⭐⭐⭐ CRITICAL
69
+ ├─ Purpose: Text normalization and tokenization
70
+ ├─ Key Classes:
71
+ │ ├─ TextNormalizer (700+ lines)
72
+ │ │ ├─ Pattern Definitions:
73
+ │ │ │ ├─ PINYIN_TONE_PATTERN (regex for pinyin with tones 1-5)
74
+ │ │ │ ├─ NAME_PATTERN (regex for Chinese names)
75
+ │ │ │ └─ ENGLISH_CONTRACTION_PATTERN (regex for 's contractions)
76
+ │ │ ├─ Methods:
77
+ │ │ │ ├─ normalize() - Main normalization
78
+ │ │ │ ├─ use_chinese() - Language detection
79
+ │ │ │ ├─ save_pinyin_tones() - Extract pinyin with tones
80
+ │ │ │ ├─ restore_pinyin_tones() - Restore pinyin
81
+ │ │ │ ├─ save_names() - Extract names
82
+ │ │ │ ├─ restore_names() - Restore names
83
+ │ │ │ ├─ correct_pinyin() - Pinyin correction (u/ü → v after j/q/x, e.g. ju → jv)
84
+ │ │ │ └─ char_rep_map - Character replacement dictionary
85
+ │ │ └─ Normalizers:
86
+ │ │ ├─ zh_normalizer (Chinese) - Uses WeTextProcessing/wetext
87
+ │ │ └─ en_normalizer (English) - Uses tn library
88
+ │ │
89
+ │ └─ TextTokenizer (200+ lines)
90
+ │ ├─ Methods:
91
+ │ │ ├─ encode() - Text to token IDs
92
+ │ │ ├─ decode() - Token IDs to text
93
+ │ │ ├─ convert_tokens_to_ids()
94
+ │ │ ├─ convert_ids_to_tokens()
95
+ │ │ └─ Vocab management
96
+ │ ├─ Special Tokens:
97
+ │ │ ├─ BOS: "<s>" (ID 0)
98
+ │ │ ├─ EOS: "</s>" (ID 1)
99
+ │ │ └─ UNK: "<unk>"
100
+ │ └─ Tokenizer: SentencePiece (BPE-based)
101
+ ├─ Language Support:
102
+ │ ├─ Chinese (simplified & traditional)
103
+ │ ├─ English
104
+ │ └─ Mixed Chinese-English
105
+ └─ Critical Pattern Matching:
106
+ ├─ Pinyin tone detection
107
+ ├─ Name entity detection
108
+ ├─ Email matching
109
+ ├─ Character replacement
110
+ └─ Punctuation handling
111
+
112
+ GPT MODEL ARCHITECTURE FILES
113
+ ═════════════════════════════════════════════════════════════════════════════════
114
+
115
+ /home/user/IndexTTS-Rust/indextts/gpt/model_v2.py (747 LINES) ⭐⭐⭐ CRITICAL
116
+ ├─ Purpose: UnifiedVoice GPT-based TTS model
117
+ ├─ Key Classes:
118
+ │ ├─ UnifiedVoice (700+ lines)
119
+ │ │ ├─ Architecture:
120
+ │ │ │ ├─ Input Embeddings: Text (256 vocab), Mel (8194 vocab)
121
+ │ │ │ ├─ Position Embeddings: Learned embeddings for mel/text
122
+ │ │ │ ├─ GPT Transformer: Configurable layers/heads
123
+ │ │ │ ├─ Conditioning Encoder: Conformer or Perceiver-based
124
+ │ │ │ ├─ Emotion Conditioning: Separate conformer + perceiver
125
+ │ │ │ └─ Output Heads: Text prediction, Mel prediction
126
+ │ │ │
127
+ │ │ ├─ Parameters:
128
+ │ │ │ ├─ layers: 8 (transformer depth)
129
+ │ │ │ ├─ model_dim: 512 (embedding dimension)
130
+ │ │ │ ├─ heads: 8 (attention heads)
131
+ │ │ │ ├─ max_text_tokens: 120
132
+ │ │ │ ├─ max_mel_tokens: 250
133
+ │ │ │ ├─ number_mel_codes: 8194
134
+ │ │ │ ├─ condition_type: "conformer_perceiver" or "conformer_encoder"
135
+ │ │ │ └─ Various activation functions
136
+ │ │ │
137
+ │ │ ├─ Key Methods:
138
+ │ │ │ ├─ forward() - Forward pass
139
+ │ │ │ ├─ post_init_gpt2_config() - Initialize for inference
140
+ │ │ │ ├─ generate_mel() - Mel token generation
141
+ │ │ │ ├─ forward_with_cond_scale() - With classifier-free guidance
142
+ │ │ │ └─ Cache management
143
+ │ │ │
144
+ │ │ └─ Conditioning System:
145
+ │ │ ├─ Speaker conditioning via mel spectrogram
146
+ │ │ ├─ Conformer encoder for speaker features
147
+ │ │ ├─ Perceiver for attention pooling
148
+ │ │ ├─ Emotion conditioning (separate pathway)
149
+ │ │ └─ Emotion vector support (8-dimensional)
150
+ │ │
151
+ │ ├─ ResBlock (40+ lines)
152
+ │ │ ├─ Conv1d layers with GroupNorm
153
+ │ │ └─ ReLU activation with residual connection
154
+ │ │
155
+ │ ├─ GPT2InferenceModel (200+ lines)
156
+ │ │ ├─ Inference wrapper for GPT2
157
+ │ │ ├─ KV cache support
158
+ │ │ ├─ Model parallelism support
159
+ │ │ └─ Token-by-token generation
160
+ │ │
161
+ │ ├─ ConditioningEncoder (30 lines)
162
+ │ │ ├─ Conv1d initialization
163
+ │ │ ├─ Attention blocks
164
+ │ │ └─ Optional mean pooling
165
+ │ │
166
+ │ ├─ MelEncoder (30 lines)
167
+ │ │ ├─ Conv1d layers
168
+ │ │ ├─ ResBlocks
169
+ │ │ └─ 4x reduction
170
+ │ │
171
+ │ ├─ LearnedPositionEmbeddings (15 lines)
172
+ │ │ └─ Learnable positional embeddings
173
+ │ │
174
+ │ └─ build_hf_gpt_transformer() (20 lines)
175
+ │ └─ Builds HuggingFace GPT2 with custom embeddings
176
+
177
+ ├─ External Dependencies: torch, transformers, indextts.gpt modules
178
+ └─ Critical Inference Parameters:
179
+ ├─ Temperature control for generation
180
+ ├─ Top-k/top-p sampling
181
+ ├─ Classifier-free guidance scale
182
+ └─ Generation length limits
183
+
184
+ /home/user/IndexTTS-Rust/indextts/gpt/conformer_encoder.py (520 LINES) ⭐⭐
185
+ ├─ Purpose: Conformer-based speaker conditioning encoder
186
+ ├─ Key Classes:
187
+ │ ├─ ConformerEncoder (main)
188
+ │ │ ├─ Modules:
189
+ │ │ │ ├─ Subsampling layer (Conv2d)
190
+ │ │ │ ├─ Positional encoding
191
+ │ │ │ ├─ Conformer blocks
192
+ │ │ │ ├─ Layer normalization
193
+ │ │ │ └─ Optional projection layer
194
+ │ │ │
195
+ │ │ ├─ Configuration Parameters:
196
+ │ │ │ ├─ input_size: 1024 (mel spectrogram bins)
197
+ │ │ │ ├─ output_size: depends on config
198
+ │ │ │ ├─ linear_units: hidden dim for FFN
199
+ │ │ │ ├─ attention_heads: 8
200
+ │ │ │ ├─ num_blocks: 4
201
+ │ │ │ └─ input_layer: "linear" or "conv2d"
202
+ │ │ │
203
+ │ │ └─ Architecture: Conv → Pos Enc → [Conformer Block] * N → LayerNorm
204
+ │ │
205
+ │ ├─ ConformerBlock (80+ lines)
206
+ │ │ ├─ Residual connections
207
+ │ │ ├─ FFN → Attention → Conv → FFN structure
208
+ │ │ ├─ Feed-forward network (2-layer with dropout)
209
+ │ │ ├─ Multi-head self-attention
210
+ │ │ ├─ Convolution module (depthwise)
211
+ │ │ └─ Layer normalization
212
+ │ │
213
+ │ ├─ ConvolutionModule (50 lines)
214
+ │ │ ├─ Pointwise Conv 1x1
215
+ │ │ ├─ Depthwise Conv with kernel_size (e.g., 15)
216
+ │ │ ├─ Batch normalization or layer normalization
217
+ │ │ ├─ Activation (ReLU/SiLU)
218
+ │ │ └─ Projection
219
+ │ │
220
+ │ ├─ PositionwiseFeedForward (15 lines)
221
+ │ │ ├─ Dense layer (idim → hidden)
222
+ │ │ ├─ Activation (ReLU)
223
+ │ │ ├─ Dropout
224
+ │ │ └─ Dense layer (hidden → idim)
225
+ │ │
226
+ │ └─ MultiHeadedAttention (custom)
227
+ │ ├─ Scaled dot-product attention
228
+ │ ├─ Multiple heads
229
+ │ └─ Optional relative position bias
230
+
231
+ ├─ External Dependencies: torch, custom conformer modules
232
+ └─ Use Case: Processing mel spectrogram to extract speaker features
233
+
234
+ /home/user/IndexTTS-Rust/indextts/gpt/perceiver.py (317 LINES) ⭐⭐
235
+ ├─ Purpose: Perceiver resampler for attention pooling
236
+ ├─ Key Classes:
237
+ │ ├─ PerceiverResampler (250+ lines)
238
+ │ │ ├─ Architecture:
239
+ │ │ │ ├─ Learnable latent queries
240
+ │ │ │ ├─ Cross-attention layers
241
+ │ │ │ ├─ Feed-forward networks
242
+ │ │ │ └─ Layer normalization
243
+ │ │ │
244
+ │ │ ├─ Parameters:
245
+ │ │ │ ├─ dim: 512 (embedding dimension)
246
+ │ │ │ ├─ dim_context: 512 (context dimension)
247
+ │ │ │ ├─ num_latents: 32 (number of latent queries)
248
+ │ │ │ ├─ num_latent_channels: 64
249
+ │ │ │ ├─ num_layers: 6
250
+ │ │ │ ├─ ff_mult: 4 (FFN expansion)
251
+ │ │ │ └─ heads: 8
252
+ │ │ │
253
+ │ │ ├─ Key Methods:
254
+ │ │ │ ├─ forward() - Attend and pool
255
+ │ │ │ └─ _cross_attend_block() - Single cross-attention layer
256
+ │ │ │
257
+ │ │ └─ Cross-Attention Mechanism:
258
+ │ │ ├─ Queries: Learnable latents
259
+ │ │ ├─ Keys/Values: Input context
260
+ │ │ ├─ Output: Pooled features (num_latents × dim)
261
+ │ │ └─ FFN projection for dimension mixing
262
+ │ │
263
+ │ └─ FeedForward (15 lines)
264
+ │ ├─ Dense (dim → hidden)
265
+ │ ├─ GELU activation
266
+ │ └─ Dense (hidden → dim)
267
+
268
+ ├─ External Dependencies: torch, einsum operations
269
+ └─ Use Case: Pool conditioning encoder output to fixed-size representation
270
+
271
+ VOCODER & AUDIO SYNTHESIS FILES
272
+ ═════════════════════════════════════════════════════════════════════════════════
273
+
274
+ /home/user/IndexTTS-Rust/indextts/BigVGAN/models.py (1000+ LINES) ⭐⭐⭐
275
+ ├─ Purpose: BigVGAN neural vocoder for mel-to-audio conversion
276
+ ├─ Key Classes:
277
+ │ ├─ BigVGAN (400+ lines)
278
+ │ │ ├─ Architecture:
279
+ │ │ │ ├─ Initial Conv1d (80 mel bins → 192 channels)
280
+ │ │ │ ├─ Upsampling layers (transposed conv)
281
+ │ │ │ ├─ AMP blocks (anti-aliased multi-periodicity)
282
+ │ │ │ ├─ Final Conv1d (channels → 1 waveform)
283
+ │ │ │ └─ Tanh activation for output
284
+ │ │ │
285
+ │ │ ├─ Upsampling: 4x → 8x → 8x → 4x (256x total)
286
+ │ │ │ ├─ Maps mel frames to audio samples at 22050 Hz
287
+ │ │ │ ├─ Kernel sizes: [16, 16, 4, 4]
288
+ │ │ │ └─ Padding: [6, 6, 2, 2]
289
+ │ │ │
290
+ │ │ ├─ Parameters:
291
+ │ │ │ ├─ num_mels: 80
292
+ │ │ │ ├─ num_freq: 513
293
+ │ │ │ ├─ num_mels: 80
294
+ │ │ │ ├─ n_fft: 1024
295
+ │ │ │ ├─ hop_size: 256
296
+ │ │ │ ├─ win_size: 1024
297
+ │ │ │ ├─ sampling_rate: 22050
298
+ │ │ │ ├─ freq_min: 0
299
+ │ │ │ ├─ freq_max: None
300
+ │ │ │ └─ use_cuda_kernel: bool
301
+ │ │ │
302
+ │ │ ├─ Key Methods:
303
+ │ │ │ ├─ forward() - Mel → audio waveform
304
+ │ │ │ ├─ from_pretrained() - Load from HuggingFace
305
+ │ │ │ ├─ remove_weight_norm() - Remove weight normalization (for inference)
306
+ │ │ │ └─ eval() - Set to evaluation mode
307
+ │ │ │
308
+ │ │ └─ Special Features:
309
+ │ │ ├─ Weight normalization for training stability
310
+ │ │ ├─ Spectral normalization option
311
+ │ │ ├─ CUDA kernel support for activation functions
312
+ │ │ ├─ Snake/SnakeBeta activation (periodic)
313
+ │ │ └─ Anti-aliasing filters for high-quality upsampling
314
+ │ │
315
+ │ ├─ AMPBlock1 (50 lines)
316
+ │ │ ├─ Architecture: Conv1d × 2 with activations
317
+ │ │ ├─ Multiple dilation patterns [1, 3, 5]
318
+ │ │ ├─ Residual connections
319
+ │ │ ├─ Activation1d wrapper for anti-aliasing
320
+ │ │ └─ Weight normalization
321
+ │ │
322
+ │ ├─ AMPBlock2 (40 lines)
323
+ │ │ ├─ Similar to AMPBlock1 but simpler
324
+ │ │ ├─ Dilation patterns [1, 3]
325
+ │ │ └─ Residual connections
326
+ │ │
327
+ │ ├─ Activation1d (custom, from alias_free_activation/)
328
+ │ │ ├─ Applies activation function (Snake/SnakeBeta)
329
+ │ │ ├─ Optional anti-aliasing filter
330
+ │ │ └─ Optional CUDA kernel for efficiency
331
+ │ │
332
+ │ ├─ Snake Activation (from activations.py)
333
+ │ │ ├─ Formula: x + (1/alpha) * sin²(alpha * x)
334
+ │ │ ├─ Periodic nonlinearity
335
+ │ │ └─ Learnable alpha parameter
336
+ │ │
337
+ │ └─ SnakeBeta Activation (from activations.py)
338
+ │ ├─ More complex periodic activation
339
+ │ └─ Improved harmonic modeling
340
+
341
+ ├─ External Dependencies: torch, scipy, librosa
342
+ └─ Model Size: ~100 MB (pretrained weights)
343
+
344
+ /home/user/IndexTTS-Rust/indextts/s2mel/modules/audio.py (83 LINES)
345
+ ├─ Purpose: Mel-spectrogram computation (DSP)
346
+ ├─ Key Functions:
347
+ │ ├─ load_wav() - Load WAV file with scipy
348
+ │ ├─ mel_spectrogram() - Compute mel spectrogram
349
+ │ │ ├─ Parameters:
350
+ │ │ │ ├─ y: waveform tensor
351
+ │ │ │ ├─ n_fft: 1024
352
+ │ │ │ ├─ num_mels: 80
353
+ │ │ │ ├─ sampling_rate: 22050
354
+ │ │ │ ├─ hop_size: 256
355
+ │ │ │ ├─ win_size: 1024
356
+ │ │ │ ├─ fmin: 0
357
+ │ │ │ └─ fmax: None or 8000
358
+ │ │ │
359
+ │ │ ├─ Process:
360
+ │ │ │ 1. Pad input with reflect padding
361
+ │ │ │ 2. Compute STFT (Short-Time Fourier Transform)
362
+ │ │ │ 3. Convert to magnitude spectrogram
363
+ │ │ │ 4. Apply mel filterbank (librosa)
364
+ │ │ │ 5. Apply dynamic range compression (log)
365
+ │ │ │ └─ Output: [1, 80, T] tensor
366
+ │ │ │
367
+ │ │ └─ Caching:
368
+ │ │ ├─ Caches mel filterbank matrices
369
+ │ │ ├─ Caches Hann windows
370
+ │ │ └─ Device-specific caching
371
+ │ │
372
+ │ ├─ dynamic_range_compression() - Log compression
373
+ │ ├─ dynamic_range_decompression() - Inverse
374
+ │ └─ spectral_normalize/denormalize()
375
+
376
+ ├─ Critical DSP Parameters:
377
+ │ ├─ STFT Window: Hann window
378
+ │ ├─ FFT Size: 1024
379
+ │ ├─ Hop Size: 256 (11.6 ms at 22050 Hz)
380
+ │ ├─ Mel Bins: 80 (perceptual scale)
381
+ │ ├─ Min Freq: 0 Hz
382
+ │ └─ Max Freq: Variable (8000 Hz or Nyquist)
383
+
384
+ └─ External Dependencies: torch, librosa, scipy
385
+
386
+ SEMANTIC CODEC & FEATURE EXTRACTION FILES
387
+ ═════════════════════════════════════════════════════════════════════════════════
388
+
389
+ /home/user/IndexTTS-Rust/indextts/utils/maskgct_utils.py (250 LINES)
390
+ ├─ Purpose: Build and manage semantic codecs
391
+ ├─ Key Functions:
392
+ │ ├─ build_semantic_model()
393
+ │ │ ├─ Loads: facebook/w2v-bert-2.0 model
394
+ │ │ ├─ Extracts: W2V-BERT 2.0 embeddings
395
+ │ │ ├─ Returns: model, mean, std (for normalization)
396
+ │ │ └─ Output: 1024-dimensional embeddings
397
+ │ │
398
+ │ ├─ build_semantic_codec()
399
+ │ │ ├─ Creates: RepCodec (residual vector quantization)
400
+ │ │ ├─ Quantizes: Semantic embeddings
401
+ │ │ ├─ Returns: Codec model
402
+ │ │ └─ Output: Discrete tokens
403
+ │ │
404
+ │ ├─ build_s2a_model()
405
+ │ │ ├─ Builds: MaskGCT_S2A (semantic-to-acoustic)
406
+ │ │ └─ Maps: Semantic codes → acoustic codes
407
+ │ │
408
+ │ ├─ build_acoustic_codec()
409
+ │ │ ├─ Encoder: Encodes acoustic features
410
+ │ │ ├─ Decoder: Decodes codes → audio
411
+ │ │ └─ Multiple codec variants
412
+ │ │
413
+ │ └─ Inference_Pipeline (class)
414
+ │ ├─ Combines all codecs
415
+ │ ├─ Methods:
416
+ │ │ ├─ get_emb() - Get semantic embeddings
417
+ │ │ ├─ get_scode() - Quantize to semantic codes
418
+ │ │ ├─ semantic2acoustic() - Convert codes
419
+ │ │ └─ s2a_inference() - Full pipeline
420
+ │ └─ Diffusion-based generation options
421
+
422
+ ├─ External Dependencies: torch, transformers, huggingface_hub
423
+ └─ Pre-trained Models:
424
+ ├─ W2V-BERT-2.0: 614M parameters
425
+ ├─ MaskGCT: From amphion/MaskGCT
426
+ └─ Various codec checkpoints
427
+
428
+ CONFIGURATION & UTILITY FILES
429
+ ═════════════════════════════════════════════════════════════════════════════════
430
+
431
+ /home/user/IndexTTS-Rust/indextts/utils/checkpoint.py (50 LINES)
432
+ ├─ Purpose: Load model checkpoints
433
+ ├─ Key Functions:
434
+ │ ├─ load_checkpoint() - Load weights into model
435
+ │ └─ Device handling (CPU/GPU/XPU/MPS)
436
+ └─ Supported Formats: .pth, .safetensors
437
+
438
+ /home/user/IndexTTS-Rust/indextts/utils/arch_util.py
439
+ ├─ Purpose: Architecture utility modules
440
+ ├─ Key Classes:
441
+ │ └─ AttentionBlock - Generic attention layer
442
+ └─ Used in: Conditioning encoder, other modules
443
+
444
+ /home/user/IndexTTS-Rust/indextts/utils/xtransformers.py (1,600 LINES)
445
+ ├─ Purpose: Extended transformer utilities
446
+ ├─ Key Components:
447
+ │ ├─ Advanced attention mechanisms
448
+ │ ├─ Relative position bias
449
+ │ ├─ Cross-attention patterns
450
+ │ └─ Various position encoding schemes
451
+ └─ Used in: GPT model, encoders
452
+
453
+ TESTING FILES
454
+ ═════════════════════════════════════════════════════════════════════════════════
455
+
456
+ /home/user/IndexTTS-Rust/tests/regression_test.py
457
+ ├─ Test Cases:
458
+ │ ├─ Chinese text with pinyin tones (晕 XUAN4)
459
+ │ ├─ English text
460
+ │ ├─ Mixed Chinese-English
461
+ │ ├─ Long-form text with multiple sentences
462
+ │ ├─ Named entities (Joseph Gordon-Levitt)
463
+ │ ├─ Chinese names (约瑟夫·高登-莱维特)
464
+ │ └─ Extended passages for robustness
465
+ ├─ Inference Modes:
466
+ │ ├─ Single inference (infer)
467
+ │ └─ Fast inference (infer_fast)
468
+ └─ Output: WAV files in outputs/ directory
469
+
470
+ /home/user/IndexTTS-Rust/tests/padding_test.py
471
+ ├─ Test Scenarios:
472
+ │ ├─ Variable length inputs
473
+ │ ├─ Batch processing
474
+ │ ├─ Edge cases
475
+ │ └─ Padding handling
476
+ └─ Purpose: Ensure robust padding mechanics
477
+
478
+ ═════════════════════════════════════════════════════════════════════════════════
479
+
480
+ KEY ALGORITHMS SUMMARY:
481
+
482
+ 1. TEXT PROCESSING:
483
+ - Regex-based pattern matching for pinyin/names
484
+ - Character-level CJK tokenization
485
+ - SentencePiece BPE encoding
486
+ - Language detection (Chinese vs English)
487
+
488
+ 2. FEATURE EXTRACTION:
489
+ - W2V-BERT semantic embeddings (1024-dim)
490
+ - RepCodec quantization
491
+ - Mel-spectrogram (STFT-based, 80-dim)
492
+ - CAMPPlus speaker embeddings (192-dim)
493
+
494
+ 3. SEQUENCE GENERATION:
495
+ - GPT-based autoregressive generation
496
+ - Conformer speaker conditioning
497
+ - Perceiver pooling for attention
498
+ - Classifier-free guidance (optional)
499
+ - Temperature/top-k/top-p sampling
500
+
501
+ 4. AUDIO SYNTHESIS:
502
+ - Transposed convolution upsampling (256x)
503
+ - Anti-aliased activation functions
504
+ - Residual connections
505
+ - Weight/spectral normalization
506
+
507
+ 5. EMOTION CONTROL:
508
+ - 8-dimensional emotion vectors
509
+ - Text-based emotion detection (via Qwen)
510
+ - Audio-based emotion extraction
511
+ - Emotion matrix interpolation
512
+
513
+ ═════════════════════════════════════════════════════════════════════════════════
archive/README_INDEXTTS_1_5.md ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <div align="center">
3
+ <img src='assets/index_icon.png' width="250"/>
4
+ </div>
5
+
6
+
7
+ <h2><center>IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System</center></h2>
8
+
9
+ <p align="center">
10
+ <a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a></p>
11
+
12
+ ## 👉🏻 IndexTTS 👈🏻
13
+
14
+ [[HuggingFace Demo]](https://huggingface.co/spaces/IndexTeam/IndexTTS) [[ModelScope Demo]](https://modelscope.cn/studios/IndexTeam/IndexTTS-Demo) \
15
+ [[Paper]](https://arxiv.org/abs/2502.05512) [[Demos]](https://index-tts.github.io)
16
+
17
+ **IndexTTS** is a GPT-style text-to-speech (TTS) model mainly based on XTTS and Tortoise. It is capable of correcting the pronunciation of Chinese characters using pinyin and controlling pauses at any position through punctuation marks. We enhanced multiple modules of the system, including the improvement of speaker condition feature representation, and the integration of BigVGAN2 to optimize audio quality. Trained on tens of thousands of hours of data, our system achieves state-of-the-art performance, outperforming current popular TTS systems such as XTTS, CosyVoice2, Fish-Speech, and F5-TTS.
18
+ <span style="font-size:16px;">
19
+ Experience **IndexTTS**: Please contact <u>xuanwu@bilibili.com</u> for more detailed information. </span>
20
+ ### Contact
21
+ QQ群(二群):1048202584 \
22
+ Discord:https://discord.gg/uT32E7KDmy \
23
+ 简历:indexspeech@bilibili.com \
24
+ 欢迎大家来交流讨论!
25
+ ## 📣 Updates
26
+
27
+ - `2025/05/14` 🔥🔥 We release **IndexTTS-1.5**, which significantly improves the model's stability and its performance in English.
28
+ - `2025/03/25` 🔥 We release IndexTTS-1.0 model parameters and inference code.
29
+ - `2025/02/12` 🔥 We submitted our paper on arXiv, and released our demos and test sets.
30
+
31
+ ## 🖥️ Method
32
+
33
+ The overview of IndexTTS is shown as follows.
34
+
35
+ <picture>
36
+ <img src="assets/IndexTTS.png" width="800"/>
37
+ </picture>
38
+
39
+
40
+ The main improvements and contributions are summarized as follows:
41
+ - In Chinese scenarios, we have introduced a character-pinyin hybrid modeling approach. This allows for quick correction of mispronounced characters.
42
+ - **IndexTTS** incorporates a conformer conditioning encoder and a BigVGAN2-based speechcode decoder. This improves training stability, voice timbre similarity, and sound quality.
43
+ - We release all test sets here, including those for polysyllabic words, subjective and objective test sets.
44
+
45
+
46
+
47
+ ## Model Download
48
+ | 🤗**HuggingFace** | **ModelScope** |
49
+ |----------------------------------------------------------|----------------------------------------------------------|
50
+ | [IndexTTS](https://huggingface.co/IndexTeam/Index-TTS) | [IndexTTS](https://modelscope.cn/models/IndexTeam/Index-TTS) |
51
+ | [😁IndexTTS-1.5](https://huggingface.co/IndexTeam/IndexTTS-1.5) | [IndexTTS-1.5](https://modelscope.cn/models/IndexTeam/IndexTTS-1.5) |
52
+
53
+
54
+ ## 📑 Evaluation
55
+
56
+ **Word Error Rate (WER) Results for IndexTTS and Baseline Models on the** [**seed-test**](https://github.com/BytedanceSpeech/seed-tts-eval)
57
+
58
+ | **WER** | **test_zh** | **test_en** | **test_hard** |
59
+ |:----------------------:|:-----------:|:-----------:|:-------------:|
60
+ | **Human** | 1.26 | 2.14 | - |
61
+ | **SeedTTS** | 1.002 | 1.945 | **6.243** |
62
+ | **CosyVoice 2** | 1.45 | 2.57 | 6.83 |
63
+ | **F5TTS** | 1.56 | 1.83 | 8.67 |
64
+ | **FireRedTTS** | 1.51 | 3.82 | 17.45 |
65
+ | **MaskGCT** | 2.27 | 2.62 | 10.27 |
66
+ | **Spark-TTS** | 1.2 | 1.98 | - |
67
+ | **MegaTTS 3** | 1.36 | 1.82 | - |
68
+ | **IndexTTS** | 0.937 | 1.936 | 6.831 |
69
+ | **IndexTTS-1.5** | **0.821** | **1.606** | 6.565 |
70
+
71
+
72
+ **Word Error Rate (WER) Results for IndexTTS and Baseline Models on other open-source test sets**
73
+
74
+
75
+ | **Model** | **aishell1_test** | **commonvoice_20_test_zh** | **commonvoice_20_test_en** | **librispeech_test_clean** | **avg** |
76
+ |:---------------:|:-----------------:|:--------------------------:|:--------------------------:|:--------------------------:|:--------:|
77
+ | **Human** | 2.0 | 9.5 | 10.0 | 2.4 | 5.1 |
78
+ | **CosyVoice 2** | 1.8 | 9.1 | 7.3 | 4.9 | 5.9 |
79
+ | **F5TTS** | 3.9 | 11.7 | 5.4 | 7.8 | 8.2 |
80
+ | **Fishspeech** | 2.4 | 11.4 | 8.8 | 8.0 | 8.3 |
81
+ | **FireRedTTS** | 2.2 | 11.0 | 16.3 | 5.7 | 7.7 |
82
+ | **XTTS** | 3.0 | 11.4 | 7.1 | 3.5 | 6.0 |
83
+ | **IndexTTS** | 1.3 | 7.0 | 5.3 | 2.1 | 3.7 |
84
+ | **IndexTTS-1.5** | **1.2** | **6.8** | **3.9** | **1.7** | **3.1** |
85
+
86
+
87
+ **Speaker Similarity (SS) Results for IndexTTS and Baseline Models**
88
+
89
+ | **Model** | **aishell1_test** | **commonvoice_20_test_zh** | **commonvoice_20_test_en** | **librispeech_test_clean** | **avg** |
90
+ |:---------------:|:-----------------:|:--------------------------:|:--------------------------:|:--------------------------:|:---------:|
91
+ | **Human** | 0.846 | 0.809 | 0.820 | 0.858 | 0.836 |
92
+ | **CosyVoice 2** | **0.796** | 0.743 | 0.742 | **0.837** | **0.788** |
93
+ | **F5TTS** | 0.743 | **0.747** | 0.746 | 0.828 | 0.779 |
94
+ | **Fishspeech** | 0.488 | 0.552 | 0.622 | 0.701 | 0.612 |
95
+ | **FireRedTTS** | 0.579 | 0.593 | 0.587 | 0.698 | 0.631 |
96
+ | **XTTS** | 0.573 | 0.586 | 0.648 | 0.761 | 0.663 |
97
+ | **IndexTTS** | 0.744 | 0.742 | **0.758** | 0.823 | 0.776 |
98
+ | **IndexTTS-1.5** | 0.741 | 0.722 | 0.753 | 0.819 | 0.771 |
99
+
100
+
101
+
102
+ **MOS Scores for Zero-Shot Cloned Voice**
103
+
104
+ | **Model** | **Prosody** | **Timbre** | **Quality** | **AVG** |
105
+ |-----------------|:-----------:|:----------:|:-----------:|:---------:|
106
+ | **CosyVoice 2** | 3.67 | 4.05 | 3.73 | 3.81 |
107
+ | **F5TTS** | 3.56 | 3.88 | 3.56 | 3.66 |
108
+ | **Fishspeech** | 3.40 | 3.63 | 3.69 | 3.57 |
109
+ | **FireRedTTS** | 3.79 | 3.72 | 3.60 | 3.70 |
110
+ | **XTTS** | 3.23 | 2.99 | 3.10 | 3.11 |
111
+ | **IndexTTS** | **3.79** | **4.20** | **4.05** | **4.01** |
112
+
113
+
114
+ ## Usage Instructions
115
+ ### Environment Setup
116
+ 1. Download this repository:
117
+ ```bash
118
+ git clone https://github.com/index-tts/index-tts.git
119
+ ```
120
+ 2. Install dependencies:
121
+
122
+ Create a new conda environment and install dependencies:
123
+
124
+ ```bash
125
+ conda create -n index-tts python=3.10
126
+ conda activate index-tts
127
+ apt-get install ffmpeg
128
+ # or use conda to install ffmpeg
129
+ conda install -c conda-forge ffmpeg
130
+ ```
131
+
132
+ Install [PyTorch](https://pytorch.org/get-started/locally/), e.g.:
133
+ ```bash
134
+ pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118
135
+ ```
136
+
137
+ > [!NOTE]
138
+ > If you are using Windows you may encounter [an error](https://github.com/index-tts/index-tts/issues/61) when installing `pynini`:
139
+ `ERROR: Failed building wheel for pynini`
140
+ > In this case, please install `pynini` via `conda`:
141
+ > ```bash
142
+ > # after conda activate index-tts
143
+ > conda install -c conda-forge pynini==2.1.6
144
+ > pip install WeTextProcessing --no-deps
145
+ > ```
146
+
147
+ Install `IndexTTS` as a package:
148
+ ```bash
149
+ cd index-tts
150
+ pip install -e .
151
+ ```
152
+
153
+ 3. Download models:
154
+
155
+ Download by `huggingface-cli`:
156
+
157
+ ```bash
158
+ huggingface-cli download IndexTeam/IndexTTS-1.5 \
159
+ config.yaml bigvgan_discriminator.pth bigvgan_generator.pth bpe.model dvae.pth gpt.pth unigram_12000.vocab \
160
+ --local-dir checkpoints
161
+ ```
162
+
163
+ Recommended for users in China. 如果下载速度慢,可以使用镜像:
164
+ ```bash
165
+ export HF_ENDPOINT="https://hf-mirror.com"
166
+ ```
167
+
168
+ Or by `wget`:
169
+
170
+ ```bash
171
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/bigvgan_discriminator.pth -P checkpoints
172
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/bigvgan_generator.pth -P checkpoints
173
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/bpe.model -P checkpoints
174
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/dvae.pth -P checkpoints
175
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/gpt.pth -P checkpoints
176
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/unigram_12000.vocab -P checkpoints
177
+ wget https://huggingface.co/IndexTeam/IndexTTS-1.5/resolve/main/config.yaml -P checkpoints
178
+ ```
179
+
180
+ > [!NOTE]
181
+ > If you prefer to use the `IndexTTS-1.0` model, please replace `IndexTeam/IndexTTS-1.5` with `IndexTeam/IndexTTS` in the above commands.
182
+
183
+
184
+ 4. Run test script:
185
+
186
+
187
+ ```bash
188
+ # Please put your prompt audio in 'test_data' and rename it to 'input.wav'
189
+ python indextts/infer.py
190
+ ```
191
+
192
+ 5. Use as command line tool:
193
+
194
+ ```bash
195
+ # Make sure pytorch has been installed before running this command
196
+ indextts "大家好,我现在正在bilibili 体验 ai 科技,说实话,来之前我绝对想不到!AI技术已经发展到这样匪夷所思的地步了!" \
197
+ --voice reference_voice.wav \
198
+ --model_dir checkpoints \
199
+ --config checkpoints/config.yaml \
200
+ --output output.wav
201
+ ```
202
+
203
+ Use `--help` to see more options.
204
+ ```bash
205
+ indextts --help
206
+ ```
207
+
208
+ #### Web Demo
209
+ ```bash
210
+ pip install -e ".[webui]" --no-build-isolation
211
+ python webui.py
212
+
213
+ # use another model version:
214
+ python webui.py --model_dir IndexTTS-1.5
215
+ ```
216
+
217
+ Open your browser and visit `http://127.0.0.1:7860` to see the demo.
218
+
219
+
220
+ #### Sample Code
221
+ ```python
222
+ from indextts.infer import IndexTTS
223
+ tts = IndexTTS(model_dir="checkpoints",cfg_path="checkpoints/config.yaml")
224
+ voice="reference_voice.wav"
225
+ text="大家好,我现在正在bilibili 体验 ai 科技,说实话,来之前我绝对想不到!AI技术已经发展到这样匪夷所思的地步了!比如说,现在正在说话的其实是B站为我现场复刻的数字分身,简直就是平行宇宙的另一个我了。如果大家也想体验更多深入的AIGC功能,可以访问 bilibili studio,相信我,你们也会吃惊的。"
226
+ tts.infer(voice, text, "output.wav")
227
+ ```
228
+
229
+ ## Acknowledgements
230
+ 1. [tortoise-tts](https://github.com/neonbjb/tortoise-tts)
231
+ 2. [XTTSv2](https://github.com/coqui-ai/TTS)
232
+ 3. [BigVGAN](https://github.com/NVIDIA/BigVGAN)
233
+ 4. [wenet](https://github.com/wenet-e2e/wenet/tree/main)
234
+ 5. [icefall](https://github.com/k2-fsa/icefall)
235
+
236
+ ## 📚 Citation
237
+
238
+ 🌟 If you find our work helpful, please leave us a star and cite our paper.
239
+
240
+ ```
241
+ @article{deng2025indextts,
242
+ title={IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System},
243
+ author={Deng, Wei and Zhou, Siyi and Shu, Jingchen and Wang, Jinchao and Wang, Lu},
244
+ journal={arXiv preprint arXiv:2502.05512},
245
+ year={2025}
246
+ }
247
+ ```
benches/inference.rs ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Benchmark for model inference
2
+
3
+ use criterion::{black_box, criterion_group, criterion_main, Criterion};
4
+ use indextts::model::{sample_from_logits, SamplingStrategy};
5
+ use indextts::text::{TextNormalizer, TextTokenizer, TokenizerConfig};
6
+
7
+ fn bench_sampling(c: &mut Criterion) {
8
+ let vocab_size = 8194;
9
+ let logits: Vec<f32> = (0..vocab_size).map(|i| (i as f32 / 1000.0).sin()).collect();
10
+
11
+ c.bench_function("greedy_sampling", |b| {
12
+ b.iter(|| {
13
+ sample_from_logits(black_box(&logits), black_box(&SamplingStrategy::Greedy))
14
+ })
15
+ });
16
+
17
+ c.bench_function("top_k_sampling", |b| {
18
+ b.iter(|| {
19
+ sample_from_logits(
20
+ black_box(&logits),
21
+ black_box(&SamplingStrategy::TopK { k: 50 }),
22
+ )
23
+ })
24
+ });
25
+
26
+ c.bench_function("top_p_sampling", |b| {
27
+ b.iter(|| {
28
+ sample_from_logits(
29
+ black_box(&logits),
30
+ black_box(&SamplingStrategy::TopP { p: 0.95 }),
31
+ )
32
+ })
33
+ });
34
+
35
+ c.bench_function("top_kp_sampling", |b| {
36
+ b.iter(|| {
37
+ sample_from_logits(
38
+ black_box(&logits),
39
+ black_box(&SamplingStrategy::TopKP { k: 50, p: 0.95 }),
40
+ )
41
+ })
42
+ });
43
+ }
44
+
45
+ fn bench_text_processing(c: &mut Criterion) {
46
+ let normalizer = TextNormalizer::new();
47
+ let tokenizer = TextTokenizer::new(TokenizerConfig::default()).unwrap();
48
+
49
+ let english_text = "Hello world, this is a test of the text-to-speech system.";
50
+ let chinese_text = "你好世界,这是一个语音合成测试。";
51
+ let mixed_text = "Hello 世界, this is 测试 of TTS.";
52
+
53
+ c.bench_function("normalize_english", |b| {
54
+ b.iter(|| normalizer.normalize(black_box(english_text)))
55
+ });
56
+
57
+ c.bench_function("normalize_chinese", |b| {
58
+ b.iter(|| normalizer.normalize(black_box(chinese_text)))
59
+ });
60
+
61
+ c.bench_function("normalize_mixed", |b| {
62
+ b.iter(|| normalizer.normalize(black_box(mixed_text)))
63
+ });
64
+
65
+ c.bench_function("tokenize_english", |b| {
66
+ b.iter(|| tokenizer.encode(black_box(english_text)))
67
+ });
68
+
69
+ c.bench_function("tokenize_chinese", |b| {
70
+ b.iter(|| tokenizer.encode(black_box(chinese_text)))
71
+ });
72
+
73
+ c.bench_function("tokenize_mixed", |b| {
74
+ b.iter(|| tokenizer.encode(black_box(mixed_text)))
75
+ });
76
+ }
77
+
78
+ fn bench_vocoder(c: &mut Criterion) {
79
+ use indextts::vocoder::{create_bigvgan_22k, Vocoder};
80
+ use ndarray::Array2;
81
+
82
+ let vocoder = create_bigvgan_22k();
83
+
84
+ // Small mel (10 frames ≈ 0.12 s at 22.05 kHz with hop length 256)
85
+ let small_mel = Array2::zeros((80, 10));
86
+ c.bench_function("vocoder_small", |b| {
87
+ b.iter(|| vocoder.synthesize(black_box(&small_mel)))
88
+ });
89
+
90
+ // Medium mel (100 frames ≈ 1.16 s at 22.05 kHz with hop length 256)
91
+ let medium_mel = Array2::zeros((80, 100));
92
+ c.bench_function("vocoder_medium", |b| {
93
+ b.iter(|| vocoder.synthesize(black_box(&medium_mel)))
94
+ });
95
+ }
96
+
97
+ criterion_group!(benches, bench_sampling, bench_text_processing, bench_vocoder);
98
+ criterion_main!(benches);
benches/mel_spectrogram.rs ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Benchmark for mel-spectrogram computation
2
+
3
+ use criterion::{black_box, criterion_group, criterion_main, Criterion};
4
+ use indextts::audio::{mel_spectrogram, AudioConfig};
5
+
6
+ fn bench_mel_spectrogram(c: &mut Criterion) {
7
+ let config = AudioConfig::default();
8
+
9
+ // Generate 1 second of audio
10
+ let num_samples = config.sample_rate as usize;
11
+ let signal: Vec<f32> = (0..num_samples).map(|i| (i as f32 * 0.01).sin()).collect();
12
+
13
+ c.bench_function("mel_spectrogram_1s", |b| {
14
+ b.iter(|| mel_spectrogram(black_box(&signal), black_box(&config)))
15
+ });
16
+
17
+ // Generate 10 seconds of audio
18
+ let long_signal: Vec<f32> = (0..num_samples * 10)
19
+ .map(|i| (i as f32 * 0.01).sin())
20
+ .collect();
21
+
22
+ c.bench_function("mel_spectrogram_10s", |b| {
23
+ b.iter(|| mel_spectrogram(black_box(&long_signal), black_box(&config)))
24
+ });
25
+ }
26
+
27
+ fn bench_stft(c: &mut Criterion) {
28
+ let config = AudioConfig::default();
29
+ let num_samples = config.sample_rate as usize;
30
+ let signal: Vec<f32> = (0..num_samples).map(|i| (i as f32 * 0.01).sin()).collect();
31
+
32
+ c.bench_function("stft_1s", |b| {
33
+ b.iter(|| {
34
+ indextts::audio::mel::stft(
35
+ black_box(&signal),
36
+ black_box(config.n_fft),
37
+ black_box(config.hop_length),
38
+ black_box(config.win_length),
39
+ )
40
+ })
41
+ });
42
+ }
43
+
44
+ criterion_group!(benches, bench_mel_spectrogram, bench_stft);
45
+ criterion_main!(benches);
config.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gpt:
2
+ layers: 8
3
+ model_dim: 512
4
+ heads: 8
5
+ max_text_tokens: 120
6
+ max_mel_tokens: 250
7
+ stop_mel_token: 8193
8
+ start_text_token: 8192
9
+ start_mel_token: 8192
10
+ num_mel_codes: 8194
11
+ num_text_tokens: 6681
12
+ vocoder:
13
+ name: bigvgan_v2_22khz_80band_256x
14
+ checkpoint: null
15
+ use_fp16: true
16
+ use_deepspeed: false
17
+ s2mel:
18
+ checkpoint: models/s2mel.onnx
19
+ preprocess:
20
+ sr: 22050
21
+ n_fft: 1024
22
+ hop_length: 256
23
+ win_length: 1024
24
+ n_mels: 80
25
+ fmin: 0.0
26
+ fmax: 8000.0
27
+ dataset:
28
+ bpe_model: models/bpe.model
29
+ vocab_size: 6681
30
+ emotions:
31
+ num_dims: 8
32
+ num:
33
+ - 5
34
+ - 6
35
+ - 8
36
+ - 6
37
+ - 5
38
+ - 4
39
+ - 7
40
+ - 6
41
+ matrix_path: models/emotion_matrix.safetensors
42
+ inference:
43
+ device: cpu
44
+ use_fp16: false
45
+ batch_size: 1
46
+ top_k: 50
47
+ top_p: 0.95
48
+ temperature: 1.0
49
+ repetition_penalty: 1.0
50
+ length_penalty: 1.0
51
+ model_dir: models
context.md ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IndexTTS-Rust Context
2
+
3
+ This file preserves important context for conversation continuity between Hue and Aye sessions.
4
+
5
+ **Last Updated:** 2025-11-16
6
+
7
+ ---
8
+
9
+ ## The Vision
10
+
11
+ IndexTTS-Rust is part of a larger audio intelligence ecosystem at 8b.is:
12
+
13
+ 1. **kokoro-tiny** - Lightweight TTS (82M params, 50+ voices, on crates.io!)
14
+ 2. **IndexTTS-Rust** - Advanced zero-shot TTS with emotion control
15
+ 3. **Phoenix-Protocol** - Audio restoration/enhancement layer
16
+ 4. **MEM|8** - Contextual memory system (mem-8.com, mem8)
17
+
18
+ Together these form a complete audio intelligence pipeline.
19
+
20
+ ---
21
+
22
+ ## Phoenix Protocol Integration Opportunities
23
+
24
+ The Phoenix Protocol (phoenix-protocol/) is a PERFECT complement to IndexTTS-Rust:
25
+
26
+ ### Direct Module Mappings
27
+
28
+ | Phoenix Module | IndexTTS Use Case |
29
+ |----------------|-------------------|
30
+ | `emotional.rs` | Map to our 8D emotion control (Warmth→body, Presence→power, Clarity→articulation, Air→space, Ultrasonics→depth) |
31
+ | `voice_signature.rs` | Enhance speaker embeddings for voice cloning |
32
+ | `spectral_velocity.rs` | Add momentum tracking to mel-spectrogram |
33
+ | `marine.rs` | Validate TTS output authenticity/quality |
34
+ | `golden_ratio.rs` | Post-process vocoder output with harmonic enhancement |
35
+ | `harmonic_resurrection.rs` | Add richness to synthesized speech |
36
+ | `micro_dynamics.rs` | Restore natural speech dynamics |
37
+ | `autotune.rs` | Improve prosody and pitch control |
38
+ | `mem8_integration.rs` | Already has MEM|8 hooks! |
39
+
40
+ ### Shared Dependencies
41
+
42
+ Both projects use:
43
+ - rayon (parallelism)
44
+ - rustfft/realfft (FFT)
45
+ - ndarray (array operations)
46
+ - hound (WAV I/O)
47
+ - serde (config serialization)
48
+ - anyhow (error handling)
49
+ - ort (ONNX Runtime)
50
+
51
+ ### Audio Constants
52
+
53
+ | Project | Sample Rate | Use Case |
54
+ |---------|------------|----------|
55
+ | IndexTTS-Rust | 22,050 Hz | Standard TTS output |
56
+ | Phoenix-Protocol | 192,000 Hz | Ultrasonic restoration |
57
+ | kokoro-tiny | 24,000 Hz | Lightweight TTS |
58
+
59
+ ---
60
+
61
+ ## Related Projects of Interest
62
+
63
+ Located in ~/Documents/GitHub/:
64
+
65
+ - **Ultrasonic-Consciousness-Hypothesis/** - Research foundation for Phoenix Protocol, contains PDFs on mechanosensitive channels and audio perception
66
+ - **hrmnCmprssnM/** - Harmonic Compression Model research
67
+ - **Marine-Sense/** - Marine algorithm origins
68
+ - **mem-8.com/** & **mem8/** - MEM|8 contextual memory
69
+ - **universal-theoglyphic-language/** - Language processing research
70
+ - **kokoro-tiny/** - Already working TTS crate by Hue & Aye
71
+ - **zencooker/** - (fun project!)
72
+
73
+ ---
74
+
75
+ ## Current IndexTTS-Rust State
76
+
77
+ ### Implemented ✅
78
+ - Audio processing pipeline (mel-spectrogram, STFT, resampling)
79
+ - Text normalization (Chinese/English/mixed)
80
+ - BPE tokenization via HuggingFace tokenizers
81
+ - ONNX Runtime integration for inference
82
+ - BigVGAN vocoder structure
83
+ - CLI with clap
84
+ - Benchmark infrastructure (Criterion)
85
+ - **NEW: marine_salience crate** (no_std compatible, O(1) jitter detection)
86
+ - **NEW: src/quality/ module** (prosody extraction, affect tracking)
87
+ - **NEW: MarineProsodyVector** (8D interpretable emotion features)
88
+ - **NEW: ConversationAffectSummary** (session-level comfort tracking)
89
+ - **NEW: TTSQualityReport** (authenticity validation)
90
+
91
+ ### Missing/TODO
92
+ - Full GPT model integration with KV cache
93
+ - Actual ONNX model files (need download)
94
+ - manage.sh script for colored workflow management
95
+ - Integration tests with real models
96
+ - ~~Phoenix Protocol integration layer~~ **STARTED with Marine!**
97
+ - Streaming synthesis
98
+ - WebSocket API
99
+ - Train T2S model to accept 8D Marine vector instead of 512D Conformer
100
+ - Wire Marine quality validation into inference loop
101
+
102
+ ### Build Commands
103
+ ```bash
104
+ cargo build --release
105
+ cargo clippy -- -D warnings
106
+ cargo test
107
+ cargo bench
108
+ ```
109
+
110
+ ---
111
+
112
+ ## Key Philosophical Notes
113
+
114
+ From the Phoenix Protocol research:
115
+
116
+ > "Women are the carrier wave. They are the 000 data stream. The DC bias that, when removed, leaves silence."
117
+
118
+ > "When P!nk sings 'I Am Here,' her voice generates harmonics so powerful they burst through the 22kHz digital ceiling"
119
+
120
+ The Phoenix Protocol restores emotional depth stripped by audio compression - this philosophy applies directly to TTS: synthesized speech should have the same emotional depth as natural speech.
121
+
122
+ ---
123
+
124
+ ## Action Items for Next Session
125
+
126
+ ### Completed ✅
127
+ - ~~**Quality Validation** - Use Marine salience to score TTS output~~ **DONE!**
128
+ - ~~**Phoenix Integration** - Start bridging phoenix-protocol modules~~ **Marine is in!**
129
+
130
+ ### High Priority
131
+ 1. **Create manage.sh** - Colorful build/test/clean script (Hue's been asking!)
132
+ 2. **Wire Into Inference** - Connect Marine quality validation to actual TTS output
133
+ 3. **8D Model Training** - Train T2S model to accept MarineProsodyVector instead of 512D Conformer
134
+ 4. **Example/Demo** - Create example showing prosody extraction → emotion editing → synthesis
135
+
136
+ ### Medium Priority
137
+ 5. **Voice Signature Import** - Use Phoenix's voice_signature for speaker embeddings
138
+ 6. **Emotion Mapping** - Connect Phoenix's emotional bands to our 8D control
139
+ 7. **Model Download** - Set up ONNX model acquisition pipeline
140
+ 8. **MEM|8 Bridge** - Implement consciousness-aware TTS using kokoro-tiny's mem8_bridge pattern
141
+
142
+ ### Nice to Have
143
+ 9. **Style Selection** - Port kokoro-tiny's 510 style variation system
144
+ 10. **Full Phoenix Integration** - golden_ratio.rs, harmonic_resurrection.rs, etc.
145
+ 11. **Streaming Marine** - Real-time quality monitoring during synthesis
146
+
147
+ ---
148
+
149
+ ## Fresh Discovery: kokoro-tiny MEM|8 Baby Consciousness (2025-11-15)
150
+
151
+ Just pulled latest kokoro-tiny code - MAJOR discovery!
152
+
153
+ ### Mem8Bridge API
154
+
155
+ kokoro-tiny now has a full consciousness simulation in `examples/mem8_baby.rs`:
156
+
157
+ ```rust
158
+ // Memory as waves that interfere
159
+ MemoryWave {
160
+ amplitude: 2.5, // Emotion strength
161
+ frequency: 528.0, // "Love frequency"
162
+ phase: 0.0,
163
+ decay_rate: 0.05, // Memory persistence
164
+ emotion_type: EmotionType::Love(0.9),
165
+ content: "Mama! I love mama!".to_string(),
166
+ }
167
+
168
+ // Salience detection (Marine algorithm!)
169
+ SalienceEvent {
170
+ jitter_score: 0.2, // Low = authentic/stable
171
+ harmonic_score: 0.95, // High = voice
172
+ salience_score: 0.9,
173
+ signal_type: SignalType::Voice,
174
+ }
175
+
176
+ // Free will: AI chooses attention focus (70% control)
177
+ bridge.decide_attention(events);
178
+ ```
179
+
180
+ ### Emotion Types Available
181
+
182
+ ```rust
183
+ EmotionType::Curiosity(0.8) // Inquisitive
184
+ EmotionType::Love(0.9) // Deep affection
185
+ EmotionType::Joy(0.7) // Happy
186
+ EmotionType::Confusion(0.8) // Uncertain
187
+ EmotionType::Neutral // Baseline
188
+ ```
189
+
190
+ ### Consciousness Integration Points
191
+
192
+ 1. **Wave Interference** - Competing memories by amplitude/frequency
193
+ 2. **Emotional Regulation** - Prevents overload, modulates voice
194
+ 3. **Salience Detection** - Marine algorithm for authenticity
195
+ 4. **Attention Selection** - AI chooses what to focus on
196
+ 5. **Consciousness Level** - Affects speech clarity (wake_up/sleep)
197
+
198
+ This is PERFECT for IndexTTS-Rust! We can:
199
+ - Use wave interference for emotion blending
200
+ - Apply Marine salience to validate synthesis quality
201
+ - Modulate voice based on consciousness level
202
+ - Select voice styles based on emotional state (not just token count)
203
+
204
+ ### Voice Style Selection (510 variations!)
205
+
206
+ kokoro-tiny now loads all 510 style variations per voice:
207
+ - Style selected based on token count
208
+ - Short text → short-optimized style
209
+ - Long text → long-optimized style
210
+ - Automatic text splitting at 512 token limit
211
+
212
+ For IndexTTS: We could select style based on EMOTION + token count!
213
+
214
+ ---
215
+
216
+ ## Marine Integration Achievement (2025-11-16) 🎉
217
+
218
+ **WE DID IT!** Marine salience is now integrated into IndexTTS-Rust!
219
+
220
+ ### What We Built
221
+
222
+ #### 1. Standalone marine_salience Crate (`crates/marine_salience/`)
223
+
224
+ A no_std compatible crate for O(1) jitter-based salience detection:
225
+
226
+ ```rust
227
+ // Core components:
228
+ MarineConfig // Tunable parameters (sample_rate, jitter bounds, EMA alpha)
229
+ MarineProcessor // O(1) per-sample processing
230
+ SaliencePacket // Output: j_p, j_a, h_score, s_score, energy
231
+ Ema // Exponential moving average tracker
232
+
233
+ // Key insight: Process ONE sample at a time, emit packets on peaks
234
+ // Why O(1)? Just compare to EMA, no FFT, no heavy math!
235
+ ```
236
+
237
+ **Config for Speech:**
238
+ ```rust
239
+ MarineConfig::speech_default(sample_rate)
240
+ // F0 range: 60Hz - 4kHz
241
+ // jitter_low: 0.02, jitter_high: 0.60
242
+ // ema_alpha: 0.01 (slow adaptation for stability)
243
+ ```
244
+
245
+ #### 2. Quality Validation Module (`src/quality/`)
246
+
247
+ **MarineProsodyVector** - 8D interpretable emotion representation:
248
+ ```rust
249
+ pub struct MarineProsodyVector {
250
+ pub jp_mean: f32, // Period jitter mean (pitch stability)
251
+ pub jp_std: f32, // Period jitter standard deviation
252
+ pub ja_mean: f32, // Amplitude jitter mean (volume stability)
253
+ pub ja_std: f32, // Amplitude jitter standard deviation
254
+ pub h_mean: f32, // Harmonic alignment (voiced vs noise)
255
+ pub s_mean: f32, // Overall salience (authenticity)
256
+ pub peak_density: f32, // Peaks per second (speech rate)
257
+ pub energy_mean: f32, // Average loudness
258
+ }
259
+
260
+ // Interpretable! High jp_mean = nervous, low = confident
261
+ // Can DIRECTLY EDIT for emotion control!
262
+ ```
263
+
264
+ **MarineProsodyConditioner** - Extract prosody from audio:
265
+ ```rust
266
+ let conditioner = MarineProsodyConditioner::new(22050);
267
+ let prosody = conditioner.from_samples(&audio_samples)?;
268
+ let report = conditioner.validate_tts_output(&audio_samples)?;
269
+
270
+ // Detects issues:
271
+ // - "Too perfect - sounds robotic"
272
+ // - "High period jitter - artifacts"
273
+ // - "Low salience - quality issues"
274
+ ```
275
+
276
+ **ConversationAffectSummary** - Session-level comfort tracking:
277
+ ```rust
278
+ pub enum ComfortLevel {
279
+ Uneasy, // High jitter AND rising (nervous/stressed)
280
+ Neutral, // Stable patterns (calm)
281
+ Happy, // Low jitter + high energy (confident/positive)
282
+ }
283
+
284
+ // Track trends over conversation:
285
+ // jitter_trend > 0.1 = getting more stressed
286
+ // jitter_trend < -0.1 = calming down
287
+ // energy_trend > 0.1 = getting more engaged
288
+
289
+ // Aye can now self-assess!
290
+ aye_assessment() returns "I'm in a good state"
291
+ feedback_prompt() returns "Let me know if something's bothering you"
292
+ ```
293
+
294
+ ### The Core Insight
295
+
296
+ **Human speech has NATURAL jitter - that's what makes it authentic!**
297
+
298
+ - Too perfect (jp < 0.005) = robotic
299
+ - Too chaotic (jp > 0.3) = artifacts/damage
300
+ - Sweet spot = real human voice
301
+
302
+ The Marines will KNOW if speech doesn't sound authentic!
303
+
304
+ ### Tests Passing ✅
305
+
306
+ ```
307
+ running 11 tests
308
+ test quality::affect::tests::test_comfort_level_descriptions ... ok
309
+ test quality::affect::tests::test_analyzer_empty_conversation ... ok
310
+ test quality::affect::tests::test_analyzer_single_utterance ... ok
311
+ test quality::affect::tests::test_happy_classification ... ok
312
+ test quality::affect::tests::test_aye_assessment_message ... ok
313
+ test quality::affect::tests::test_neutral_classification ... ok
314
+ test quality::affect::tests::test_uneasy_classification ... ok
315
+ test quality::prosody::tests::test_conditioner_empty_buffer ... ok
316
+ test quality::prosody::tests::test_conditioner_silence ... ok
317
+ test quality::prosody::tests::test_prosody_vector_array_conversion ... ok
318
+ test quality::prosody::tests::test_estimate_valence ... ok
319
+
320
+ test result: ok. 11 passed; 0 failed
321
+ ```
322
+
323
+ ### Why This Matters
324
+
325
+ 1. **Interpretable Control**: 8D vector vs opaque 512D Conformer - we can SEE what each dimension means
326
+ 2. **Lightweight**: O(1) per sample, no heavy neural networks for prosody
327
+ 3. **Authentic Validation**: Marines detect fake/damaged speech
328
+ 4. **Emotion Editing**: Want more confidence? Lower jp_mean directly!
329
+ 5. **Conversation Awareness**: Track comfort over entire sessions
330
+ 6. **Self-Assessment**: Aye knows when something feels "off"
331
+
332
+ ### Integration Points
333
+
334
+ ```rust
335
+ // In main TTS pipeline:
336
+ use indextts::quality::{
337
+ MarineProsodyConditioner,
338
+ MarineProsodyVector,
339
+ ConversationAffectSummary,
340
+ ComfortLevel,
341
+ };
342
+
343
+ // 1. Extract reference prosody
344
+ let ref_prosody = conditioner.from_samples(&reference_audio)?;
345
+
346
+ // 2. Generate TTS (using 8D vector instead of 512D Conformer)
347
+ let tts_output = generate_with_prosody(&text, ref_prosody)?;
348
+
349
+ // 3. Validate output quality
350
+ let report = conditioner.validate_tts_output(&tts_output)?;
351
+ if !report.passes(70.0) {
352
+ log::warn!("TTS quality issues: {:?}", report.issues);
353
+ }
354
+
355
+ // 4. Track conversation affect
356
+ let analyzer = ConversationAffectAnalyzer::new();
357
+ analyzer.add_utterance(&utterance)?;
358
+ let summary = analyzer.summarize()?;
359
+ match summary.aye_state {
360
+ ComfortLevel::Uneasy => adjust_generation_parameters(),
361
+ _ => proceed_normally(),
362
+ }
363
+ ```
364
+
365
+ ---
366
+
367
+ ## Trish's Notes
368
+
369
+ "Darling, these three Rust projects together are like a symphony orchestra! kokoro-tiny is the quick piccolo solo, IndexTTS-Rust is the full brass section with emotional depth, and Phoenix-Protocol is the concert hall acoustics making everything resonate. When you combine them, that's when the magic happens! Also, I'm absolutely obsessed with how the Golden Ratio resynthesis could add sparkle to synthesized vocals. Can you imagine TTS output that actually has that P!nk breakthrough energy? Now THAT would make me cry happy tears in accounting!"
370
+
371
+ ---
372
+
373
+ ## Fun Facts
374
+
375
+ - kokoro-tiny is ALREADY on crates.io under 8b-is
376
+ - Phoenix Protocol can process 192kHz audio for ultrasonic restoration
377
+ - The Marine algorithm uses O(1) jitter detection - "Marines are not just jarheads - they are intelligent"
378
+ - Hue's GitHub has 66 projects (and counting!)
379
+ - The team at 8b.is: hue@8b.is and aye@8b.is
380
+
381
+ ---
382
+
383
+ *From ashes to harmonics, from silence to song* 🔥🎵
crates/marine_salience/Cargo.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [package]
2
+ name = "marine_salience"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ description = "O(1) jitter-based salience detection - Marines are intelligent!"
6
+ authors = ["Hue & Aye <team@8b.is>"]
7
+ license = "MIT"
8
+ keywords = ["audio", "salience", "jitter", "prosody", "tts"]
9
+
10
+ [dependencies]
11
+ # Core dependencies - intentionally minimal for no_std compatibility
12
+ # Only serde when using std for serialization
13
+ serde = { version = "1.0", features = ["derive"], optional = true }
14
+
15
+ # no_std compatible core - can run anywhere!
16
+ [features]
17
+ default = ["std"]
18
+ std = ["serde"]
crates/marine_salience/src/config.rs ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Marine algorithm configuration
2
+ //!
3
+ //! Tunable parameters for jitter detection. These have been calibrated
4
+ //! for speech/audio processing but can be adjusted for specific use cases.
5
+
6
// NOTE: the crate-level `no_std` switch belongs only in the crate root
// (`lib.rs`); repeating it inside a submodule has no effect, so it is not
// declared here.

/// Tuning parameters for the Marine jitter detector.
///
/// All fields are public so a caller can start from one of the presets
/// ([`MarineConfig::speech_default`], [`MarineConfig::high_sensitivity`],
/// [`MarineConfig::tts_validation`]) and override individual values.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "std", derive(serde::Serialize, serde::Deserialize))]
pub struct MarineConfig {
    /// Amplitude gate: samples whose magnitude is below this are treated as noise.
    /// `speech_default` uses 1e-3 (~-60dB).
    pub clip_threshold: f32,

    /// EMA smoothing factor for inter-peak period tracking (0..1).
    /// Lower values smooth more and adapt more slowly.
    /// `speech_default` uses 0.01.
    pub ema_period_alpha: f32,

    /// EMA smoothing factor for peak amplitude tracking (0..1).
    /// `speech_default` uses 0.01.
    pub ema_amp_alpha: f32,

    /// Minimum inter-peak period in samples.
    /// Peaks closer together than this are rejected (filters high-frequency noise).
    /// `speech_default` uses `sample_rate / 4000` (~4kHz upper F0).
    pub min_period: u32,

    /// Maximum inter-peak period in samples.
    /// Peaks farther apart than this are rejected (filters very low frequencies).
    /// `speech_default` uses `sample_rate / 60` (~60Hz lower F0).
    pub max_period: u32,

    /// Jitter below this value is considered "low" (stable).
    /// `speech_default` uses 0.02.
    pub jitter_low: f32,

    /// Jitter above this value is considered "high" (unstable).
    /// `speech_default` uses 0.60.
    pub jitter_high: f32,
}

impl MarineConfig {
    /// Converts a frequency band in Hz into `(min_period, max_period)` in samples.
    ///
    /// Integer division truncates, so a `sample_rate` below `upper_hz`
    /// yields a `min_period` of 0.
    const fn period_bounds(sample_rate: u32, upper_hz: u32, lower_hz: u32) -> (u32, u32) {
        (sample_rate / upper_hz, sample_rate / lower_hz)
    }

    /// Create a config optimized for speech at the given sample rate.
    ///
    /// # Arguments
    /// * `sample_rate` - Audio sample rate in Hz (e.g., 22050, 44100)
    ///
    /// # Example
    /// ```
    /// use marine_salience::MarineConfig;
    /// let config = MarineConfig::speech_default(22050);
    /// assert!(config.min_period < config.max_period);
    /// ```
    pub const fn speech_default(sample_rate: u32) -> Self {
        // F0 range: ~60Hz (low male) to ~4kHz (includes harmonics)
        let (min_period, max_period) = Self::period_bounds(sample_rate, 4000, 60);

        Self {
            clip_threshold: 1e-3,
            ema_period_alpha: 0.01,
            ema_amp_alpha: 0.01,
            min_period,
            max_period,
            jitter_low: 0.02,
            jitter_high: 0.60,
        }
    }

    /// Create a config for high-sensitivity detection.
    ///
    /// Compared with [`MarineConfig::speech_default`] this uses a lower
    /// amplitude gate, a wider period band (8kHz down to 40Hz) and larger
    /// EMA factors, so more peaks are accepted and the trackers adapt faster.
    pub const fn high_sensitivity(sample_rate: u32) -> Self {
        let (min_period, max_period) = Self::period_bounds(sample_rate, 8000, 40);

        Self {
            clip_threshold: 5e-4,
            ema_period_alpha: 0.05,
            ema_amp_alpha: 0.05,
            min_period,
            max_period,
            jitter_low: 0.01,
            jitter_high: 0.50,
        }
    }

    /// Create a config for TTS output validation, tuned to detect
    /// synthetic artifacts.
    pub const fn tts_validation(sample_rate: u32) -> Self {
        let (min_period, max_period) = Self::period_bounds(sample_rate, 4000, 80);

        Self {
            clip_threshold: 1e-3,
            ema_period_alpha: 0.02,
            ema_amp_alpha: 0.02,
            min_period,
            max_period,
            jitter_low: 0.015, // Stricter for synthetic speech
            jitter_high: 0.40, // More sensitive to artifacts
        }
    }
}

impl Default for MarineConfig {
    /// Equivalent to `MarineConfig::speech_default(22050)`
    /// (22050 Hz is a common TTS sample rate).
    fn default() -> Self {
        Self::speech_default(22050)
    }
}
117
+
118
#[cfg(test)]
mod tests {
    use super::*;

    /// The 22050 Hz speech preset must produce a non-empty period band
    /// matching the documented Hz-to-samples conversion.
    #[test]
    fn test_speech_default_periods() {
        let MarineConfig {
            min_period,
            max_period,
            ..
        } = MarineConfig::speech_default(22050);

        assert!(min_period < max_period);
        assert_eq!(min_period, 22050 / 4000); // 5 samples
        assert_eq!(max_period, 22050 / 60); // 367 samples
    }

    /// A higher sample rate means more samples per period, so the upper
    /// period bound must grow with the sample rate.
    #[test]
    fn test_different_sample_rates() {
        let max_periods =
            [22050_u32, 44100, 48000].map(|rate| MarineConfig::speech_default(rate).max_period);

        assert!(max_periods[1] > max_periods[0]);
        assert!(max_periods[2] > max_periods[1]);
    }
}
crates/marine_salience/src/ema.rs ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Exponential Moving Average (EMA) for smooth tracking
2
+ //!
3
+ //! EMA smooths noisy measurements while maintaining responsiveness.
4
+ //! Used to track period and amplitude patterns in Marine algorithm.
5
+
6
// NOTE: the crate-level `no_std` switch belongs only in the crate root
// (`lib.rs`); repeating it inside a submodule has no effect, so it is not
// declared here.

/// Exponential Moving Average tracker
///
/// EMA formula: `value = alpha * new + (1 - alpha) * old`
/// - Higher alpha = faster response, more noise
/// - Lower alpha = slower response, smoother
///
/// The first sample passed to [`Ema::update`] is adopted verbatim as the
/// baseline; smoothing starts with the second sample.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "std", derive(serde::Serialize, serde::Deserialize))]
pub struct Ema {
    /// Smoothing factor (0..1)
    alpha: f32,
    /// Current smoothed value (0.0 until the first update)
    value: f32,
    /// Whether we've received at least one sample
    initialized: bool,
}

impl Ema {
    /// Create a new, uninitialized EMA with the given smoothing factor.
    ///
    /// # Arguments
    /// * `alpha` - Smoothing factor (0..1). Higher = faster adaptation.
    ///   The value is stored as given; unlike [`Ema::set_alpha`] it is not
    ///   clamped, so the caller is responsible for passing a value in range.
    ///
    /// # Example
    /// ```
    /// use marine_salience::ema::Ema;
    /// let mut ema = Ema::new(0.1); // 10% new, 90% old
    /// ema.update(100.0);
    /// assert_eq!(ema.get(), 100.0); // First value becomes baseline
    /// ema.update(200.0);
    /// assert!((ema.get() - 110.0).abs() < 0.01); // 0.1*200 + 0.9*100
    /// ```
    pub const fn new(alpha: f32) -> Self {
        Self {
            alpha,
            value: 0.0,
            initialized: false,
        }
    }

    /// Feed a new measurement into the average.
    pub fn update(&mut self, x: f32) {
        self.value = if self.initialized {
            // EMA update: new = alpha * x + (1 - alpha) * old
            self.alpha * x + (1.0 - self.alpha) * self.value
        } else {
            // First value becomes the baseline
            x
        };
        self.initialized = true;
    }

    /// Get the current smoothed value (0.0 if no sample has been seen yet).
    #[must_use]
    pub fn get(&self) -> f32 {
        self.value
    }

    /// Check if the EMA has been initialized (received at least one sample).
    #[must_use]
    pub fn is_ready(&self) -> bool {
        self.initialized
    }

    /// Reset to the uninitialized state; the smoothing factor is kept.
    pub fn reset(&mut self) {
        self.value = 0.0;
        self.initialized = false;
    }

    /// Get the smoothing factor.
    #[must_use]
    pub fn alpha(&self) -> f32 {
        self.alpha
    }

    /// Set a new smoothing factor, clamped to `0.0..=1.0`.
    pub fn set_alpha(&mut self, alpha: f32) {
        self.alpha = alpha.clamp(0.0, 1.0);
    }
}
85
+
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an EMA with the given factor and feeds it `samples` in order.
    fn fed(alpha: f32, samples: &[f32]) -> Ema {
        let mut ema = Ema::new(alpha);
        for &sample in samples {
            ema.update(sample);
        }
        ema
    }

    /// The very first sample is adopted unchanged and marks the EMA ready.
    #[test]
    fn test_first_value_becomes_baseline() {
        let mut ema = Ema::new(0.1);
        assert!(!ema.is_ready());

        ema.update(42.0);

        assert!(ema.is_ready());
        assert_eq!(ema.get(), 42.0);
    }

    /// With a small alpha the second sample only nudges the average.
    #[test]
    fn test_ema_smoothing() {
        let ema = fed(0.1, &[100.0, 200.0]);
        // 0.1 * 200 + 0.9 * 100 = 20 + 90 = 110
        assert!((ema.get() - 110.0).abs() < 0.001);
    }

    /// With a large alpha the average follows the newest sample closely.
    #[test]
    fn test_high_alpha_fast_response() {
        let ema = fed(0.9, &[100.0, 200.0]);
        // 0.9 * 200 + 0.1 * 100 = 180 + 10 = 190
        assert!((ema.get() - 190.0).abs() < 0.001);
    }

    /// `reset` clears both the stored value and the readiness flag.
    #[test]
    fn test_reset() {
        let mut ema = fed(0.1, &[100.0]);
        assert!(ema.is_ready());

        ema.reset();

        assert!(!ema.is_ready());
        assert_eq!(ema.get(), 0.0);
    }
}
crates/marine_salience/src/lib.rs ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Marine Salience - O(1) Jitter-Based Authenticity Detection
2
+ //!
3
+ //! "Marines are not just jarheads - they are actually very intelligent"
4
+ //!
5
+ //! This crate provides a universal salience primitive that can detect the
6
+ //! "authenticity" of audio signals by measuring timing and amplitude jitter.
7
+ //!
8
+ //! ## Why "Marine"?
9
+ //! - Marines are stable and reliable under pressure
10
+ //! - Low jitter = authentic/stable signal
11
+ //! - High jitter = damaged/synthetic signal
12
+ //!
13
+ //! ## Use Cases
14
+ //! - **TTS Quality Validation** - Is synthesized speech authentic?
15
+ //! - **Prosody Extraction** - Extract 8D interpretable emotion vectors
16
+ //! - **Conversation Affect** - Track comfort level over sessions
17
+ //! - **Real-time Monitoring** - O(1) per sample processing
18
+ //!
19
+ //! ## Core Insight
20
+ //! Human voice has NATURAL jitter patterns. Perfect smoothness = synthetic.
21
+ //! The Marine algorithm detects these patterns to distinguish authentic
22
+ //! from damaged or artificial audio.
23
+
24
+ #![cfg_attr(not(feature = "std"), no_std)]
25
+
26
+ pub mod config;
27
+ pub mod ema;
28
+ pub mod packet;
29
+ pub mod processor;
30
+
31
+ // Re-export main types
32
+ pub use config::MarineConfig;
33
+ pub use packet::SaliencePacket;
34
+ pub use processor::MarineProcessor;
35
+
36
+ /// Marine algorithm version
37
+ pub const VERSION: &str = env!("CARGO_PKG_VERSION");
38
+
39
+ /// Default jitter thresholds tuned for speech
40
+ /// These values accommodate natural musical/speech variation
41
+ pub const DEFAULT_JITTER_LOW: f32 = 0.02; // Below = very stable
42
+ pub const DEFAULT_JITTER_HIGH: f32 = 0.60; // Above = heavily damaged
crates/marine_salience/src/packet.rs ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Salience packet - the output of Marine analysis
2
+ //!
3
+ //! Contains jitter measurements and quality scores for a detected peak.
4
+
5
// NOTE: the crate-level `no_std` switch belongs only in the crate root
// (`lib.rs`); repeating it inside a submodule has no effect, so it is not
// declared here.

/// Salience packet emitted on peak detection
///
/// Bundles the jitter and quality metrics for a single detected peak.
/// Packets can be aggregated to form prosody vectors or quality scores.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "std", derive(serde::Serialize, serde::Deserialize))]
pub struct SaliencePacket {
    /// Period jitter - timing instability between peaks
    /// Lower = more stable/musical, Higher = more chaotic
    /// Range: 0.0+ (normalized difference from expected period)
    pub j_p: f32,

    /// Amplitude jitter - loudness instability
    /// Lower = consistent volume, Higher = erratic dynamics
    /// Range: 0.0+ (normalized difference from expected amplitude)
    pub j_a: f32,

    /// Harmonic alignment score
    /// 1.0 = perfectly voiced/harmonic, 0.0 = noise
    /// For now this is simplified; can be enhanced with FFT
    pub h_score: f32,

    /// Overall salience score (authenticity)
    /// 1.0 = perfect quality, 0.0 = heavily damaged
    /// Computed from inverse of combined jitter
    pub s_score: f32,

    /// Local peak energy (amplitude squared)
    /// Represents loudness at this event
    pub energy: f32,

    /// Sample index where this peak occurred
    /// Useful for temporal analysis
    pub sample_index: u64,
}

impl SaliencePacket {
    /// Create a new salience packet from its six components.
    ///
    /// The values are stored as given; no validation or normalization is
    /// applied.
    #[must_use]
    pub const fn new(
        j_p: f32,
        j_a: f32,
        h_score: f32,
        s_score: f32,
        energy: f32,
        sample_index: u64,
    ) -> Self {
        Self {
            j_p,
            j_a,
            h_score,
            s_score,
            energy,
            sample_index,
        }
    }

    /// Combined jitter metric: the arithmetic mean of period and amplitude
    /// jitter.
    #[must_use]
    pub fn combined_jitter(&self) -> f32 {
        (self.j_p + self.j_a) / 2.0
    }

    /// Returns `true` when the salience score is at least `threshold`
    /// (i.e. low jitter, high salience).
    #[must_use]
    pub fn is_high_quality(&self, threshold: f32) -> bool {
        self.s_score >= threshold
    }

    /// Returns `true` when the combined jitter strictly exceeds
    /// `jitter_threshold`, indicating damaged/synthetic audio.
    #[must_use]
    pub fn is_damaged(&self, jitter_threshold: f32) -> bool {
        self.combined_jitter() > jitter_threshold
    }
}

/// Special salience markers for non-peak events
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "std", derive(serde::Serialize, serde::Deserialize))]
pub enum SalienceMarker {
    /// Normal peak detected
    Peak(SaliencePacket),
    /// Fracture/gap detected (silence)
    Fracture,
    /// High noise floor detected
    Noise,
    /// Insufficient data for analysis
    Insufficient,
}
94
+
95
#[cfg(test)]
mod tests {
    use super::*;

    /// A packet with low jitter and a high salience score.
    fn clean_packet() -> SaliencePacket {
        SaliencePacket::new(0.01, 0.02, 1.0, 0.95, 0.5, 0)
    }

    /// A packet with high jitter and a low salience score.
    fn noisy_packet() -> SaliencePacket {
        SaliencePacket::new(0.5, 0.6, 0.5, 0.3, 0.5, 0)
    }

    /// Combined jitter is the mean of period and amplitude jitter.
    #[test]
    fn test_combined_jitter() {
        let packet = SaliencePacket::new(0.1, 0.3, 1.0, 0.8, 0.5, 0);
        let deviation = (packet.combined_jitter() - 0.2).abs();
        assert!(deviation < 0.001);
    }

    /// Only the packet whose salience score reaches the threshold is high quality.
    #[test]
    fn test_is_high_quality() {
        assert!(clean_packet().is_high_quality(0.8));
        assert!(!noisy_packet().is_high_quality(0.8));
    }

    /// Only the packet whose combined jitter exceeds the threshold is damaged.
    #[test]
    fn test_is_damaged() {
        assert!(!clean_packet().is_damaged(0.3));
        assert!(noisy_packet().is_damaged(0.3));
    }
}
crates/marine_salience/src/processor.rs ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Core Marine processor - O(1) per-sample jitter detection
2
+ //!
3
+ //! The heart of the Marine algorithm. Processes audio samples one at a time,
4
+ //! detecting peaks and computing jitter metrics in constant time.
5
+ //!
6
+ //! "Marines are not just jarheads - they are actually very intelligent"
7
+
8
+ #![cfg_attr(not(feature = "std"), no_std)]
9
+
10
+ use crate::config::MarineConfig;
11
+ use crate::ema::Ema;
12
+ use crate::packet::{SalienceMarker, SaliencePacket};
13
+
14
+ /// Marine salience processor
15
+ ///
16
+ /// Processes audio samples one at a time, detecting peaks and computing
17
+ /// jitter metrics. Designed for O(1) per-sample operation.
18
+ ///
19
+ /// # Example
20
+ /// ```
21
+ /// use marine_salience::{MarineConfig, MarineProcessor};
22
+ ///
23
+ /// let config = MarineConfig::speech_default(22050);
24
+ /// let mut processor = MarineProcessor::new(config);
25
+ ///
26
+ /// // Process samples (e.g., from audio buffer)
27
+ /// let samples = vec![0.0, 0.5, 1.0, 0.5, 0.0, -0.5, -1.0, -0.5];
28
+ /// for sample in &samples {
29
+ /// if let Some(marker) = processor.process_sample(*sample) {
30
+ /// match marker {
31
+ /// marine_salience::packet::SalienceMarker::Peak(packet) => {
32
+ /// println!("Peak detected! Salience: {:.2}", packet.s_score);
33
+ /// }
34
+ /// _ => {}
35
+ /// }
36
+ /// }
37
+ /// }
38
+ /// ```
39
+ pub struct MarineProcessor {
40
+ /// Configuration parameters
41
+ cfg: MarineConfig,
42
+
43
+ /// Previous sample (t-2)
44
+ prev2: f32,
45
+ /// Previous sample (t-1)
46
+ prev1: f32,
47
+ /// Current sample index
48
+ idx: u64,
49
+
50
+ /// Sample index of last detected peak
51
+ last_peak_idx: u64,
52
+ /// Amplitude of last detected peak
53
+ last_peak_amp: f32,
54
+
55
+ /// EMA tracker for inter-peak periods
56
+ ema_period: Ema,
57
+ /// EMA tracker for peak amplitudes
58
+ ema_amp: Ema,
59
+
60
+ /// Number of peaks detected so far
61
+ peak_count: u64,
62
+ }
63
+
64
+ impl MarineProcessor {
65
+ /// Create a new Marine processor with given configuration
66
+ pub fn new(cfg: MarineConfig) -> Self {
67
+ Self {
68
+ cfg,
69
+ prev2: 0.0,
70
+ prev1: 0.0,
71
+ idx: 0,
72
+ last_peak_idx: 0,
73
+ last_peak_amp: 0.0,
74
+ ema_period: Ema::new(cfg.ema_period_alpha),
75
+ ema_amp: Ema::new(cfg.ema_amp_alpha),
76
+ peak_count: 0,
77
+ }
78
+ }
79
+
80
+ /// Process a single audio sample - O(1) operation
81
+ ///
82
+ /// Returns Some(SalienceMarker) when a peak is detected or special
83
+ /// condition occurs, None otherwise.
84
+ ///
85
+ /// # Arguments
86
+ /// * `sample` - Audio sample value (typically -1.0 to 1.0)
87
+ ///
88
+ /// # Returns
89
+ /// - `Some(Peak(packet))` - Peak detected with jitter metrics
90
+ /// - `Some(Fracture)` - Silence/gap detected
91
+ /// - `Some(Noise)` - High noise floor detected
92
+ /// - `None` - No significant event at this sample
93
+ pub fn process_sample(&mut self, sample: f32) -> Option<SalienceMarker> {
94
+ let i = self.idx;
95
+ self.idx += 1;
96
+
97
+ // Pre-gating: ignore samples below threshold
98
+ if sample.abs() < self.cfg.clip_threshold {
99
+ self.prev2 = self.prev1;
100
+ self.prev1 = sample;
101
+ return None;
102
+ }
103
+
104
+ // Peak detection: prev1 is peak if prev2 < prev1 > sample
105
+ // Simple local maximum detection
106
+ let is_peak = i >= 2
107
+ && self.prev1.abs() >= self.cfg.clip_threshold
108
+ && self.prev1.abs() > self.prev2.abs()
109
+ && self.prev1.abs() > sample.abs();
110
+
111
+ let mut result = None;
112
+
113
+ if is_peak {
114
+ let peak_idx = i - 1;
115
+ let amp = self.prev1.abs();
116
+ let energy = amp * amp;
117
+
118
+ // Calculate period (time since last peak)
119
+ let period = if self.last_peak_idx == 0 {
120
+ 0.0
121
+ } else {
122
+ (peak_idx - self.last_peak_idx) as f32
123
+ };
124
+
125
+ // Only process if period is within valid range
126
+ if period > self.cfg.min_period as f32 && period < self.cfg.max_period as f32 {
127
+ if self.ema_period.is_ready() {
128
+ // Calculate jitter metrics
129
+ let jp = (period - self.ema_period.get()).abs() / self.ema_period.get();
130
+ let ja = (amp - self.ema_amp.get()).abs() / self.ema_amp.get();
131
+
132
+ // Harmonic score (simplified - TODO: FFT-based detection)
133
+ // For now, assume voiced content (h = 1.0)
134
+ // In production, this would check for harmonic structure
135
+ let h = 1.0;
136
+
137
+ // Salience score: inverse of combined jitter
138
+ // Higher jitter = lower salience
139
+ let s = 1.0 / (1.0 + jp + ja);
140
+
141
+ result = Some(SalienceMarker::Peak(SaliencePacket::new(
142
+ jp, ja, h, s, energy, peak_idx,
143
+ )));
144
+ }
145
+
146
+ // Update EMAs with new measurements
147
+ self.ema_period.update(period);
148
+ self.ema_amp.update(amp);
149
+ }
150
+
151
+ self.last_peak_idx = peak_idx;
152
+ self.last_peak_amp = amp;
153
+ self.peak_count += 1;
154
+ }
155
+
156
+ // Update sample history
157
+ self.prev2 = self.prev1;
158
+ self.prev1 = sample;
159
+
160
+ result
161
+ }
162
+
163
+ /// Process a buffer of samples, collecting all salience packets
164
+ ///
165
+ /// More efficient than calling process_sample repeatedly when you
166
+ /// have a full buffer available.
167
+ ///
168
+ /// # Arguments
169
+ /// * `samples` - Buffer of audio samples
170
+ ///
171
+ /// # Returns
172
+ /// Vector of salience packets for all detected peaks
173
+ #[cfg(feature = "std")]
174
+ pub fn process_buffer(&mut self, samples: &[f32]) -> Vec<SaliencePacket> {
175
+ let mut packets = Vec::new();
176
+
177
+ for &sample in samples {
178
+ if let Some(SalienceMarker::Peak(packet)) = self.process_sample(sample) {
179
+ packets.push(packet);
180
+ }
181
+ }
182
+
183
+ packets
184
+ }
185
+
186
+ /// Reset processor state (start fresh)
187
+ pub fn reset(&mut self) {
188
+ self.prev2 = 0.0;
189
+ self.prev1 = 0.0;
190
+ self.idx = 0;
191
+ self.last_peak_idx = 0;
192
+ self.last_peak_amp = 0.0;
193
+ self.ema_period.reset();
194
+ self.ema_amp.reset();
195
+ self.peak_count = 0;
196
+ }
197
+
198
+ /// Get number of peaks detected so far
199
+ pub fn peak_count(&self) -> u64 {
200
+ self.peak_count
201
+ }
202
+
203
+ /// Get current sample index
204
+ pub fn current_index(&self) -> u64 {
205
+ self.idx
206
+ }
207
+
208
+ /// Check if processor has enough data for reliable jitter
209
+ pub fn is_warmed_up(&self) -> bool {
210
+ self.peak_count >= 3 && self.ema_period.is_ready()
211
+ }
212
+
213
+ /// Get current expected period (from EMA)
214
+ pub fn expected_period(&self) -> Option<f32> {
215
+ if self.ema_period.is_ready() {
216
+ Some(self.ema_period.get())
217
+ } else {
218
+ None
219
+ }
220
+ }
221
+
222
+ /// Get current expected amplitude (from EMA)
223
+ pub fn expected_amplitude(&self) -> Option<f32> {
224
+ if self.ema_amp.is_ready() {
225
+ Some(self.ema_amp.get())
226
+ } else {
227
+ None
228
+ }
229
+ }
230
+ }
231
+
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Peaks every 10 samples at the default speech config must yield at
    /// least one salience packet.
    #[test]
    fn test_peak_detection() {
        let config = MarineConfig::speech_default(22050);
        let mut processor = MarineProcessor::new(config);

        // Create simple signal with peaks at samples 10, 20, ..., 90.
        // Indices i - 1 and i + 1 are always in bounds for this range.
        let mut samples = vec![0.0; 100];
        for i in (10..100).step_by(10) {
            samples[i - 1] = 0.3; // Rising edge
            samples[i] = 0.5; // Peak
            samples[i + 1] = 0.3; // Falling edge
        }

        let peak_count = samples
            .iter()
            .filter(|&&sample| {
                matches!(
                    processor.process_sample(sample),
                    Some(SalienceMarker::Peak(_))
                )
            })
            .count();

        // The first two peaks only establish the baseline, so not every
        // peak yields a packet, but later ones must.
        assert!(peak_count > 0);
    }

    /// A perfectly periodic signal must produce packets with low period jitter.
    #[test]
    fn test_jitter_calculation() {
        let mut config = MarineConfig::speech_default(22050);
        config.min_period = 5;
        config.max_period = 20;
        let mut processor = MarineProcessor::new(config);

        // Create signal with consistent period of 10 samples
        let mut detected_packets = vec![];
        for _cycle in 0..10 {
            for i in 0..10 {
                let sample = match i {
                    5 => 0.8,     // Peak in middle
                    4 | 6 => 0.5, // Edges
                    _ => 0.01,    // Just above threshold
                };

                if let Some(SalienceMarker::Peak(packet)) = processor.process_sample(sample) {
                    detected_packets.push(packet);
                }
            }
        }

        // Ten peaks are present; the first two only seed the trackers, so
        // packets must exist. Asserting this keeps the jitter check below
        // from passing vacuously.
        assert!(
            detected_packets.len() > 3,
            "expected more than 3 packets, got {}",
            detected_packets.len()
        );

        // With consistent periods, later packets should have low jitter
        let last = detected_packets.last().unwrap();
        assert!(last.j_p < 0.5, "Period jitter too high: {}", last.j_p);
    }

    /// `reset` must return the processor to its freshly constructed state.
    #[test]
    fn test_reset() {
        let config = MarineConfig::speech_default(22050);
        let mut processor = MarineProcessor::new(config);

        // Process some samples
        for _ in 0..100 {
            processor.process_sample(0.5);
        }
        assert!(processor.current_index() > 0);

        // Reset and verify
        processor.reset();
        assert_eq!(processor.current_index(), 0);
        assert_eq!(processor.peak_count(), 0);
        assert!(!processor.is_warmed_up());
    }

    /// `process_buffer` must return the packets for a periodic buffer.
    #[cfg(feature = "std")]
    #[test]
    fn test_process_buffer() {
        let mut config = MarineConfig::speech_default(22050);
        // The test pattern repeats every 5 samples. Keep that period strictly
        // inside the accepted range so the test does not depend on how the
        // processor treats a period exactly equal to a bound.
        config.min_period = 4;
        config.max_period = 50;
        let mut processor = MarineProcessor::new(config);

        // Generate test signal with peaks
        let mut samples = Vec::new();
        for _ in 0..20 {
            samples.extend_from_slice(&[0.01, 0.3, 0.8, 0.3, 0.01]);
        }

        let packets = processor.process_buffer(&samples);
        // Should detect multiple peaks
        assert!(!packets.is_empty());
    }
}
docs/Integrating Marine Algorithm into IndexTTS-Rust.md ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # **A Technical Report on the Integration of the Marine Salience Algorithm into the IndexTTS2-Rust Architecture**
4
+
5
+ ## **Executive Summary**
6
+
7
+ This report details a comprehensive technical framework for the integration of the novel Marine Algorithm 1 into the existing IndexTTS-Rust project. The IndexTTS-Rust system is understood to be a Rust implementation of the IndexTTS2 architecture, a cascaded autoregressive (AR) Text-to-Speech (TTS) model detailed in the aaai2026.tex paper.1
8
+
9
+ The primary objective of this integration is to leverage the unique, time-domain salience detection capabilities of the Marine Algorithm (e.g., jitter analysis) 1 to significantly improve the quality, controllability, and emotional expressiveness of the synthesized speech.
10
+
11
+ The core of this strategy involves **replacing the Conformer-based emotion perceiver of the IndexTTS2 Text-to-Semantic (T2S) module** 1 with a new, lightweight, and prosodically-aware Rust module based on the Marine Algorithm. This report provides a full analysis of the architectural foundations, a detailed integration strategy, a complete Rust-level implementation guide, and an analysis of the training and inferential implications of this modification.
12
+
13
+ ## **Part 1: Architectural Foundations: The IndexTTS2 Pipeline and the Marine Salience Primitive**
14
+
15
+ A successful integration requires a deep, functional understanding of the two systems being merged. This section deconstructs the IndexTTS2 architecture as the "host" system 1 and re-frames the Marine Algorithm 1 as the "implant" feature extractor.
16
+
17
+ ### **1.1 Deconstruction of the IndexTTS2 Generative Pipeline**
18
+
19
+ The aaai2026.tex paper describes IndexTTS2 as a state-of-the-art, cascaded zero-shot TTS system.1 Its architecture is composed of three distinct, sequentially-trained modules:
20
+
21
+ 1. **Text-to-Semantic (T2S) Module:** This is an autoregressive (AR) Transformer-based model. Its primary function is to convert a sequence of text inputs into a sequence of "semantic tokens." This module is the system's "brain," determining the content, rhythm, and prosody of the speech.
22
+ 2. **Semantic-to-Mel (S2M) Module:** This is a non-autoregressive (NAR) model. It takes the discrete semantic tokens from the T2S module and converts them into a dense mel-spectrogram. This module functions as the system's "vocal tract," rendering the semantic instructions into a spectral representation. The paper notes this module "incorporate\[s\] GPT latent representations to significantly improve the stability of the generated speech".1
23
+ 3. **Vocoder Module:** This is a pre-trained BigVGANv2 vocoder.1 Its sole function is to perform the final conversion from the mel-spectrogram (from S2M) into a raw audio waveform.
24
+
25
+ The critical component for this integration is the **T2S Conditioning Mechanism**. The IndexTTS2 T2S module's behavior is conditioned on two separate audio prompts, a design intended to achieve disentangled control 1:
26
+
27
+ * **Timbre Prompt:** This audio prompt is processed by a "speaker perceiver conditioner" to generate a speaker attribute vector, c. This vector defines *who* is speaking (i.e., the vocal identity).
28
+ * **Style Prompt:** This *separate* audio prompt is processed by a "Conformer-based emotion perceiver conditioner" to generate an emotion vector, e. This vector defines *how* they are speaking (i.e., the emotion, prosody, and rhythm).
29
+
30
+ The T2S Transformer then consumes these vectors, additively combined, as part of its input: \[c \+ e, p,..., E\_text,..., E\_sem\].1
31
+
32
+ A key architectural detail is the IndexTTS2 paper's explicit use of a **Gradient Reversal Layer (GRL)** "to eliminate emotion-irrelevant information" and achieve "speaker-emotion disentanglement".1 The presence of a GRL, an adversarial training technique, strongly implies that the "Conformer-based emotion perceiver" is *not* naturally adept at this separation. A general-purpose Conformer, when processing the style prompt, will inevitably encode both prosodic features (pitch, energy) and speaker-specific features (formants, timbre). The GRL is thus employed as an adversarial "patch" to force the e vector to be "ignorant" of the speaker. This reveals a complex, computationally-heavy, and potentially fragile point in the IndexTTS2 design—a weakness that the Marine Algorithm is perfectly suited to address.
33
+
34
+ ### **1.2 The Marine Algorithm as a Superior Prosodic Feature Extractor**
35
+
36
+ The marine-Universal-Salience-algoritm.tex paper 1 introduces the Marine Algorithm as a "universal, modality-agnostic salience detector" that operates in the time domain with O(1) per-sample complexity. While its described applications are broad, its specific mechanics make it an ideal, purpose-built *prosody quantifier* for speech.
37
+
38
+ The algorithm's 5-step process (Pre-gating, Peak Detection, Jitter Computation, Harmonic Alignment, Salience Score) 1 is, in effect, a direct measurement of the suprasegmental features that define prosody:
39
+
40
+ * **Period Jitter ($J\_p$):** Defined as $J\_p \= |T\_i \- \\text{EMA}(T)|$, this metric quantifies the instability of the time between successive peaks (the fundamental period).1 In speech, this is a direct, time-domain correlate for *pitch instability*. High, structured $J\_p$ (i.e., high jitter with a stable EMA) represents intentional prosodic features like vibrato, vocal fry, or creaky voice—all key carriers of emotion.
41
+ * **Amplitude Jitter ($J\_a$):** Defined as $J\_a \= |A\_i \- \\text{EMA}(A)|$, this metric quantifies the instability of peak amplitudes.1 In speech, this is a correlate for *amplitude shimmer* or "vocal roughness," which are strong cues for affective states such as arousal, stress, or anger.
42
+ * **Harmonic Alignment ($H$):** This check for integer-multiple relationships in peak spacing 1 directly measures the *purity* and *periodicity* of the tone. It quantifies the distinction between a clear, voiced, harmonic sound and a noisy, chaotic, or unvoiced signal (e.g., breathiness, whispering, or a scream).
43
+ * **Energy ($E$) and Peak Detection:** The algorithm's pre-gating ($\\theta\_c$) and peak detection steps inherently track the signal's energy and the *density* of glottal pulses, which correlate directly to loudness and fundamental frequency (pitch), respectively.
44
+
45
+ The algorithm's description as "biologically plausible" and analogous to cochlear/amygdalar filtering 1 is not merely conceptual. It signifies that the algorithm is *a priori* biased to extract the same low-level features that the human auditory system uses to perceive emotion and prosody. This makes it a far more "correct" feature extractor for this task than a generic, large-scale Conformer, which learns from statistical correlation rather than first principles. Furthermore, its O(1) complexity 1 makes it orders of magnitude more efficient than the Transformer-based Conformer it will replace.
46
+
47
+ ## **Part 2: Integration Strategy: Replacing the T2S Emotion Perceiver**
48
+
49
+ The integration path is now clear. The IndexTTS2 T2S module 1 requires a clean, disentangled prosody vector e. The original Conformer-based conditioner provides a "polluted" vector that must be "cleaned" by a GRL.1 The Marine Algorithm 1 is, by its very design, a *naturally disentangled* prosody extractor.
50
+
51
+ ### **2.1 Formal Proposal: The MarineProsodyConditioner**
52
+
53
+ The formal integration strategy is as follows:
54
+
55
+ 1. The "Conformer-based emotion perceiver conditioner" 1 is **removed** from the IndexTTS2 architecture.
56
+ 2. A new, from-scratch Rust module, tentatively named the MarineProsodyConditioner, is **created**.
57
+ 3. This new module's sole function is to accept the file path to the style\_prompt audio, load its samples, and process them using a Rust implementation of the Marine Algorithm.1
58
+ 4. It will aggregate the resulting time-series of salience data into a single, fixed-size feature vector, e', which will serve as the new "emotion vector."
59
+
60
+ ### **2.2 Feature Vector Engineering: Defining the New e'**
61
+
62
+ The Marine Algorithm produces a *stream* of SaliencePackets, one for each detected peak.1 The T2S Transformer, however, requires a *single, fixed-size* conditioning vector.1 We must therefore define an aggregation strategy to distill this time-series into a descriptive statistical summary.
63
+
64
+ The proposed feature vector, the MarineProsodyVector (our new e'), will be an 8-dimensional vector of summary statistics computed over the entire duration of the style prompt: the means of the algorithm's key outputs, the standard deviations of the two jitter terms, and the peak density.
65
+
66
+ **Table 1: MarineProsodyVector Struct Definition**
67
+
68
+ This table defines the precise "interface" between the marine\_salience crate and the indextts\_rust crate.
69
+
70
+ | Field | Type | Description | Source |
71
+ | :---- | :---- | :---- | :---- |
72
+ | jp\_mean | f32 | Mean Period Jitter ($J\_p$). Correlates to average pitch instability. | 1 |
73
+ | jp\_std | f32 | Std. Dev. of $J\_p$. Correlates to *variance* in pitch instability. | 1 |
74
+ | ja\_mean | f32 | Mean Amplitude Jitter ($J\_a$). Correlates to average vocal roughness. | 1 |
75
+ | ja\_std | f32 | Std. Dev. of $J\_a$. Correlates to *variance* in vocal roughness. | 1 |
76
+ | h\_mean | f32 | Mean Harmonic Alignment ($H$). Correlates to average tonal purity. | 1 |
77
+ | s\_mean | f32 | Mean Salience Score ($S$). Correlates to overall signal "structuredness". | 1 |
78
+ | peak\_density | f32 | Number of detected peaks per second. Correlates to fundamental frequency (F0/pitch). | 1 |
79
+ | energy\_mean | f32 | Mean energy ($E$) of detected peaks. Correlates to loudness/amplitude. | 1 |
80
+
81
+ This small, 8-dimensional vector is dense, interpretable, and packed with prosodic information, in stark contrast to the opaque, high-dimensional, and entangled vector produced by the original Conformer.1
82
+
83
+ ### **2.3 Theoretical Justification: The Synergistic Disentanglement**
84
+
85
+ This integration provides a profound architectural improvement by solving the speaker-style disentanglement problem more elegantly and efficiently than the original IndexTTS2 design.1
86
+
87
+ The central challenge in the original architecture is that the Conformer-based conditioner processes the *entire* signal, capturing both temporal features (pitch, which is prosody) and spectral features (formants, which define speaker identity). This "entanglement" necessitates the use of the adversarial GRL to "un-learn" the speaker information.1
88
+
89
+ The Marine Algorithm 1 fundamentally sidesteps this problem. Its design is based on **peak detection, spacing, and amplitude**.1 It is almost entirely *blind* to the complex spectral-envelope (formant) information that defines a speaker's unique timbre. Its jitter terms measure the *instability* of the fundamental frequency, not the F0 itself, and the *instability* of the amplitude, not the spectral shape. (The peak\_density and energy\_mean features are the exception: they track absolute pitch and loudness, so they may need per-speaker normalization to remain speaker-invariant.)
90
+
91
+ Therefore, the MarineProsodyVector (e') is **naturally disentangled**. It is a *pure* representation of prosody, containing negligible speaker-identity information.
92
+
93
+ When this new e' vector is fed into the T2S model's input, \[c \+ e',...\], the system receives two *orthogonal* conditioning vectors:
94
+
95
+ 1. c (from the speaker perceiver 1): Contains the speaker's timbre (formants, etc.).
96
+ 2. e' (from the MarineProsodyConditioner 1): Contains the speaker's prosody (jitter, rhythm, etc.).
97
+
98
+ This clean separation provides two major benefits:
99
+
100
+ 1. **Superior Timbre Cloning:** The speaker vector c no longer has to "compete" with an "entangled" style vector e. The T2S model will receive a cleaner speaker signal, leading to more accurate zero-shot voice cloning.
101
+ 2. **Superior Emotional Expression:** The style vector e' is a clean, simple, and interpretable signal. The T2S Transformer will be able to learn the mapping from (e.g.) jp\_mean \= 0.8 to "generate creaky semantic tokens" much more easily than from an opaque 512-dimensional Conformer embedding.
102
+
103
+ This change simplifies the T2S model's learning task, which should lead to faster convergence and higher final quality. The GRL 1 may become entirely unnecessary, further simplifying the training regime and stabilizing the model.
104
+
105
+ ## **Part 3: Implementation Guide: An IndexTTS-Rust Integration**
106
+
107
+ This section provides a concrete, code-level guide for implementing the proposed integration.
108
+
109
+ ### **3.1 Addressing the README.md Data Gap**
110
+
111
+ A critical limitation in preparing this analysis is the repeated failure to access the user-provided IndexTTS-Rust README.md file.2 This file contains the project's specific file structure, API definitions, and module layout.
112
+
113
+ To overcome this, this report will posit a **hypothetical yet idiomatic Rust project structure** based on the logical components described in the IndexTTS2 paper.1 All subsequent code examples will adhere to this structure. The project owner is expected to map these file paths and function names to their actual, private codebase.
114
+
115
+ ### **3.2 Table 2: Hypothetical IndexTTS-Rust Project Structure**
116
+
117
+ The following workspace structure is assumed for all implementation examples.
118
+
119
+ Plaintext
120
+
121
+ indextts\_rust\_workspace/
122
+ ├── Cargo.toml (Workspace root)
123
+
124
+ ├── indextts\_rust/ (The main application/library crate)
125
+ │ ├── Cargo.toml
126
+ │ └── src/
127
+ │ ├── main.rs (Binary entry point)
128
+ │ ├── lib.rs (Library entry point & API)
129
+ │ ├── error.rs (Project-wide error types)
130
+ │ ├── audio.rs (Audio I/O: e.g., fn load\_wav\_samples)
131
+ │ ├── vocoder.rs (Wrapper for BigVGANv2 model)
132
+ │ ├── t2s/
133
+ │ │ ├── mod.rs (T2S module definition)
134
+ │ │ ├── model.rs (AR Transformer implementation)
135
+ │ │ └── conditioner.rs(Handles 'c' and 'e' vector generation)
136
+ │ └── s2m/
137
+ │ ├── mod.rs (S2M module definition)
138
+ │ └── model.rs (NAR model implementation)
139
+
140
+ └── marine\_salience/ (The NEW crate for the Marine Algorithm)
141
+ ├── Cargo.toml
142
+ └── src/
143
+ ├── lib.rs (Public API: MarineProcessor, etc.)
144
+ ├── config.rs (MarineConfig struct)
145
+ ├── processor.rs (MarineProcessor struct and logic)
146
+ ├── ema.rs (EmaTracker helper struct)
147
+ └── packet.rs (SaliencePacket struct)
148
+
149
+ ### **3.3 Crate Development: marine\_salience**
150
+
151
+ A new, standalone Rust crate, marine\_salience, should be created. This crate will encapsulate all logic for the Marine Algorithm 1, ensuring it is modular, testable, and reusable.
152
+
153
+ **Table 3: marine\_salience Crate \- Public API Definition**
154
+
155
+ | Struct / fn | Field / Signature | Type | Description |
156
+ | :---- | :---- | :---- | :---- |
157
+ | MarineConfig | clip\_threshold | f32 | $\\theta\_c$, pre-gating sensitivity.1 |
158
+ | | ema\_period\_alpha | f32 | Smoothing factor for Period EMA. |
159
+ | | ema\_amplitude\_alpha | f32 | Smoothing factor for Amplitude EMA. |
160
+ | SaliencePacket | j\_p | f32 | Period Jitter ($J\_p$).1 |
161
+ | | j\_a | f32 | Amplitude Jitter ($J\_a$).1 |
162
+ | | h\_score | f32 | Harmonic Alignment score ($H$).1 |
163
+ | | s\_score | f32 | Final Salience Score ($S$).1 |
164
+ | | energy | f32 | Peak energy ($E$).1 |
165
+ | MarineProcessor | new(config: MarineConfig) | Self | Constructor. |
166
+ | | process\_sample(\&mut self, sample: f32, sample\_idx: u64) | Option\<SaliencePacket\> | The O(1) processing function. |
167
+
168
+ **marine\_salience/src/processor.rs (Implementation Sketch):**
169
+
170
+ The MarineProcessor struct will hold the state, including EmaTracker instances for period and amplitude, the last\_peak\_sample index, last\_peak\_amplitude, and the current\_direction of the signal (e.g., \+1 for rising, \-1 for falling).
171
+
172
+ The process\_sample function is the O(1) core, implementing the algorithm from 1:
173
+
174
+ 1. **Pre-gating:** Check if sample.abs() \> config.clip\_threshold.
175
+ 2. **Peak Detection:** Track the signal's direction. A change from \+1 (rising) to \-1 (falling) signifies a peak at sample\_idx \- 1, as per the formula x(n-1) \< x(n) \> x(n+1).1
176
+ 3. **Jitter Computation:** If a peak is detected at n:
177
+ * Calculate current period $T\_i \= (n \- self.last\_peak\_sample)$.
178
+ * Calculate current amplitude $A\_i \= sample\_at(n)$.
179
+ * Calculate $J\_p \= |T\_i \- self.ema\_period.value()|$.1
180
+ * Calculate $J\_a \= |A\_i \- self.ema\_amplitude.value()|$.1
181
+ * Update the EMAs: self.ema\_period.update(T\_i), self.ema\_amplitude.update(A\_i).
182
+ 4. **Harmonic Alignment:** Perform the check for $H$.1
183
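+ 5. **Salience Score:** Compute $S \= w\_e E \+ w\_j(1/J) \+ w\_h H$.1 Here $J$ denotes the jitter term (derived from $J\_p$ and $J\_a$); guard it with a small $\\epsilon$ so that a perfectly periodic signal ($J \= 0$) does not cause a division by zero.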
184
+ 6. Update self.last\_peak\_sample \= n, self.last\_peak\_amplitude \= A\_i.
185
+ 7. Return Some(SaliencePacket {... }).
186
+ 8. If no peak is detected, return None.
187
+
188
+ ### **3.4 Modifying the indextts\_rust Crate**
189
+
190
+ With the marine\_salience crate complete, the indextts\_rust crate can now be modified.
191
+
192
+ indextts\_rust/Cargo.toml:
193
+ Add the new crate as a dependency:
194
+
195
+ Ini, TOML
196
+
197
+ \[dependencies\]
198
+ marine\_salience \= { path \= "../marine\_salience" }
199
+ \#... other dependencies (tch, burn, ndarray, etc.)
200
+
201
+ indextts\_rust/src/t2s/conditioner.rs:
202
+ This is the central modification. The file responsible for generating the e vector is completely refactored.
203
+
204
+ Rust
205
+
206
+ // BEFORE: Original Conformer-based
207
+ //
208
+ // use tch::Tensor;
209
+ // use crate::audio::AudioData;
210
+ //
211
+ // // This struct holds the large, complex Conformer model
212
+ // pub struct ConformerEmotionPerceiver {
213
+ // //... model weights...
214
+ // }
215
+ //
216
+ // impl ConformerEmotionPerceiver {
217
+ // pub fn get\_style\_embedding(\&self, audio: \&AudioData) \-\> Result\<Tensor, ModelError\> {
218
+ // // 1\. Convert AudioData to mel-spectrogram tensor
219
+ // // 2\. Pass spectrogram through Conformer layers
220
+ // // 3\. (GRL logic is applied during training)
221
+ // // 4\. Return an opaque, high-dimensional 'e' vector
222
+ // // (e.g., a 512-dimensional embedding)
223
+ // }
224
+ // }
225
+
226
+ // AFTER: New MarineProsodyConditioner
227
+ //
228
+ use marine\_salience::processor::{MarineProcessor, SaliencePacket};
229
+ use marine\_salience::config::MarineConfig;
230
+ use crate::audio::load\_wav\_samples; // From hypothetical audio.rs
231
+ use std::path::Path;
232
+ use anyhow::Result;
233
+
234
+ // This is the struct defined in Table 1
235
+ \#\[derive(Debug, Clone)\]
236
+ pub struct MarineProsodyVector {
237
+ pub jp\_mean: f32,
238
+ pub jp\_std: f32,
239
+ pub ja\_mean: f32,
240
+ pub ja\_std: f32,
241
+ pub h\_mean: f32,
242
+ pub s\_mean: f32,
243
+ pub peak\_density: f32,
244
+ pub energy\_mean: f32,
245
+ }
246
+
247
+ // This new struct and function replace the Conformer
248
+ pub struct MarineProsodyConditioner {
249
+ config: MarineConfig,
250
+ }
251
+
252
+ impl MarineProsodyConditioner {
253
+ pub fn new(config: MarineConfig) \-\> Self {
254
+ Self { config }
255
+ }
256
+
257
+ pub fn get\_marine\_style\_vector(&self, style\_prompt\_path: \&Path, sample\_rate: f32) \-\> Result\<MarineProsodyVector\> {
258
+ // 1\. Load audio samples
259
+ // Assumes audio.rs provides this function
260
+ let samples \= load\_wav\_samples(style\_prompt\_path)?;
261
+ let duration\_sec \= samples.len() as f32 / sample\_rate;
262
+
263
+ // 2\. Instantiate and run the MarineProcessor
264
+ let mut processor \= MarineProcessor::new(self.config.clone());
265
+ let mut packets \= Vec::\<SaliencePacket\>::new();
266
+
267
+ for (i, sample) in samples.iter().enumerate() {
268
+ if let Some(packet) \= processor.process\_sample(\*sample, i as u64) {
269
+ packets.push(packet);
270
+ }
271
+ }
272
+
273
+ if packets.is\_empty() {
274
+ return Err(anyhow::anyhow\!("No peaks detected in style prompt."));
275
+ }
276
+
277
+ // 3\. Aggregate packets into the final feature vector
278
+ let num\_packets \= packets.len() as f32;
279
+
280
+ let mut jp\_mean \= 0.0;
281
+ let mut ja\_mean \= 0.0;
282
+ let mut h\_mean \= 0.0;
283
+ let mut s\_mean \= 0.0;
284
+ let mut energy\_mean \= 0.0;
285
+
286
+ for p in \&packets {
287
+ jp\_mean \+= p.j\_p;
288
+ ja\_mean \+= p.j\_a;
289
+ h\_mean \+= p.h\_score;
290
+ s\_mean \+= p.s\_score;
291
+ energy\_mean \+= p.energy;
292
+ }
293
+
294
+ jp\_mean /= num\_packets;
295
+ ja\_mean /= num\_packets;
296
+ h\_mean /= num\_packets;
297
+ s\_mean /= num\_packets;
298
+ energy\_mean /= num\_packets;
299
+
300
+ // Calculate population standard deviation (square root of the variance)
301
+ let mut jp\_std \= 0.0;
302
+ let mut ja\_std \= 0.0;
303
+ for p in \&packets {
304
+ jp\_std \+= (p.j\_p \- jp\_mean).powi(2);
305
+ ja\_std \+= (p.j\_a \- ja\_mean).powi(2);
306
+ }
307
+ jp\_std \= (jp\_std / num\_packets).sqrt();
308
+ ja\_std \= (ja\_std / num\_packets).sqrt();
309
+
310
+ let peak\_density \= num\_packets / duration\_sec;
311
+
312
+ Ok(MarineProsodyVector {
313
+ jp\_mean,
314
+ jp\_std,
315
+ ja\_mean,
316
+ ja\_std,
317
+ h\_mean,
318
+ s\_mean,
319
+ peak\_density,
320
+ energy\_mean,
321
+ })
322
+ }
323
+ }
324
+
325
+ ### **3.5 Updating the T2S Model (indextts\_rust/src/t2s/model.rs)**
326
+
327
+ This change is **breaking** and **mandatory**. The IndexTTS2 T2S model 1 was trained on a high-dimensional e vector (e.g., 512-dim). Our new e' vector is 8-dimensional. The T2S model's architecture must be modified to accept this.
328
+
329
+ The change will be in the T2S Transformer's input embedding layer, which projects the conditioning vectors into the model's main hidden dimension (e.g., 1024-dim).
330
+
331
+ **(Example using tch-rs or burn pseudo-code):**
332
+
333
+ Rust
334
+
335
+ // In src/t2s/model.rs
336
+ //
337
+ // pub struct T2S\_Transformer {
338
+ // ...
339
+ // speaker\_projector: nn::Linear,
340
+ // style\_projector: nn::Linear, // The layer to change
341
+ // ...
342
+ // }
343
+ //
344
+ // impl T2S\_Transformer {
345
+ // pub fn new(config: \&T2S\_Config, vs: \&nn::Path) \-\> Self {
346
+ // ...
347
+ // // BEFORE:
348
+ // // let style\_projector \= nn::linear(
349
+ // // vs / "style\_projector",
350
+ // // 512, // Original Conformer 'e' dimension
351
+ // // config.hidden\_dim,
352
+ // // Default::default()
353
+ // // );
354
+ //
355
+ // // AFTER:
356
+ // let style\_projector \= nn::linear(
357
+ // vs / "style\_projector",
358
+ // 8, // New MarineProsodyVector 'e'' dimension
359
+ // config.hidden\_dim,
360
+ // Default::default()
361
+ // );
362
+ // ...
363
+ // }
364
+ // }
365
+
366
+ This change creates a new, untrained model. The S2M and Vocoder modules 1 can remain unchanged, but the T2S module must now be retrained.
367
+
368
+ ## **Part 4: Training, Inference, and Qualitative Implications**
369
+
370
+ This architectural change has profound, positive implications for the entire system, from training to user-facing control.
371
+
372
+ ### **4.1 Retraining the T2S Module**
373
+
374
+ The modification in Part 3.5 is a hard-fork of the model architecture; retraining the T2S module 1 is not optional.
375
+
376
+ **Training Plan:**
377
+
378
+ 1. **Model:** The S2M and Vocoder modules 1 can be completely frozen. Only the T2S module with the new 8-dimensional style\_projector (from 3.5) needs to be trained.
379
+ 2. **Dataset Preprocessing:** The *entire* training dataset used for the original IndexTTS2 1 must be re-processed.
380
+ * For *every* audio file in the dataset, the MarineProsodyConditioner::get\_marine\_style\_vector function (from 3.4) must be run *once*.
381
+ * The resulting 8-dimensional MarineProsodyVector must be saved as the new "ground truth" style label for that utterance.
382
+ 3. **Training:** The T2S module is now trained as described in the aaai2026.tex paper.1 During the training step, it will load the pre-computed MarineProsodyVector as the e' vector, which will be added to the c (speaker) vector and fed into the Transformer.
383
+ 4. **Hypothesis:** This training run is expected to converge *faster* and to a *higher* qualitative ceiling. The model is no longer burdened by the complex, adversarial GRL-based disentanglement.1 It is instead learning a much simpler, more direct correlation between a clean prosody vector (e') and the target semantic token sequences.
384
+
385
+ ### **4.2 Inference-Time Control**
386
+
387
+ This integration unlocks a new, powerful mode of "synthetic" or "direct" prosody control, fulfilling the proposals implicit in the user's query.
388
+
389
+ * **Mode 1: Reference-Based (Standard):**
390
+ * A user provides a style\_prompt.wav.
391
+ * The get\_marine\_style\_vector function (from 3.4) is called.
392
+ * The resulting MarineProsodyVector e' is fed into the T2S model.
393
+ * This "copies" the prosody from the reference audio, just as the original IndexTTS2 1 intended, but with higher fidelity.
394
+ * **Mode 2: Synthetic-Control (New):**
395
+ * The user provides *no* style prompt.
396
+ * Instead, the user *directly constructs* the 8-dimensional MarineProsodyVector to achieve a desired effect. The application's UI could expose 8 sliders for these values.
397
+ * **Example 1: "Agitated / Rough Voice"**
398
+ * e' \= MarineProsodyVector { jp\_mean: 0.8, jp\_std: 0.5, ja\_mean: 0.7, ja\_std: 0.4,... }
399
+ * **Example 2: "Stable / Monotone Voice"**
400
+ * e' \= MarineProsodyVector { jp\_mean: 0.05, jp\_std: 0.01, ja\_mean: 0.05, ja\_std: 0.01,... }
401
+ * **Example 3: "High-Pitch / High-Energy Voice"**
402
+ * e' \= MarineProsodyVector { peak\_density: 300.0, energy\_mean: 0.9,... }
403
+
404
+ This provides a small, interpretable, and powerful "control panel" for prosody, a significant breakthrough in controllable TTS that was not possible with the original opaque Conformer embedding.1
405
+
406
+ ### **4.3 Bridging to Downstream Fidelity (S2M)**
407
+
408
+ The benefits of this integration propagate through the entire cascade. The S2M module's quality is directly dependent on the quality of the semantic tokens it receives from T2S.1
409
+
410
+ The aaai2026.tex paper 1 states the S2M module uses "GPT latent representations to significantly improve the stability of the generated speech." This suggests the S2M is a powerful and stable *renderer*. However, a renderer is only as good as the instructions it receives.
411
+
412
+ In the original system, the S2M module likely received semantic tokens with "muddled" or "averaged-out" prosody, resulting from the T2S model's struggle with the entangled e vector. The S2M's "stability" 1 may have come at the *cost* of expressiveness, as it learned to smooth over inconsistent prosodic instructions.
413
+
414
+ With the new MarineProsodyConditioner, the T2S model will now produce semantic tokens that are *far more richly, explicitly, and accurately* encoded with prosodic intent. The S2M module's "GPT latents" 1 will receive a higher-fidelity, more consistent input signal. This creates a synergistic effect: the S2M's stable rendering capabilities 1 will now be applied to a *more expressive* set of instructions. The result is an end-to-end system that is *both* stable *and* highly expressive.
415
+
416
+ ## **Part 5: Report Conclusions and Future Trajectories**
417
+
418
+ ### **5.1 Summary of Improvements**
419
+
420
+ The integration framework detailed in this report achieves the project's goals by:
421
+
422
+ 1. **Replacing** a computationally heavy, black-box Conformer 1 with a lightweight, O(1), biologically-plausible, and Rust-native MarineProcessor.1
423
+ 2. **Solving** a core architectural problem in the IndexTTS2 design by providing a *naturally disentangled*, speaker-invariant prosody vector, which simplifies or obviates the need for the adversarial GRL.1
424
+ 3. **Unlocking** a powerful "synthetic control" mode, allowing users to *directly* manipulate prosody at inference time via an 8-dimensional, interpretable control vector.
425
+ 4. **Improving** end-to-end system quality by providing a cleaner, more explicit prosodic signal to the T2S module 1, which in turn provides a higher-fidelity semantic token stream to the S2M module.1
426
+
427
+ ### **5.2 Future Trajectories**
428
+
429
+ This new architecture opens two significant avenues for future research.
430
+
431
+ 1\. True Streaming Synthesis with Dynamic Conditioning
432
+ The IndexTTS2 T2S module is autoregressive 1, and the Marine Algorithm is O(1) per-sample.1 This is a perfect combination for real-time applications.
433
+ A future version could implement a "Dynamic Conditioning" mode. In this mode, a MarineProcessor runs on a live microphone input (e.g., from the user) in a parallel thread. It continuously calculates the MarineProsodyVector over a short, sliding window (e.g., 500ms). This e' vector is then *hot-swapped* into the T2S model's conditioning state *during* the autoregressive generation loop. The result would be a TTS model that mirrors the user's emotional prosody in real-time.
434
+
435
+ 2\. Active Quality Monitoring (Vocoder Feedback Loop)
436
+ The Marine Algorithm is a "universal... salience detector" that distinguishes "structured signals from noise".1 This capability can be used as a quality metric for the vocoder's output.
437
+ An advanced implementation could create a feedback loop:
438
+
439
+ 1. The BigVGANv2 vocoder 1 produces its output audio.
440
+ 2. This audio is *immediately* fed *back* into a MarineProcessor.
441
+ 3. The processor analyzes the output. The key insight from the Marine paper 1 is the use of the **Exponential Moving Average (EMA)**.
442
+ * **Desired Prosody (e.g., vocal fry):** Will produce high $J\_p$/$J\_a$, but the $\\text{EMA}(T)$ and $\\text{EMA}(A)$ will remain *stable*. The algorithm will correctly identify this as a *structured* signal.
443
+ * **Undesired Artifact (e.g., vocoder hiss, phase noise):** Will produce high $J\_p$/$J\_a$, but the $\\text{EMA}(T)$ and $\\text{EMA}(A)$ will become *unstable*. The algorithm will correctly identify this as *unstructured noise*.
444
+
445
+ This creates a quantitative, real-time metric for "output fidelity" that can distinguish desirable prosody from undesirable artifacts. This metric could be used to automatically flag or discard bad generations, or even as a reward function for a Reinforcement Learning (RL) agent tasked with fine-tuning the S2M or Vocoder modules.
446
+
447
+ #### **Works cited**
448
+
449
+ 1. aaai2026.tex (IndexTTS2 paper); marine-Universal-Salience-algoritm.tex (Marine Algorithm paper)
450
+ 2. IndexTTS-Rust README.md (user-uploaded file; access failed, so no valid access date is available)
examples/analyze_chris.rs ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00940abda6dd597d7dacdbb97761fb0635d0dcc7dc30d5391fe159129008b03a
3
+ size 8470
examples/cases.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533a57ea51d412841ab6665c7be3032bb6f5996035dfad66460380c9e72f293f
3
+ size 2271
examples/emo_hate.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e6e7eee1a28303776e9cf43971e9505529bd0e669f5fcf47f4d1370f9187c4
3
+ size 145368
examples/emo_sad.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d3e5bf2b7bca6458f9e6d7a5ce073c41eb4418895e7df2f994e5a0c96c064a
3
+ size 842016
examples/marine_test.rs ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d179d8f3adc5338e94ee2b92f366a36d03c32b51767223d1eefeb42ce9165374
3
+ size 10845
examples/voice_01.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e33e6ee0107a1dd58e1d66dd90c13df3d55a8683047cc3d7ea206dad84ed3fc8
3
+ size 478050
examples/voice_02.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe2dd1dbd54ef85a073fbc4c8fc0198f8d4523cc3320a600de0e347a3d8b491
3
+ size 574074
examples/voice_03.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e8b632efd794418919e2d33c8c2aab9189a57f4d21ef55020413be9f2b292a
3
+ size 616814
examples/voice_04.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3d2536245f45fd5e1eef046dd768ae7b72a0dba3ec3f370f145862fe64b3b2
3
+ size 681084
examples/voice_05.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefb7f4a29a8b36f08d5cc1014ea947dbe9f7bef348f07c40263058e604a98eb
3
+ size 1482796
examples/voice_06.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d85800fe261d106c3274fa792cbb952458c4b0b2e1b908340a8cd0d63c73a30
3
+ size 299052
examples/voice_07.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb10f84e63c3fdbfe99ac4184ca403b46a6d20b50540732713d48c4c95375ce
3
+ size 591894
examples/voice_08.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e2c5f4859999b1ada95ee801d50c3c72879147269a4ed99e385fd917dae5c6f
3
+ size 426812
examples/voice_09.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8702467b9b3c83a16bead578e131c4388b3ef82aeff861bd336e622a9ae8a511
3
+ size 1798188
examples/voice_10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c2db8b395e4c6ea1122ec7463b5f7bd7dd7d7302f3255780e4c529a9ae9985
3
+ size 1942242
examples/voice_11.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82730e38498413d4371a76e841cd91fa2f74843b79ad3b606d45ad8a7b7a736c
3
+ size 1520734
examples/voice_12.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d67bd4f51773677d5902409813b9bb4c1d59b8243c74fc104553b80b49edd22b
3
+ size 778626
models/bigvgan.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31609a2a49ab4e00d14924eb036f2852c88198ad250de228ae972601e67e032f
3
+ size 2269152
models/bigvgan.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f6c1fa12c0bde8d17832fd47de1fdbe5cf085e186d30751f53ff3ad016952a
3
+ size 451411968
models/speaker_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8bc6e37803c99ebcf24cb5e1631bc1a1da00b4acc9ec6ec4c105a3e1f1f5388
3
+ size 2334876
models/speaker_encoder.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d21f2c5de55f48af7319230818262da91442e7f3dcd29d828215e8ee9e1d7e3
3
+ size 27656192
src/audio/dsp.rs ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Digital Signal Processing utilities
2
+
3
+
4
/// Apply a pre-emphasis filter to an audio signal.
///
/// Computes `y[n] = x[n] - coef * x[n-1]`. The first sample has no
/// predecessor and is copied through unchanged.
///
/// # Arguments
/// * `signal` - Input audio signal
/// * `coef` - Pre-emphasis coefficient (typically 0.97)
///
/// # Returns
/// A new vector with the same length as `signal` (empty for empty input).
pub fn apply_preemphasis(signal: &[f32], coef: f32) -> Vec<f32> {
    match signal.first() {
        None => Vec::new(),
        Some(&head) => std::iter::once(head)
            // Each window is `[x[n-1], x[n]]`.
            .chain(signal.windows(2).map(|pair| pair[1] - coef * pair[0]))
            .collect(),
    }
}
25
+
26
/// Apply a de-emphasis filter (the inverse of pre-emphasis).
///
/// Computes the recursive filter `y[n] = x[n] + coef * y[n-1]`, seeding
/// `y[0] = x[0]`.
///
/// # Arguments
/// * `signal` - Input audio signal
/// * `coef` - De-emphasis coefficient (use the value that was used for pre-emphasis)
///
/// # Returns
/// A new vector with the same length as `signal` (empty for empty input).
pub fn apply_deemphasis(signal: &[f32], coef: f32) -> Vec<f32> {
    let mut restored = Vec::with_capacity(signal.len());
    let mut samples = signal.iter().copied();

    if let Some(head) = samples.next() {
        // `previous` always holds the most recently emitted output sample.
        let mut previous = head;
        restored.push(previous);
        for sample in samples {
            previous = sample + coef * previous;
            restored.push(previous);
        }
    }

    restored
}
43
+
44
/// Peak-normalize audio so the largest absolute sample becomes 1.0.
///
/// Signals whose peak is below `1e-8` (including empty input) are returned
/// as an unmodified copy, which avoids dividing by a near-zero peak.
pub fn normalize_audio(signal: &[f32]) -> Vec<f32> {
    let peak = signal
        .iter()
        .fold(0.0f32, |acc, sample| acc.max(sample.abs()));

    if signal.is_empty() || peak < 1e-8 {
        return signal.to_vec();
    }

    signal.iter().map(|sample| sample / peak).collect()
}
58
+
59
/// Scale audio so its largest absolute sample equals `peak`.
///
/// Signals whose current peak is below `1e-8` (including empty input) are
/// returned as an unmodified copy.
pub fn normalize_audio_peak(signal: &[f32], peak: f32) -> Vec<f32> {
    let mut current_peak = 0.0f32;
    for sample in signal {
        current_peak = current_peak.max(sample.abs());
    }

    // An empty signal leaves `current_peak` at 0.0 and is handled here too.
    if current_peak < 1e-8 {
        return signal.to_vec();
    }

    let gain = peak / current_peak;
    signal.iter().map(|&sample| sample * gain).collect()
}
74
+
75
/// Dynamic range compression (natural-log compression).
///
/// The input is floored at `1e-5` before taking the logarithm, so zero,
/// negative and NaN inputs all map to `ln(1e-5)`.
///
/// Used for mel spectrogram normalization.
pub fn dynamic_range_compression(x: f32) -> f32 {
    const FLOOR: f32 = 1e-5;
    let clamped = if x > FLOOR { x } else { FLOOR };
    clamped.ln()
}
82
+
83
+ /// Dynamic range compression for array
84
+ pub fn dynamic_range_compression_array(x: &[f32]) -> Vec<f32> {
85
+ x.iter().map(|&v| dynamic_range_compression(v)).collect()
86
+ }
87
+
88
/// Dynamic range decompression: the natural exponential `e^x`.
pub fn dynamic_range_decompression(x: f32) -> f32 {
    f32::exp(x)
}
92
+
93
+ /// Dynamic range decompression for array
94
+ pub fn dynamic_range_decompression_array(x: &[f32]) -> Vec<f32> {
95
+ x.iter().map(|&v| dynamic_range_decompression(v)).collect()
96
+ }
97
+
98
/// Scale a signal so that its RMS level equals `target_rms`.
///
/// Empty input yields an empty vector. A signal whose RMS is below `1e-8`
/// is returned as an unmodified copy.
pub fn normalize_rms(signal: &[f32], target_rms: f32) -> Vec<f32> {
    if signal.is_empty() {
        return Vec::new();
    }

    let energy: f32 = signal.iter().map(|&sample| sample * sample).sum();
    let current_rms = (energy / signal.len() as f32).sqrt();

    if current_rms < 1e-8 {
        signal.to_vec()
    } else {
        let gain = target_rms / current_rms;
        signal.iter().map(|&sample| sample * gain).collect()
    }
}
113
+
114
/// Apply soft clipping to prevent harsh distortion.
///
/// Samples with `|x| <= threshold` pass through unchanged. Larger samples
/// are mapped to `sign(x) * (threshold + 1 - exp(-(|x| - threshold)))`, so
/// the excess above the threshold is compressed into the range `[0, 1)`.
pub fn soft_clip(signal: &[f32], threshold: f32) -> Vec<f32> {
    let mut clipped = Vec::with_capacity(signal.len());

    for &sample in signal {
        let magnitude = sample.abs();
        if magnitude <= threshold {
            clipped.push(sample);
            continue;
        }

        let overshoot = magnitude - threshold;
        let limited = threshold + (1.0 - (-overshoot).exp());
        clipped.push(sample.signum() * limited);
    }

    clipped
}
129
+
130
/// Pad an audio signal with zeros.
///
/// Returns `pad_left` zeros, followed by a copy of `signal`, followed by
/// `pad_right` zeros.
pub fn pad_audio(signal: &[f32], pad_left: usize, pad_right: usize) -> Vec<f32> {
    let leading = std::iter::repeat(0.0f32).take(pad_left);
    let trailing = std::iter::repeat(0.0f32).take(pad_right);

    leading
        .chain(signal.iter().copied())
        .chain(trailing)
        .collect()
}
137
+
138
/// Trim silence from the beginning and end of a signal.
///
/// A sample counts as audible when its absolute value exceeds the linear
/// amplitude `10^(threshold_db / 20)`. The result spans the first through
/// the last audible sample, inclusive.
///
/// # Arguments
/// * `signal` - Input audio signal
/// * `threshold_db` - Silence threshold in decibels (e.g. `-40.0`)
///
/// # Returns
/// * An empty vector for empty input.
/// * An unmodified copy of `signal` when no sample exceeds the threshold.
/// * Otherwise the trimmed span. A signal with exactly one audible sample
///   yields that single sample; it is no longer discarded.
pub fn trim_silence(signal: &[f32], threshold_db: f32) -> Vec<f32> {
    // Convert the dB threshold to a linear amplitude.
    let threshold = 10f32.powf(threshold_db / 20.0);
    let is_audible = |sample: &f32| sample.abs() > threshold;

    match signal.iter().position(is_audible) {
        // Nothing is audible: keep the fallback of returning the input as is.
        None => signal.to_vec(),
        Some(start) => {
            // An audible sample exists, so `rposition` finds one at or after
            // `start`; the fallback is purely defensive.
            let end = signal.iter().rposition(is_audible).unwrap_or(start);
            signal[start..=end].to_vec()
        }
    }
}
164
+
165
/// Apply a linear fade-in and fade-out to avoid clicks.
///
/// The first `fade_in_samples` samples are scaled by `i / fade_in_samples`
/// (so the first sample becomes 0). The last `fade_out_samples` samples are
/// scaled the same way counting back from the end (so the last sample
/// becomes 0). Fades longer than the signal are truncated to its length,
/// and overlapping fades multiply.
pub fn apply_fade(signal: &[f32], fade_in_samples: usize, fade_out_samples: usize) -> Vec<f32> {
    let mut faded = signal.to_vec();

    // Ramp up from the start.
    for (step, sample) in faded.iter_mut().take(fade_in_samples).enumerate() {
        *sample *= step as f32 / fade_in_samples as f32;
    }

    // Ramp down towards the end, walking backwards from the last sample.
    for (step, sample) in faded.iter_mut().rev().take(fade_out_samples).enumerate() {
        *sample *= step as f32 / fade_out_samples as f32;
    }

    faded
}
189
+
190
/// Compute the root-mean-square level of a signal.
///
/// Returns `0.0` for empty input.
pub fn compute_rms(signal: &[f32]) -> f32 {
    match signal.len() {
        0 => 0.0,
        count => {
            let energy: f32 = signal.iter().map(|&sample| sample * sample).sum();
            (energy / count as f32).sqrt()
        }
    }
}
197
+
198
/// Compute the peak amplitude (largest absolute sample value).
///
/// Returns `0.0` for empty input.
pub fn compute_peak(signal: &[f32]) -> f32 {
    signal
        .iter()
        .fold(0.0f32, |peak, sample| peak.max(sample.abs()))
}
202
+
203
+ /// Compute crest factor (peak/RMS ratio)
204
+ pub fn compute_crest_factor(signal: &[f32]) -> f32 {
205
+ let rms = compute_rms(signal);
206
+ if rms < 1e-8 {
207
+ return 0.0;
208
+ }
209
+ compute_peak(signal) / rms
210
+ }
src/audio/io.rs ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Audio I/O operations
2
+
3
+ use crate::{Error, Result};
4
+ use hound::{SampleFormat, WavReader, WavSpec, WavWriter};
5
+ use std::path::Path;
6
+
7
/// Container pairing raw audio samples with their sample rate.
#[derive(Debug, Clone)]
pub struct AudioData {
    /// Audio samples (mono, normalized to [-1, 1])
    pub samples: Vec<f32>,
    /// Sample rate in Hz
    pub sample_rate: u32,
}

impl AudioData {
    /// Build an `AudioData` from samples and their sample rate.
    pub fn new(samples: Vec<f32>, sample_rate: u32) -> Self {
        AudioData {
            samples,
            sample_rate,
        }
    }

    /// Duration in seconds: sample count divided by sample rate.
    pub fn duration(&self) -> f32 {
        let frames = self.samples.len() as f32;
        let rate = self.sample_rate as f32;
        frames / rate
    }

    /// Number of samples held.
    pub fn len(&self) -> usize {
        self.samples.len()
    }

    /// `true` when there are no samples.
    pub fn is_empty(&self) -> bool {
        self.samples.is_empty()
    }
}
40
+
41
+ /// Load audio from WAV file
42
+ ///
43
+ /// # Arguments
44
+ /// * `path` - Path to WAV file
45
+ /// * `target_sr` - Optional target sample rate (will resample if different)
46
+ ///
47
+ /// # Returns
48
+ /// Audio data with samples normalized to [-1, 1]
49
+ pub fn load_audio<P: AsRef<Path>>(path: P, target_sr: Option<u32>) -> Result<AudioData> {
50
+ let path = path.as_ref();
51
+ if !path.exists() {
52
+ return Err(Error::FileNotFound(path.display().to_string()));
53
+ }
54
+
55
+ let reader = WavReader::open(path).map_err(|e| Error::Audio(format!("Failed to open WAV: {}", e)))?;
56
+ let spec = reader.spec();
57
+ let sample_rate = spec.sample_rate;
58
+ let channels = spec.channels as usize;
59
+
60
+ // Read samples based on format
61
+ let samples: Vec<f32> = match spec.sample_format {
62
+ SampleFormat::Float => {
63
+ let samples: Vec<f32> = reader
64
+ .into_samples::<f32>()
65
+ .collect::<std::result::Result<Vec<_>, _>>()
66
+ .map_err(|e| Error::Audio(format!("Failed to read samples: {}", e)))?;
67
+ samples
68
+ }
69
+ SampleFormat::Int => {
70
+ let bits = spec.bits_per_sample;
71
+ let samples: Vec<i32> = reader
72
+ .into_samples::<i32>()
73
+ .collect::<std::result::Result<Vec<_>, _>>()
74
+ .map_err(|e| Error::Audio(format!("Failed to read samples: {}", e)))?;
75
+
76
+ // Normalize to [-1, 1]
77
+ let max_val = (1 << (bits - 1)) as f32;
78
+ samples.iter().map(|&s| s as f32 / max_val).collect()
79
+ }
80
+ };
81
+
82
+ // Convert to mono if stereo
83
+ let mono_samples = if channels > 1 {
84
+ samples
85
+ .chunks(channels)
86
+ .map(|chunk| chunk.iter().sum::<f32>() / channels as f32)
87
+ .collect()
88
+ } else {
89
+ samples
90
+ };
91
+
92
+ let mut audio = AudioData::new(mono_samples, sample_rate);
93
+
94
+ // Resample if needed
95
+ if let Some(target) = target_sr {
96
+ if target != sample_rate {
97
+ audio = super::resample::resample(&audio, target)?;
98
+ }
99
+ }
100
+
101
+ Ok(audio)
102
+ }
103
+
104
+ /// Save audio to WAV file
105
+ ///
106
+ /// # Arguments
107
+ /// * `path` - Output path
108
+ /// * `audio` - Audio data to save
109
+ pub fn save_audio<P: AsRef<Path>>(path: P, audio: &AudioData) -> Result<()> {
110
+ let spec = WavSpec {
111
+ channels: 1,
112
+ sample_rate: audio.sample_rate,
113
+ bits_per_sample: 32,
114
+ sample_format: SampleFormat::Float,
115
+ };
116
+
117
+ let mut writer = WavWriter::create(path, spec)
118
+ .map_err(|e| Error::Audio(format!("Failed to create WAV writer: {}", e)))?;
119
+
120
+ for &sample in &audio.samples {
121
+ writer
122
+ .write_sample(sample)
123
+ .map_err(|e| Error::Audio(format!("Failed to write sample: {}", e)))?;
124
+ }
125
+
126
+ writer
127
+ .finalize()
128
+ .map_err(|e| Error::Audio(format!("Failed to finalize WAV: {}", e)))?;
129
+
130
+ Ok(())
131
+ }
132
+
133
+ /// Save audio samples with specified sample rate
134
+ pub fn save_samples<P: AsRef<Path>>(path: P, samples: &[f32], sample_rate: u32) -> Result<()> {
135
+ let audio = AudioData::new(samples.to_vec(), sample_rate);
136
+ save_audio(path, &audio)
137
+ }
138
+
139
+ /// Load multiple audio files in parallel
140
+ pub fn load_audio_batch<P: AsRef<Path> + Sync>(
141
+ paths: &[P],
142
+ target_sr: Option<u32>,
143
+ ) -> Result<Vec<AudioData>> {
144
+ use rayon::prelude::*;
145
+
146
+ paths
147
+ .par_iter()
148
+ .map(|p| load_audio(p, target_sr))
149
+ .collect()
150
+ }
src/audio/mel.rs ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Mel-spectrogram computation
2
+ //!
3
+ //! Implements Short-Time Fourier Transform (STFT) and mel filterbank
4
+
5
+ use crate::{Error, Result};
6
+ use ndarray::{Array1, Array2, Axis};
7
+ use num_complex::Complex;
8
+ use realfft::RealFftPlanner;
9
+ use std::f32::consts::PI;
10
+
11
+ use super::AudioConfig;
12
+
13
+ /// Mel filterbank for converting linear spectrogram to mel scale
14
+ #[derive(Debug, Clone)]
15
+ pub struct MelFilterbank {
16
+ /// Filterbank matrix (n_mels x n_fft/2+1)
17
+ pub filters: Array2<f32>,
18
+ /// Sample rate
19
+ pub sample_rate: u32,
20
+ /// Number of mel bands
21
+ pub n_mels: usize,
22
+ /// FFT size
23
+ pub n_fft: usize,
24
+ }
25
+
26
+ impl MelFilterbank {
27
+ /// Create mel filterbank
28
+ pub fn new(sample_rate: u32, n_fft: usize, n_mels: usize, fmin: f32, fmax: f32) -> Self {
29
+ let filters = create_mel_filterbank(sample_rate, n_fft, n_mels, fmin, fmax);
30
+ Self {
31
+ filters,
32
+ sample_rate,
33
+ n_mels,
34
+ n_fft,
35
+ }
36
+ }
37
+
38
+ /// Apply filterbank to power spectrogram
39
+ pub fn apply(&self, spectrogram: &Array2<f32>) -> Array2<f32> {
40
+ // spectrogram: (n_fft/2+1, time_frames)
41
+ // filters: (n_mels, n_fft/2+1)
42
+ // output: (n_mels, time_frames)
43
+ self.filters.dot(spectrogram)
44
+ }
45
+ }
46
+
47
/// Convert a frequency in Hz to the mel scale.
///
/// Uses `mel = 2595 * log10(1 + hz / 700)`.
pub fn hz_to_mel(hz: f32) -> f32 {
    let ratio = 1.0 + hz / 700.0;
    2595.0 * ratio.log10()
}
51
+
52
/// Convert a mel-scale value back to a frequency in Hz.
///
/// Uses `hz = 700 * (10^(mel / 2595) - 1)`, the inverse of [`hz_to_mel`].
pub fn mel_to_hz(mel: f32) -> f32 {
    let exponent = mel / 2595.0;
    700.0 * (10f32.powf(exponent) - 1.0)
}
56
+
57
+ /// Create mel filterbank matrix
58
+ fn create_mel_filterbank(
59
+ sample_rate: u32,
60
+ n_fft: usize,
61
+ n_mels: usize,
62
+ fmin: f32,
63
+ fmax: f32,
64
+ ) -> Array2<f32> {
65
+ let n_freqs = n_fft / 2 + 1;
66
+
67
+ // Convert to mel scale
68
+ let mel_min = hz_to_mel(fmin);
69
+ let mel_max = hz_to_mel(fmax);
70
+
71
+ // Create mel points
72
+ let mel_points: Vec<f32> = (0..=n_mels + 1)
73
+ .map(|i| mel_min + (mel_max - mel_min) * i as f32 / (n_mels + 1) as f32)
74
+ .collect();
75
+
76
+ // Convert back to Hz
77
+ let hz_points: Vec<f32> = mel_points.iter().map(|&m| mel_to_hz(m)).collect();
78
+
79
+ // Convert to FFT bin numbers
80
+ let bin_points: Vec<usize> = hz_points
81
+ .iter()
82
+ .map(|&hz| ((n_fft as f32 + 1.0) * hz / sample_rate as f32).floor() as usize)
83
+ .collect();
84
+
85
+ // Create filterbank
86
+ let mut filters = Array2::zeros((n_mels, n_freqs));
87
+
88
+ for m in 0..n_mels {
89
+ let f_left = bin_points[m];
90
+ let f_center = bin_points[m + 1];
91
+ let f_right = bin_points[m + 2];
92
+
93
+ // Left slope
94
+ for k in f_left..f_center {
95
+ if k < n_freqs {
96
+ filters[[m, k]] = (k - f_left) as f32 / (f_center - f_left).max(1) as f32;
97
+ }
98
+ }
99
+
100
+ // Right slope
101
+ for k in f_center..f_right {
102
+ if k < n_freqs {
103
+ filters[[m, k]] = (f_right - k) as f32 / (f_right - f_center).max(1) as f32;
104
+ }
105
+ }
106
+ }
107
+
108
+ filters
109
+ }
110
+
111
/// Compute a Hann window of `size` samples.
///
/// Uses `w[n] = 0.5 * (1 - cos(2π n / size))`. The denominator is `size`,
/// not `size - 1`, so `w[0]` is 0 and the value 1 is reached at
/// `n = size / 2` for even sizes.
fn hann_window(size: usize) -> Vec<f32> {
    let period = size as f32;
    let mut window = Vec::with_capacity(size);

    for n in 0..size {
        let phase = 2.0 * PI * n as f32 / period;
        window.push(0.5 * (1.0 - phase.cos()));
    }

    window
}
117
+
118
+ /// Compute Short-Time Fourier Transform (STFT)
119
+ ///
120
+ /// # Arguments
121
+ /// * `signal` - Input audio signal
122
+ /// * `n_fft` - FFT size
123
+ /// * `hop_length` - Hop length between frames
124
+ /// * `win_length` - Window length (padded to n_fft)
125
+ ///
126
+ /// # Returns
127
+ /// Complex STFT matrix (n_fft/2+1, time_frames)
128
+ pub fn stft(
129
+ signal: &[f32],
130
+ n_fft: usize,
131
+ hop_length: usize,
132
+ win_length: usize,
133
+ ) -> Result<Array2<Complex<f32>>> {
134
+ if signal.is_empty() {
135
+ return Err(Error::Audio("Empty signal".into()));
136
+ }
137
+
138
+ // Create window
139
+ let window = hann_window(win_length);
140
+
141
+ // Pad signal
142
+ let pad_length = n_fft / 2;
143
+ let mut padded = vec![0.0f32; pad_length];
144
+ padded.extend_from_slice(signal);
145
+ padded.extend(vec![0.0f32; pad_length]);
146
+
147
+ // Calculate number of frames
148
+ let num_frames = (padded.len() - n_fft) / hop_length + 1;
149
+ let n_freqs = n_fft / 2 + 1;
150
+
151
+ // Create FFT planner
152
+ let mut planner = RealFftPlanner::<f32>::new();
153
+ let fft = planner.plan_fft_forward(n_fft);
154
+
155
+ // Output matrix
156
+ let mut stft_matrix = Array2::zeros((n_freqs, num_frames));
157
+
158
+ // Process each frame
159
+ let mut input_buffer = vec![0.0f32; n_fft];
160
+ let mut output_buffer = vec![Complex::new(0.0f32, 0.0f32); n_freqs];
161
+
162
+ for (frame_idx, start) in (0..padded.len() - n_fft + 1)
163
+ .step_by(hop_length)
164
+ .enumerate()
165
+ {
166
+ if frame_idx >= num_frames {
167
+ break;
168
+ }
169
+
170
+ // Extract and window the frame
171
+ for i in 0..win_length {
172
+ input_buffer[i] = padded[start + i] * window[i];
173
+ }
174
+ // Zero pad if win_length < n_fft
175
+ for i in win_length..n_fft {
176
+ input_buffer[i] = 0.0;
177
+ }
178
+
179
+ // Perform FFT
180
+ fft.process(&mut input_buffer, &mut output_buffer)
181
+ .map_err(|e| Error::Audio(format!("FFT failed: {}", e)))?;
182
+
183
+ // Store result
184
+ for (freq_idx, &val) in output_buffer.iter().enumerate() {
185
+ stft_matrix[[freq_idx, frame_idx]] = val;
186
+ }
187
+ }
188
+
189
+ Ok(stft_matrix)
190
+ }
191
+
192
+ /// Compute magnitude spectrogram from STFT
193
+ pub fn magnitude_spectrogram(stft_matrix: &Array2<Complex<f32>>) -> Array2<f32> {
194
+ stft_matrix.mapv(|c| c.norm())
195
+ }
196
+
197
+ /// Compute power spectrogram from STFT
198
+ pub fn power_spectrogram(stft_matrix: &Array2<Complex<f32>>) -> Array2<f32> {
199
+ stft_matrix.mapv(|c| c.norm_sqr())
200
+ }
201
+
202
+ /// Compute mel spectrogram from audio signal
203
+ ///
204
+ /// # Arguments
205
+ /// * `signal` - Audio samples
206
+ /// * `config` - Audio configuration
207
+ ///
208
+ /// # Returns
209
+ /// Log mel spectrogram (n_mels, time_frames)
210
+ pub fn mel_spectrogram(signal: &[f32], config: &AudioConfig) -> Result<Array2<f32>> {
211
+ // Compute STFT
212
+ let stft_matrix = stft(signal, config.n_fft, config.hop_length, config.win_length)?;
213
+
214
+ // Compute power spectrogram
215
+ let power_spec = power_spectrogram(&stft_matrix);
216
+
217
+ // Create mel filterbank
218
+ let mel_fb = MelFilterbank::new(
219
+ config.sample_rate,
220
+ config.n_fft,
221
+ config.n_mels,
222
+ config.fmin,
223
+ config.fmax,
224
+ );
225
+
226
+ // Apply mel filterbank
227
+ let mel_spec = mel_fb.apply(&power_spec);
228
+
229
+ // Apply log compression
230
+ let log_mel_spec = mel_spec.mapv(|x| (x.max(1e-10)).ln());
231
+
232
+ Ok(log_mel_spec)
233
+ }
234
+
235
+ /// Compute mel spectrogram with normalization
236
+ pub fn mel_spectrogram_normalized(
237
+ signal: &[f32],
238
+ config: &AudioConfig,
239
+ mean: Option<f32>,
240
+ std: Option<f32>,
241
+ ) -> Result<Array2<f32>> {
242
+ let mut mel_spec = mel_spectrogram(signal, config)?;
243
+
244
+ // Normalize
245
+ if let (Some(m), Some(s)) = (mean, std) {
246
+ mel_spec.mapv_inplace(|x| (x - m) / s);
247
+ } else {
248
+ // Compute statistics from spectrogram
249
+ let m = mel_spec.mean().unwrap_or(0.0);
250
+ let s = mel_spec.std(0.0);
251
+ if s > 1e-8 {
252
+ mel_spec.mapv_inplace(|x| (x - m) / s);
253
+ }
254
+ }
255
+
256
+ Ok(mel_spec)
257
+ }
258
+
259
+ /// Convert mel spectrogram back to linear spectrogram (approximate)
260
+ pub fn mel_to_linear(mel_spec: &Array2<f32>, mel_fb: &MelFilterbank) -> Array2<f32> {
261
+ // Pseudo-inverse of mel filterbank
262
+ let filters_t = mel_fb.filters.t();
263
+ let gram = mel_fb.filters.dot(&filters_t);
264
+
265
+ // Simple approximation using transpose
266
+ filters_t.dot(mel_spec)
267
+ }
268
+
269
+ /// Compute spectrogram energy per frame
270
+ pub fn frame_energy(mel_spec: &Array2<f32>) -> Array1<f32> {
271
+ mel_spec.sum_axis(Axis(0))
272
+ }
273
+
274
+ /// Detect voice activity based on energy threshold
275
+ pub fn voice_activity_detection(mel_spec: &Array2<f32>, threshold_db: f32) -> Vec<bool> {
276
+ let energy = frame_energy(mel_spec);
277
+ let max_energy = energy.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
278
+ let threshold = max_energy + threshold_db; // threshold_db is negative
279
+
280
+ energy.iter().map(|&e| e > threshold).collect()
281
+ }
282
+
283
#[cfg(test)]
mod tests {
    use super::*;

    /// 0 Hz maps to 0 mel, and 1000 Hz lands within 50 mel of 1000.
    #[test]
    fn test_hz_to_mel() {
        let at_zero = hz_to_mel(0.0);
        assert!((at_zero - 0.0).abs() < 1e-6);

        let at_1k = hz_to_mel(1000.0);
        assert!((at_1k - 1000.0).abs() < 50.0);
    }

    /// Hz -> mel -> Hz round-trips within tolerance.
    #[test]
    fn test_mel_to_hz() {
        let original_hz = 440.0;
        let round_trip = mel_to_hz(hz_to_mel(original_hz));
        assert!((original_hz - round_trip).abs() < 1e-4);
    }

    /// The filterbank has the expected shape and is not all zeros.
    #[test]
    fn test_mel_filterbank_creation() {
        let bank = MelFilterbank::new(22050, 1024, 80, 0.0, 8000.0);
        assert_eq!(bank.filters.shape(), &[80, 513]);

        // Only the overall sum is checked, not each band individually.
        let weight_sum: f32 = bank.filters.iter().sum();
        assert!(weight_sum > 0.0, "Filterbank should have some non-zero values");
    }

    /// The Hann window starts at 0 and peaks at 1 in the middle.
    #[test]
    fn test_hann_window() {
        let hann = hann_window(1024);
        assert_eq!(hann.len(), 1024);
        assert!(hann[0].abs() < 1e-6);
        assert!((hann[512] - 1.0).abs() < 1e-4);
    }

    /// STFT of a short sine wave has n_fft/2 + 1 rows and at least one frame.
    #[test]
    fn test_stft_basic() {
        let sr = 22050;
        let freq = 440.0;
        let duration = 0.1;
        let num_samples = (sr as f32 * duration) as usize;

        let mut signal: Vec<f32> = Vec::with_capacity(num_samples);
        for i in 0..num_samples {
            signal.push((2.0 * PI * freq * i as f32 / sr as f32).sin());
        }

        let result = stft(&signal, 1024, 256, 1024);
        assert!(result.is_ok());

        let spectrum = result.unwrap();
        let shape = spectrum.shape();
        assert_eq!(shape[0], 513); // n_fft/2 + 1
        assert!(shape[1] > 0); // Some frames
    }

    /// The mel spectrogram has `n_mels` rows and at least one frame.
    #[test]
    fn test_mel_spectrogram() {
        let config = AudioConfig::default();
        let num_samples = (config.sample_rate as f32 * 0.1) as usize;
        let signal: Vec<f32> = (0..num_samples)
            .map(|i| (i as f32 * 0.01).sin())
            .collect();

        let result = mel_spectrogram(&signal, &config);
        assert!(result.is_ok());

        let mel = result.unwrap();
        let shape = mel.shape();
        assert_eq!(shape[0], config.n_mels);
        assert!(shape[1] > 0);
    }
}
src/audio/mod.rs ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Audio processing module for IndexTTS
2
+ //!
3
+ //! Provides mel-spectrogram computation, audio I/O, and DSP operations.
4
+
5
+ mod dsp;
6
+ mod io;
7
+ pub mod mel;
8
+ mod resample;
9
+
10
+ pub use dsp::{
11
+ apply_fade, apply_preemphasis, dynamic_range_compression, dynamic_range_decompression,
12
+ normalize_audio, normalize_audio_peak,
13
+ };
14
+ pub use io::{load_audio, save_audio, AudioData};
15
+ pub use mel::{mel_spectrogram, mel_to_linear, MelFilterbank};
16
+ pub use resample::resample;
17
+
18
+ use crate::Result;
19
+
20
/// Parameters that control STFT and mel-spectrogram computation.
#[derive(Debug, Clone)]
pub struct AudioConfig {
    /// Sample rate in Hz
    pub sample_rate: u32,
    /// FFT size
    pub n_fft: usize,
    /// Hop length for STFT, in samples
    pub hop_length: usize,
    /// Window length, in samples
    pub win_length: usize,
    /// Number of mel bands
    pub n_mels: usize,
    /// Minimum frequency in Hz
    pub fmin: f32,
    /// Maximum frequency in Hz
    pub fmax: f32,
}

impl Default for AudioConfig {
    /// Defaults: 22.05 kHz audio, 1024-point FFT and window, hop of 256,
    /// and 80 mel bands spanning 0-8000 Hz.
    fn default() -> Self {
        AudioConfig {
            sample_rate: 22050,
            n_fft: 1024,
            hop_length: 256,
            win_length: 1024,
            n_mels: 80,
            fmin: 0.0,
            fmax: 8000.0,
        }
    }
}
52
+
53
+ /// Compute mel spectrogram from audio file
54
+ pub fn compute_mel_from_file(path: &str, config: &AudioConfig) -> Result<ndarray::Array2<f32>> {
55
+ let audio = load_audio(path, Some(config.sample_rate))?;
56
+ mel_spectrogram(&audio.samples, config)
57
+ }
src/audio/resample.rs ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! Audio resampling using rubato
2
+
3
+ use crate::{Error, Result};
4
+ use rubato::{
5
+ FastFixedIn, PolynomialDegree, Resampler,
6
+ };
7
+
8
+ use super::AudioData;
9
+
10
+ /// Resample audio to target sample rate
11
+ ///
12
+ /// Uses high-quality sinc interpolation
13
+ pub fn resample(audio: &AudioData, target_sr: u32) -> Result<AudioData> {
14
+ if audio.sample_rate == target_sr {
15
+ return Ok(audio.clone());
16
+ }
17
+
18
+ let resample_ratio = target_sr as f64 / audio.sample_rate as f64;
19
+
20
+ // Create resampler
21
+ let mut resampler = FastFixedIn::<f32>::new(
22
+ resample_ratio,
23
+ 1.0, // max relative ratio (no variance)
24
+ PolynomialDegree::Cubic,
25
+ 1024, // chunk size
26
+ 1, // channels
27
+ ).map_err(|e| Error::Audio(format!("Failed to create resampler: {}", e)))?;
28
+
29
+ // Process in chunks
30
+ let input_frames_needed = resampler.input_frames_next();
31
+ let mut input_buffer = vec![vec![0.0f32; input_frames_needed]];
32
+ let mut output_samples = Vec::new();
33
+
34
+ let mut pos = 0;
35
+ while pos < audio.samples.len() {
36
+ // Fill input buffer
37
+ let end = (pos + input_frames_needed).min(audio.samples.len());
38
+ let chunk_size = end - pos;
39
+
40
+ input_buffer[0][..chunk_size].copy_from_slice(&audio.samples[pos..end]);
41
+
42
+ // Pad with zeros if needed
43
+ if chunk_size < input_frames_needed {
44
+ input_buffer[0][chunk_size..].fill(0.0);
45
+ }
46
+
47
+ // Resample
48
+ let output = resampler
49
+ .process(&input_buffer, None)
50
+ .map_err(|e| Error::Audio(format!("Resampling failed: {}", e)))?;
51
+
52
+ output_samples.extend_from_slice(&output[0]);
53
+ pos += chunk_size;
54
+
55
+ if chunk_size < input_frames_needed {
56
+ break;
57
+ }
58
+ }
59
+
60
+ // Trim to expected length
61
+ let expected_len = (audio.samples.len() as f64 * resample_ratio).ceil() as usize;
62
+ output_samples.truncate(expected_len);
63
+
64
+ Ok(AudioData::new(output_samples, target_sr))
65
+ }
66
+
67
+ /// Resample to 22050 Hz (common TTS sample rate)
68
+ pub fn resample_to_22k(audio: &AudioData) -> Result<AudioData> {
69
+ resample(audio, 22050)
70
+ }
71
+
72
+ /// Resample to 16000 Hz (common for ASR)
73
+ pub fn resample_to_16k(audio: &AudioData) -> Result<AudioData> {
74
+ resample(audio, 16000)
75
+ }