addy-hypr4 committed on
Commit
c9658c4
·
1 Parent(s): dc9b7ec

feat: restructure repo to match runtime, add missing piper voices, and update README/manifest

Browse files
.gitattributes CHANGED
@@ -39,3 +39,10 @@ tts/*.onnx filter=lfs diff=lfs merge=lfs -text
39
  tts/*.bin filter=lfs diff=lfs merge=lfs -text
40
  tts/*.tar.gz filter=lfs diff=lfs merge=lfs -text
41
  vad/*.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
39
  tts/*.bin filter=lfs diff=lfs merge=lfs -text
40
  tts/*.tar.gz filter=lfs diff=lfs merge=lfs -text
41
  vad/*.onnx filter=lfs diff=lfs merge=lfs -text
42
+ tts/piper_hi/*.onnx filter=lfs diff=lfs merge=lfs -text
43
+ tts/piper_hi/*.tar.gz filter=lfs diff=lfs merge=lfs -text
44
+ llm/gemma4/*.gguf filter=lfs diff=lfs merge=lfs -text
45
+ stt/qwen3-asr/*.onnx filter=lfs diff=lfs merge=lfs -text
46
+ tts/kokoro/model.onnx filter=lfs diff=lfs merge=lfs -text
47
+ tts/kokoro/*.bin filter=lfs diff=lfs merge=lfs -text
48
+ tts/kokoro/*.tar.gz filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,47 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ tags:
4
+ - voice
5
+ - stt
6
+ - tts
7
+ - llm
8
+ - vox
9
+ - real-time
10
+ - edge-inference
11
+ library_name: generic
12
  ---
13
+
14
+ # Vox Models
15
+
16
+ This repository serves as the official model host for **Vox**, a real-time, local-first voice-to-voice system. It contains the models for each stage of the pipeline: Voice Activity Detection (VAD), Speech-to-Text (STT), a Large Language Model (LLM), and Text-to-Speech (TTS).
17
+
18
+ ## Directory Structure
19
+
20
+ The structure of this repository exactly mirrors the runtime expectations of the Vox backend:
21
+
22
+ ```text
23
+ .
24
+ ├── manifest.json # Single source of truth for all models
25
+ ├── llm/
26
+ │ └── gemma4/ # Large Language Models (GGUF)
27
+ ├── stt/
28
+ │ └── qwen3-asr/ # Speech-to-Text (ONNX)
29
+ │   └── tokenizer/ # STT Tokenizer configs
30
+ ├── tts/
31
+ │ ├── kokoro/ # English TTS (Kokoro ONNX)
32
+ │ └── piper_hi/ # Hindi TTS (Piper ONNX)
33
+ └── vad/
34
+   └── ten_vad.onnx # Voice Activity Detection (ONNX)
35
+ ```
36
+
37
+ ## Manifest
38
+
39
+ The `manifest.json` file in the root directory provides metadata for automated management, including:
40
+ - Relative file paths
41
+ - Exact byte sizes
42
+ - SHA256 hashes for integrity verification
43
+ - Archive markers for compressed assets (e.g., `espeak-ng-data`)
44
+
45
+ ## Usage
46
+
47
+ These models are intended to be downloaded and managed by the Vox application runtime. For manual use, ensure you have [Git LFS](https://git-lfs.github.com/) installed to correctly retrieve the large model weights.
llm/{google_gemma-4-E2B-it-Q4_K_M.gguf → gemma4/google_gemma-4-E2B-it-Q4_K_M.gguf} RENAMED
File without changes
manifests/manifest.json → manifest.json RENAMED
@@ -1,6 +1,6 @@
1
  {
2
  "version": "1.0.0",
3
- "total_size_bytes": 4924726403,
4
  "models": [
5
  {
6
  "id": "ten_vad",
@@ -10,82 +10,113 @@
10
  },
11
  {
12
  "id": "stt_conv_frontend",
13
- "path": "stt/conv_frontend.onnx",
14
  "size": 44148281,
15
- "sha256": "d22dc4423e0940e49884e903d2ea2f7e5567c14fc1aed97e4e26d6b8f208ef9e"
16
  },
17
  {
18
  "id": "stt_encoder",
19
- "path": "stt/encoder.int8.onnx",
20
  "size": 182491662,
21
- "sha256": "60748d3e6744a57c9c91e1b17424a6c2990567e8adceb0783940c03ed98fa9d9"
22
  },
23
  {
24
  "id": "stt_decoder",
25
- "path": "stt/decoder.int8.onnx",
26
  "size": 755914231,
27
- "sha256": "4f6885be5959ae26af3089d38ee7972c5fafbeeb1cf8d5e76eab6d8b61ca5771"
28
  },
29
  {
30
  "id": "stt_vocab",
31
- "path": "stt/vocab.json",
32
  "size": 2776833,
33
  "sha256": "ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910"
34
  },
35
  {
36
  "id": "stt_merges",
37
- "path": "stt/merges.txt",
38
  "size": 1671853,
39
  "sha256": "8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5"
40
  },
41
  {
42
  "id": "stt_config",
43
- "path": "stt/tokenizer_config.json",
44
  "size": 12487,
45
  "sha256": "4942d005604266809309cabc9f4e9cb89ce855d59b14681fdc0e1cc62ea26c4c"
46
  },
47
  {
48
  "id": "llm_gemma_4_q4_k_m",
49
- "path": "llm/google_gemma-4-E2B-it-Q4_K_M.gguf",
50
  "size": 3462678272,
51
  "sha256": "b5310340b3a23d31655d7119d100d5df1b2d8ee17b3ca8b0a23ad7e9eb5fa705"
52
  },
53
  {
54
  "id": "tts_kokoro_onnx",
55
- "path": "tts/kokoro-v0.19.onnx",
56
  "size": 345555491,
57
- "sha256": "10ff414106a038ce7e9e0126c6461e4dc8a86efaa89dc91d2009d69fe635e339"
58
  },
59
  {
60
  "id": "tts_kokoro_voices",
61
- "path": "tts/kokoro-voices.bin",
62
  "size": 5755904,
63
- "sha256": "a372c67b056ef0b695c375d39b99630d23fb07ad4c8d87aa32a19a62fca523ad"
64
  },
65
  {
66
  "id": "tts_kokoro_tokens",
67
- "path": "tts/kokoro-tokens.txt",
68
  "size": 1078,
69
  "sha256": "4f31c71282d14af4e926cd12462078fe9d20d00c589e63fe2750a8f56d6d7f7b"
70
  },
71
  {
72
- "id": "tts_espeak_ng_data",
73
- "path": "tts/espeak-ng-data.tar.gz",
74
  "size": 8990857,
75
  "sha256": "7ddfb7247e98108baeab2a3f9c79c29247e58ae7d60d837b32e28999f555bf8d",
76
  "archive": "tar.gz"
77
  },
78
  {
79
- "id": "tts_hi_piper_onnx",
80
- "path": "tts/hi_IN-priyamvada-medium.onnx",
 
 
 
 
 
 
 
 
 
 
 
 
81
  "size": 63145178,
82
- "sha256": "8871f3e07adb6ca490f8dbcd3956a8647c53c35b5d0a1c2a8d097b3bf721a31b"
83
  },
84
  {
85
- "id": "tts_hi_piper_config",
86
- "path": "tts/hi_IN-priyamvada-medium.onnx.json",
87
  "size": 4973,
88
  "sha256": "5efc0ccf7529f3528996d46e0fac1f969f681d44a8e55bfa6236ff8841b5d52d"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ]
91
  }
 
1
  {
2
  "version": "1.0.0",
3
+ "total_size_bytes": 5008946279,
4
  "models": [
5
  {
6
  "id": "ten_vad",
 
10
  },
11
  {
12
  "id": "stt_conv_frontend",
13
+ "path": "stt/qwen3-asr/conv_frontend.onnx",
14
  "size": 44148281,
15
+ "sha256": "63f8124374825bbde0c0f94d85426cca753cbdd5060085b1233463007165c8d3"
16
  },
17
  {
18
  "id": "stt_encoder",
19
+ "path": "stt/qwen3-asr/encoder.int8.onnx",
20
  "size": 182491662,
21
+ "sha256": "77845dcff9a40ce1327a0ad4c55966c6181aed438c9e56e3d3535c1132301ef6"
22
  },
23
  {
24
  "id": "stt_decoder",
25
+ "path": "stt/qwen3-asr/decoder.int8.onnx",
26
  "size": 755914231,
27
+ "sha256": "1bb2a998a84d5422af985c00af8f169fa433a8df4df0b26ab4040095141f88c2"
28
  },
29
  {
30
  "id": "stt_vocab",
31
+ "path": "stt/qwen3-asr/tokenizer/vocab.json",
32
  "size": 2776833,
33
  "sha256": "ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910"
34
  },
35
  {
36
  "id": "stt_merges",
37
+ "path": "stt/qwen3-asr/tokenizer/merges.txt",
38
  "size": 1671853,
39
  "sha256": "8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5"
40
  },
41
  {
42
  "id": "stt_config",
43
+ "path": "stt/qwen3-asr/tokenizer/tokenizer_config.json",
44
  "size": 12487,
45
  "sha256": "4942d005604266809309cabc9f4e9cb89ce855d59b14681fdc0e1cc62ea26c4c"
46
  },
47
  {
48
  "id": "llm_gemma_4_q4_k_m",
49
+ "path": "llm/gemma4/google_gemma-4-E2B-it-Q4_K_M.gguf",
50
  "size": 3462678272,
51
  "sha256": "b5310340b3a23d31655d7119d100d5df1b2d8ee17b3ca8b0a23ad7e9eb5fa705"
52
  },
53
  {
54
  "id": "tts_kokoro_onnx",
55
+ "path": "tts/kokoro/model.onnx",
56
  "size": 345555491,
57
+ "sha256": "6b64b1473beb7efa5704e821a4384839b722bdc671ef7300ad640a2f301a8c0b"
58
  },
59
  {
60
  "id": "tts_kokoro_voices",
61
+ "path": "tts/kokoro/voices.bin",
62
  "size": 5755904,
63
+ "sha256": "4c5c6bde45060417d61978970aa7684b25e8d3d7cc5349a4f3222474899a8388"
64
  },
65
  {
66
  "id": "tts_kokoro_tokens",
67
+ "path": "tts/kokoro/tokens.txt",
68
  "size": 1078,
69
  "sha256": "4f31c71282d14af4e926cd12462078fe9d20d00c589e63fe2750a8f56d6d7f7b"
70
  },
71
  {
72
+ "id": "tts_kokoro_espeak_ng_data",
73
+ "path": "tts/kokoro/espeak-ng-data.tar.gz",
74
  "size": 8990857,
75
  "sha256": "7ddfb7247e98108baeab2a3f9c79c29247e58ae7d60d837b32e28999f555bf8d",
76
  "archive": "tar.gz"
77
  },
78
  {
79
+ "id": "tts_hi_pratham_onnx",
80
+ "path": "tts/piper_hi/hi_IN-pratham-medium.onnx",
81
+ "size": 63516050,
82
+ "sha256": "169964b0871667f6793416d4b35e97357a68ba1ad01df8580c28048989ee7693"
83
+ },
84
+ {
85
+ "id": "tts_hi_pratham_config",
86
+ "path": "tts/piper_hi/hi_IN-pratham-medium.onnx.json",
87
+ "size": 4970,
88
+ "sha256": "b68edd2cd7950dd436314013b7cd12e9699e5a3f6fe5af5af94294cf6aa7b9fd"
89
+ },
90
+ {
91
+ "id": "tts_hi_priyamvada_onnx",
92
+ "path": "tts/piper_hi/hi_IN-priyamvada-medium.onnx",
93
  "size": 63145178,
94
+ "sha256": "91f62f46662fad367a977a48abd30ee1a7f178a6f2f3e3930361a8f6c0a92421"
95
  },
96
  {
97
+ "id": "tts_hi_priyamvada_config",
98
+ "path": "tts/piper_hi/hi_IN-priyamvada-medium.onnx.json",
99
  "size": 4973,
100
  "sha256": "5efc0ccf7529f3528996d46e0fac1f969f681d44a8e55bfa6236ff8841b5d52d"
101
+ },
102
+ {
103
+ "id": "tts_hi_rohan_onnx",
104
+ "path": "tts/piper_hi/hi_IN-rohan-medium.onnx",
105
+ "size": 62950044,
106
+ "sha256": "b65dc80fb34d9dcd1cf684cb297966a34983bbc93bb1696fe207f32b0b33a091"
107
+ },
108
+ {
109
+ "id": "tts_hi_rohan_config",
110
+ "path": "tts/piper_hi/hi_IN-rohan-medium.onnx.json",
111
+ "size": 5041,
112
+ "sha256": "07b9ae19bd0bac7fbbc99f7ee69c91245eb5470e926632c31fc0c50ba653c817"
113
+ },
114
+ {
115
+ "id": "tts_piper_hi_espeak_ng_data",
116
+ "path": "tts/piper_hi/espeak-ng-data.tar.gz",
117
+ "size": 8990869,
118
+ "sha256": "5d26294483bff947820b8ebf55fde437622c7c542eb104649e29626ab1763524",
119
+ "archive": "tar.gz"
120
  }
121
  ]
122
  }
stt/{conv_frontend.onnx → qwen3-asr/conv_frontend.onnx} RENAMED
File without changes
stt/{decoder.int8.onnx → qwen3-asr/decoder.int8.onnx} RENAMED
File without changes
stt/{encoder.int8.onnx → qwen3-asr/encoder.int8.onnx} RENAMED
File without changes
stt/{merges.txt → qwen3-asr/tokenizer/merges.txt} RENAMED
File without changes
stt/{tokenizer_config.json → qwen3-asr/tokenizer/tokenizer_config.json} RENAMED
File without changes
stt/{vocab.json → qwen3-asr/tokenizer/vocab.json} RENAMED
File without changes
tts/{espeak-ng-data.tar.gz → kokoro/espeak-ng-data.tar.gz} RENAMED
File without changes
tts/{kokoro-v0.19.onnx → kokoro/model.onnx} RENAMED
File without changes
tts/{kokoro-tokens.txt → kokoro/tokens.txt} RENAMED
File without changes
tts/{kokoro-voices.bin → kokoro/voices.bin} RENAMED
File without changes
tts/piper_hi/espeak-ng-data.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d26294483bff947820b8ebf55fde437622c7c542eb104649e29626ab1763524
3
+ size 8990869
tts/piper_hi/hi_IN-pratham-medium.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169964b0871667f6793416d4b35e97357a68ba1ad01df8580c28048989ee7693
3
+ size 63516050
tts/piper_hi/hi_IN-pratham-medium.onnx.json ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "pratham",
3
+ "audio": {
4
+ "sample_rate": 22050,
5
+ "quality": "medium"
6
+ },
7
+ "espeak": {
8
+ "voice": "hi"
9
+ },
10
+ "language": {
11
+ "code": "hi_IN",
12
+ "family": "hi",
13
+ "region": "IN",
14
+ "name_native": "हिन्दी",
15
+ "name_english": "Hindi",
16
+ "country_english": "India"
17
+ },
18
+ "inference": {
19
+ "noise_scale": 0.667,
20
+ "length_scale": 1,
21
+ "noise_w": 0.8
22
+ },
23
+ "phoneme_type": "espeak",
24
+ "phoneme_map": {},
25
+ "phoneme_id_map": {
26
+ " ": [
27
+ 3
28
+ ],
29
+ "!": [
30
+ 4
31
+ ],
32
+ "\"": [
33
+ 150
34
+ ],
35
+ "#": [
36
+ 149
37
+ ],
38
+ "$": [
39
+ 2
40
+ ],
41
+ "'": [
42
+ 5
43
+ ],
44
+ "(": [
45
+ 6
46
+ ],
47
+ ")": [
48
+ 7
49
+ ],
50
+ ",": [
51
+ 8
52
+ ],
53
+ "-": [
54
+ 9
55
+ ],
56
+ ".": [
57
+ 10
58
+ ],
59
+ "0": [
60
+ 130
61
+ ],
62
+ "1": [
63
+ 131
64
+ ],
65
+ "2": [
66
+ 132
67
+ ],
68
+ "3": [
69
+ 133
70
+ ],
71
+ "4": [
72
+ 134
73
+ ],
74
+ "5": [
75
+ 135
76
+ ],
77
+ "6": [
78
+ 136
79
+ ],
80
+ "7": [
81
+ 137
82
+ ],
83
+ "8": [
84
+ 138
85
+ ],
86
+ "9": [
87
+ 139
88
+ ],
89
+ ":": [
90
+ 11
91
+ ],
92
+ ";": [
93
+ 12
94
+ ],
95
+ "?": [
96
+ 13
97
+ ],
98
+ "X": [
99
+ 156
100
+ ],
101
+ "^": [
102
+ 1
103
+ ],
104
+ "_": [
105
+ 0
106
+ ],
107
+ "a": [
108
+ 14
109
+ ],
110
+ "b": [
111
+ 15
112
+ ],
113
+ "c": [
114
+ 16
115
+ ],
116
+ "d": [
117
+ 17
118
+ ],
119
+ "e": [
120
+ 18
121
+ ],
122
+ "f": [
123
+ 19
124
+ ],
125
+ "g": [
126
+ 154
127
+ ],
128
+ "h": [
129
+ 20
130
+ ],
131
+ "i": [
132
+ 21
133
+ ],
134
+ "j": [
135
+ 22
136
+ ],
137
+ "k": [
138
+ 23
139
+ ],
140
+ "l": [
141
+ 24
142
+ ],
143
+ "m": [
144
+ 25
145
+ ],
146
+ "n": [
147
+ 26
148
+ ],
149
+ "o": [
150
+ 27
151
+ ],
152
+ "p": [
153
+ 28
154
+ ],
155
+ "q": [
156
+ 29
157
+ ],
158
+ "r": [
159
+ 30
160
+ ],
161
+ "s": [
162
+ 31
163
+ ],
164
+ "t": [
165
+ 32
166
+ ],
167
+ "u": [
168
+ 33
169
+ ],
170
+ "v": [
171
+ 34
172
+ ],
173
+ "w": [
174
+ 35
175
+ ],
176
+ "x": [
177
+ 36
178
+ ],
179
+ "y": [
180
+ 37
181
+ ],
182
+ "z": [
183
+ 38
184
+ ],
185
+ "æ": [
186
+ 39
187
+ ],
188
+ "ç": [
189
+ 40
190
+ ],
191
+ "ð": [
192
+ 41
193
+ ],
194
+ "ø": [
195
+ 42
196
+ ],
197
+ "ħ": [
198
+ 43
199
+ ],
200
+ "ŋ": [
201
+ 44
202
+ ],
203
+ "œ": [
204
+ 45
205
+ ],
206
+ "ǀ": [
207
+ 46
208
+ ],
209
+ "ǁ": [
210
+ 47
211
+ ],
212
+ "ǂ": [
213
+ 48
214
+ ],
215
+ "ǃ": [
216
+ 49
217
+ ],
218
+ "ɐ": [
219
+ 50
220
+ ],
221
+ "ɑ": [
222
+ 51
223
+ ],
224
+ "ɒ": [
225
+ 52
226
+ ],
227
+ "ɓ": [
228
+ 53
229
+ ],
230
+ "ɔ": [
231
+ 54
232
+ ],
233
+ "ɕ": [
234
+ 55
235
+ ],
236
+ "ɖ": [
237
+ 56
238
+ ],
239
+ "ɗ": [
240
+ 57
241
+ ],
242
+ "ɘ": [
243
+ 58
244
+ ],
245
+ "ə": [
246
+ 59
247
+ ],
248
+ "ɚ": [
249
+ 60
250
+ ],
251
+ "ɛ": [
252
+ 61
253
+ ],
254
+ "ɜ": [
255
+ 62
256
+ ],
257
+ "ɞ": [
258
+ 63
259
+ ],
260
+ "ɟ": [
261
+ 64
262
+ ],
263
+ "ɠ": [
264
+ 65
265
+ ],
266
+ "ɡ": [
267
+ 66
268
+ ],
269
+ "ɢ": [
270
+ 67
271
+ ],
272
+ "ɣ": [
273
+ 68
274
+ ],
275
+ "ɤ": [
276
+ 69
277
+ ],
278
+ "ɥ": [
279
+ 70
280
+ ],
281
+ "ɦ": [
282
+ 71
283
+ ],
284
+ "ɧ": [
285
+ 72
286
+ ],
287
+ "ɨ": [
288
+ 73
289
+ ],
290
+ "ɪ": [
291
+ 74
292
+ ],
293
+ "ɫ": [
294
+ 75
295
+ ],
296
+ "ɬ": [
297
+ 76
298
+ ],
299
+ "ɭ": [
300
+ 77
301
+ ],
302
+ "ɮ": [
303
+ 78
304
+ ],
305
+ "ɯ": [
306
+ 79
307
+ ],
308
+ "ɰ": [
309
+ 80
310
+ ],
311
+ "ɱ": [
312
+ 81
313
+ ],
314
+ "ɲ": [
315
+ 82
316
+ ],
317
+ "ɳ": [
318
+ 83
319
+ ],
320
+ "ɴ": [
321
+ 84
322
+ ],
323
+ "ɵ": [
324
+ 85
325
+ ],
326
+ "ɶ": [
327
+ 86
328
+ ],
329
+ "ɸ": [
330
+ 87
331
+ ],
332
+ "ɹ": [
333
+ 88
334
+ ],
335
+ "ɺ": [
336
+ 89
337
+ ],
338
+ "ɻ": [
339
+ 90
340
+ ],
341
+ "ɽ": [
342
+ 91
343
+ ],
344
+ "ɾ": [
345
+ 92
346
+ ],
347
+ "ʀ": [
348
+ 93
349
+ ],
350
+ "ʁ": [
351
+ 94
352
+ ],
353
+ "ʂ": [
354
+ 95
355
+ ],
356
+ "ʃ": [
357
+ 96
358
+ ],
359
+ "ʄ": [
360
+ 97
361
+ ],
362
+ "ʈ": [
363
+ 98
364
+ ],
365
+ "ʉ": [
366
+ 99
367
+ ],
368
+ "ʊ": [
369
+ 100
370
+ ],
371
+ "ʋ": [
372
+ 101
373
+ ],
374
+ "ʌ": [
375
+ 102
376
+ ],
377
+ "ʍ": [
378
+ 103
379
+ ],
380
+ "ʎ": [
381
+ 104
382
+ ],
383
+ "ʏ": [
384
+ 105
385
+ ],
386
+ "ʐ": [
387
+ 106
388
+ ],
389
+ "ʑ": [
390
+ 107
391
+ ],
392
+ "ʒ": [
393
+ 108
394
+ ],
395
+ "ʔ": [
396
+ 109
397
+ ],
398
+ "ʕ": [
399
+ 110
400
+ ],
401
+ "ʘ": [
402
+ 111
403
+ ],
404
+ "ʙ": [
405
+ 112
406
+ ],
407
+ "ʛ": [
408
+ 113
409
+ ],
410
+ "ʜ": [
411
+ 114
412
+ ],
413
+ "ʝ": [
414
+ 115
415
+ ],
416
+ "ʟ": [
417
+ 116
418
+ ],
419
+ "ʡ": [
420
+ 117
421
+ ],
422
+ "ʢ": [
423
+ 118
424
+ ],
425
+ "ʦ": [
426
+ 155
427
+ ],
428
+ "ʰ": [
429
+ 145
430
+ ],
431
+ "ʲ": [
432
+ 119
433
+ ],
434
+ "ˈ": [
435
+ 120
436
+ ],
437
+ "ˌ": [
438
+ 121
439
+ ],
440
+ "ː": [
441
+ 122
442
+ ],
443
+ "ˑ": [
444
+ 123
445
+ ],
446
+ "˞": [
447
+ 124
448
+ ],
449
+ "ˤ": [
450
+ 146
451
+ ],
452
+ "̃": [
453
+ 141
454
+ ],
455
+ "̧": [
456
+ 140
457
+ ],
458
+ "̩": [
459
+ 144
460
+ ],
461
+ "̪": [
462
+ 142
463
+ ],
464
+ "̯": [
465
+ 143
466
+ ],
467
+ "̺": [
468
+ 152
469
+ ],
470
+ "̻": [
471
+ 153
472
+ ],
473
+ "β": [
474
+ 125
475
+ ],
476
+ "ε": [
477
+ 147
478
+ ],
479
+ "θ": [
480
+ 126
481
+ ],
482
+ "χ": [
483
+ 127
484
+ ],
485
+ "ᵻ": [
486
+ 128
487
+ ],
488
+ "↑": [
489
+ 151
490
+ ],
491
+ "↓": [
492
+ 148
493
+ ],
494
+ "ⱱ": [
495
+ 129
496
+ ]
497
+ },
498
+ "num_symbols": 256,
499
+ "num_speakers": 1,
500
+ "speaker_id_map": {},
501
+ "piper_version": "1.0.0"
502
+ }
tts/{hi_IN-priyamvada-medium.onnx → piper_hi/hi_IN-priyamvada-medium.onnx} RENAMED
File without changes
tts/{hi_IN-priyamvada-medium.onnx.json → piper_hi/hi_IN-priyamvada-medium.onnx.json} RENAMED
File without changes
tts/piper_hi/hi_IN-rohan-medium.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65dc80fb34d9dcd1cf684cb297966a34983bbc93bb1696fe207f32b0b33a091
3
+ size 62950044
tts/piper_hi/hi_IN-rohan-medium.onnx.json ADDED
@@ -0,0 +1,512 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio": {
3
+ "sample_rate": 22050,
4
+ "quality": "medium"
5
+ },
6
+ "espeak": {
7
+ "voice": "hi"
8
+ },
9
+ "phoneme_type": "espeak",
10
+ "piper_version": "1.3.0",
11
+ "num_symbols": 256,
12
+ "num_speakers": 1,
13
+ "inference": {
14
+ "noise_scale": 0.667,
15
+ "length_scale": 1.0,
16
+ "noise_w": 0.8
17
+ },
18
+ "phoneme_id_map": {
19
+ "_": [
20
+ 0
21
+ ],
22
+ "^": [
23
+ 1
24
+ ],
25
+ "$": [
26
+ 2
27
+ ],
28
+ " ": [
29
+ 3
30
+ ],
31
+ "!": [
32
+ 4
33
+ ],
34
+ "'": [
35
+ 5
36
+ ],
37
+ "(": [
38
+ 6
39
+ ],
40
+ ")": [
41
+ 7
42
+ ],
43
+ ",": [
44
+ 8
45
+ ],
46
+ "-": [
47
+ 9
48
+ ],
49
+ ".": [
50
+ 10
51
+ ],
52
+ ":": [
53
+ 11
54
+ ],
55
+ ";": [
56
+ 12
57
+ ],
58
+ "?": [
59
+ 13
60
+ ],
61
+ "a": [
62
+ 14
63
+ ],
64
+ "b": [
65
+ 15
66
+ ],
67
+ "c": [
68
+ 16
69
+ ],
70
+ "d": [
71
+ 17
72
+ ],
73
+ "e": [
74
+ 18
75
+ ],
76
+ "f": [
77
+ 19
78
+ ],
79
+ "h": [
80
+ 20
81
+ ],
82
+ "i": [
83
+ 21
84
+ ],
85
+ "j": [
86
+ 22
87
+ ],
88
+ "k": [
89
+ 23
90
+ ],
91
+ "l": [
92
+ 24
93
+ ],
94
+ "m": [
95
+ 25
96
+ ],
97
+ "n": [
98
+ 26
99
+ ],
100
+ "o": [
101
+ 27
102
+ ],
103
+ "p": [
104
+ 28
105
+ ],
106
+ "q": [
107
+ 29
108
+ ],
109
+ "r": [
110
+ 30
111
+ ],
112
+ "s": [
113
+ 31
114
+ ],
115
+ "t": [
116
+ 32
117
+ ],
118
+ "u": [
119
+ 33
120
+ ],
121
+ "v": [
122
+ 34
123
+ ],
124
+ "w": [
125
+ 35
126
+ ],
127
+ "x": [
128
+ 36
129
+ ],
130
+ "y": [
131
+ 37
132
+ ],
133
+ "z": [
134
+ 38
135
+ ],
136
+ "æ": [
137
+ 39
138
+ ],
139
+ "ç": [
140
+ 40
141
+ ],
142
+ "ð": [
143
+ 41
144
+ ],
145
+ "ø": [
146
+ 42
147
+ ],
148
+ "ħ": [
149
+ 43
150
+ ],
151
+ "ŋ": [
152
+ 44
153
+ ],
154
+ "œ": [
155
+ 45
156
+ ],
157
+ "ǀ": [
158
+ 46
159
+ ],
160
+ "ǁ": [
161
+ 47
162
+ ],
163
+ "ǂ": [
164
+ 48
165
+ ],
166
+ "ǃ": [
167
+ 49
168
+ ],
169
+ "ɐ": [
170
+ 50
171
+ ],
172
+ "ɑ": [
173
+ 51
174
+ ],
175
+ "ɒ": [
176
+ 52
177
+ ],
178
+ "ɓ": [
179
+ 53
180
+ ],
181
+ "ɔ": [
182
+ 54
183
+ ],
184
+ "ɕ": [
185
+ 55
186
+ ],
187
+ "ɖ": [
188
+ 56
189
+ ],
190
+ "ɗ": [
191
+ 57
192
+ ],
193
+ "ɘ": [
194
+ 58
195
+ ],
196
+ "ə": [
197
+ 59
198
+ ],
199
+ "ɚ": [
200
+ 60
201
+ ],
202
+ "ɛ": [
203
+ 61
204
+ ],
205
+ "ɜ": [
206
+ 62
207
+ ],
208
+ "ɞ": [
209
+ 63
210
+ ],
211
+ "ɟ": [
212
+ 64
213
+ ],
214
+ "ɠ": [
215
+ 65
216
+ ],
217
+ "ɡ": [
218
+ 66
219
+ ],
220
+ "ɢ": [
221
+ 67
222
+ ],
223
+ "ɣ": [
224
+ 68
225
+ ],
226
+ "ɤ": [
227
+ 69
228
+ ],
229
+ "ɥ": [
230
+ 70
231
+ ],
232
+ "ɦ": [
233
+ 71
234
+ ],
235
+ "ɧ": [
236
+ 72
237
+ ],
238
+ "ɨ": [
239
+ 73
240
+ ],
241
+ "ɪ": [
242
+ 74
243
+ ],
244
+ "ɫ": [
245
+ 75
246
+ ],
247
+ "ɬ": [
248
+ 76
249
+ ],
250
+ "ɭ": [
251
+ 77
252
+ ],
253
+ "ɮ": [
254
+ 78
255
+ ],
256
+ "ɯ": [
257
+ 79
258
+ ],
259
+ "ɰ": [
260
+ 80
261
+ ],
262
+ "ɱ": [
263
+ 81
264
+ ],
265
+ "ɲ": [
266
+ 82
267
+ ],
268
+ "ɳ": [
269
+ 83
270
+ ],
271
+ "ɴ": [
272
+ 84
273
+ ],
274
+ "ɵ": [
275
+ 85
276
+ ],
277
+ "ɶ": [
278
+ 86
279
+ ],
280
+ "ɸ": [
281
+ 87
282
+ ],
283
+ "ɹ": [
284
+ 88
285
+ ],
286
+ "ɺ": [
287
+ 89
288
+ ],
289
+ "ɻ": [
290
+ 90
291
+ ],
292
+ "ɽ": [
293
+ 91
294
+ ],
295
+ "ɾ": [
296
+ 92
297
+ ],
298
+ "ʀ": [
299
+ 93
300
+ ],
301
+ "ʁ": [
302
+ 94
303
+ ],
304
+ "ʂ": [
305
+ 95
306
+ ],
307
+ "ʃ": [
308
+ 96
309
+ ],
310
+ "ʄ": [
311
+ 97
312
+ ],
313
+ "ʈ": [
314
+ 98
315
+ ],
316
+ "ʉ": [
317
+ 99
318
+ ],
319
+ "ʊ": [
320
+ 100
321
+ ],
322
+ "ʋ": [
323
+ 101
324
+ ],
325
+ "ʌ": [
326
+ 102
327
+ ],
328
+ "ʍ": [
329
+ 103
330
+ ],
331
+ "ʎ": [
332
+ 104
333
+ ],
334
+ "ʏ": [
335
+ 105
336
+ ],
337
+ "ʐ": [
338
+ 106
339
+ ],
340
+ "ʑ": [
341
+ 107
342
+ ],
343
+ "ʒ": [
344
+ 108
345
+ ],
346
+ "ʔ": [
347
+ 109
348
+ ],
349
+ "ʕ": [
350
+ 110
351
+ ],
352
+ "ʘ": [
353
+ 111
354
+ ],
355
+ "ʙ": [
356
+ 112
357
+ ],
358
+ "ʛ": [
359
+ 113
360
+ ],
361
+ "ʜ": [
362
+ 114
363
+ ],
364
+ "ʝ": [
365
+ 115
366
+ ],
367
+ "ʟ": [
368
+ 116
369
+ ],
370
+ "ʡ": [
371
+ 117
372
+ ],
373
+ "ʢ": [
374
+ 118
375
+ ],
376
+ "ʲ": [
377
+ 119
378
+ ],
379
+ "ˈ": [
380
+ 120
381
+ ],
382
+ "ˌ": [
383
+ 121
384
+ ],
385
+ "ː": [
386
+ 122
387
+ ],
388
+ "ˑ": [
389
+ 123
390
+ ],
391
+ "˞": [
392
+ 124
393
+ ],
394
+ "β": [
395
+ 125
396
+ ],
397
+ "θ": [
398
+ 126
399
+ ],
400
+ "χ": [
401
+ 127
402
+ ],
403
+ "ᵻ": [
404
+ 128
405
+ ],
406
+ "ⱱ": [
407
+ 129
408
+ ],
409
+ "0": [
410
+ 130
411
+ ],
412
+ "1": [
413
+ 131
414
+ ],
415
+ "2": [
416
+ 132
417
+ ],
418
+ "3": [
419
+ 133
420
+ ],
421
+ "4": [
422
+ 134
423
+ ],
424
+ "5": [
425
+ 135
426
+ ],
427
+ "6": [
428
+ 136
429
+ ],
430
+ "7": [
431
+ 137
432
+ ],
433
+ "8": [
434
+ 138
435
+ ],
436
+ "9": [
437
+ 139
438
+ ],
439
+ "̧": [
440
+ 140
441
+ ],
442
+ "̃": [
443
+ 141
444
+ ],
445
+ "̪": [
446
+ 142
447
+ ],
448
+ "̯": [
449
+ 143
450
+ ],
451
+ "̩": [
452
+ 144
453
+ ],
454
+ "ʰ": [
455
+ 145
456
+ ],
457
+ "ˤ": [
458
+ 146
459
+ ],
460
+ "ε": [
461
+ 147
462
+ ],
463
+ "↓": [
464
+ 148
465
+ ],
466
+ "#": [
467
+ 149
468
+ ],
469
+ "\"": [
470
+ 150
471
+ ],
472
+ "↑": [
473
+ 151
474
+ ],
475
+ "̺": [
476
+ 152
477
+ ],
478
+ "̻": [
479
+ 153
480
+ ],
481
+ "g": [
482
+ 154
483
+ ],
484
+ "ʦ": [
485
+ 155
486
+ ],
487
+ "X": [
488
+ 156
489
+ ],
490
+ "̝": [
491
+ 157
492
+ ],
493
+ "̊": [
494
+ 158
495
+ ],
496
+ "ɝ": [
497
+ 159
498
+ ],
499
+ "ʷ": [
500
+ 160
501
+ ]
502
+ },
503
+ "language": {
504
+ "code": "hi_IN",
505
+ "family": "hi",
506
+ "region": "IN",
507
+ "name_native": "हिन्दी",
508
+ "name_english": "Hindi",
509
+ "country_english": "India"
510
+ },
511
+ "dataset": "rohan"
512
+ }