niobures committed on
Commit
12471b0
·
verified ·
1 Parent(s): f120f13

Kani-TTS (ur, vi)

Browse files
.gitattributes CHANGED
@@ -202,3 +202,15 @@ zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q5_K_M.gguf filter=lfs diff=lfs merge=
202
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
203
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
204
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
203
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
204
  zh/kani-tts-400m-zh-GGUF/kani-tts-400m-zh.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
205
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.f16.gguf filter=lfs diff=lfs merge=lfs -text
206
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
207
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
208
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
209
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
210
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
211
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
212
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
213
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
214
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
215
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
216
+ ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
ur/kani-tts-400m-ur-GGUF/.gitattributes ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kani-tts-400m-ur.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
37
+ kani-tts-400m-ur.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
38
+ kani-tts-400m-ur.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
39
+ kani-tts-400m-ur.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
40
+ kani-tts-400m-ur.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
41
+ kani-tts-400m-ur.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
42
+ kani-tts-400m-ur.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
43
+ kani-tts-400m-ur.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
44
+ kani-tts-400m-ur.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
45
+ kani-tts-400m-ur.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
46
+ kani-tts-400m-ur.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
47
+ kani-tts-400m-ur.f16.gguf filter=lfs diff=lfs merge=lfs -text
ur/kani-tts-400m-ur-GGUF/README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mahwizzzz/kani-tts-400m-ur
3
+ datasets:
4
+ - mahwizzzz/UAT
5
+ language:
6
+ - ur
7
+ library_name: transformers
8
+ license: apache-2.0
9
+ mradermacher:
10
+ readme_rev: 1
11
+ quantized_by: mradermacher
12
+ ---
13
+ ## About
14
+
15
+ <!-- ### quantize_version: 2 -->
16
+ <!-- ### output_tensor_quantised: 1 -->
17
+ <!-- ### convert_type: hf -->
18
+ <!-- ### vocab_type: -->
19
+ <!-- ### tags: -->
20
+ <!-- ### quants: x-f16 Q4_K_S Q2_K Q8_0 Q6_K Q3_K_M Q3_K_S Q3_K_L Q4_K_M Q5_K_S Q5_K_M IQ4_XS -->
21
+ <!-- ### quants_skip: -->
22
+ <!-- ### skip_mmproj: -->
23
+ static quants of https://huggingface.co/mahwizzzz/kani-tts-400m-ur
24
+
25
+ <!-- provided-files -->
26
+
27
+ ***For a convenient overview and download list, visit our [model page for this model](https://hf.tst.eu/model#kani-tts-400m-ur-GGUF).***
28
+
29
+ Weighted/imatrix quants do not seem to be available (from me) at this time. If they do not show up a week or so after the static ones, I have probably not planned for them. Feel free to request them by opening a Community Discussion.
30
+ ## Usage
31
+
32
+ If you are unsure how to use GGUF files, refer to one of [TheBloke's
33
+ READMEs](https://huggingface.co/TheBloke/KafkaLM-70B-German-V0.1-GGUF) for
34
+ more details, including on how to concatenate multi-part files.
35
+
36
+ ## Provided Quants
37
+
38
+ (sorted by size, not necessarily quality. IQ-quants are often preferable over similar-sized non-IQ quants)
39
+
40
+ | Link | Type | Size/GB | Notes |
41
+ |:-----|:-----|--------:|:------|
42
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q2_K.gguf) | Q2_K | 0.3 | |
43
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q3_K_S.gguf) | Q3_K_S | 0.3 | |
44
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q3_K_M.gguf) | Q3_K_M | 0.3 | lower quality |
45
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q3_K_L.gguf) | Q3_K_L | 0.3 | |
46
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.IQ4_XS.gguf) | IQ4_XS | 0.3 | |
47
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q4_K_S.gguf) | Q4_K_S | 0.3 | fast, recommended |
48
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q4_K_M.gguf) | Q4_K_M | 0.3 | fast, recommended |
49
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q5_K_S.gguf) | Q5_K_S | 0.4 | |
50
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q5_K_M.gguf) | Q5_K_M | 0.4 | |
51
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q6_K.gguf) | Q6_K | 0.4 | very good quality |
52
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.Q8_0.gguf) | Q8_0 | 0.5 | fast, best quality |
53
+ | [GGUF](https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF/resolve/main/kani-tts-400m-ur.f16.gguf) | f16 | 0.8 | 16 bpw, overkill |
54
+
55
+ Here is a handy graph by ikawrakow comparing some lower-quality quant
56
+ types (lower is better):
57
+
58
+ ![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png)
59
+
60
+ And here are Artefact2's thoughts on the matter:
61
+ https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9
62
+
63
+ ## FAQ / Model Request
64
+
65
+ See https://huggingface.co/mradermacher/model_requests for some answers to
66
+ questions you might have and/or if you want some other model quantized.
67
+
68
+ ## Thanks
69
+
70
+ I thank my company, [nethype GmbH](https://www.nethype.de/), for letting
71
+ me use its servers and providing upgrades to my workstation to enable
72
+ this work in my free time.
73
+
74
+ <!-- end -->
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.IQ4_XS.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e149fdf02edf4333657dc14e78fb04ba96961b338e1deba5400a9bc74373df
3
+ size 224887744
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q2_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cbbbd78a24df6bdaab2f1ff9e74a2d60221c76721f0eb98c1ef45f617b0bdd3
3
+ size 173679552
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdcd992fcf17b1bf75bd0aebd868980a5bb77459d76f0974b5926494450f2f07
3
+ size 216130496
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a2d7eceb89a2545a421eb22ec532d9573d0920d6b78caace61be90e6987f71
3
+ size 206234560
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q3_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d388da2a7ca12dd6806181c4e9463340703b9f2f46650280a69e8aad3e06c2fc
3
+ size 194233280
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:475b96d9f86123275910f92deddadb47d30ccd2809edf31e4b7c1ac71c03436a
3
+ size 242394048
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8543cae620937425084d9d13b3330bc93137d66dac57331068761b02a7be137
3
+ size 233833408
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c55135e541f840112a45fe0579b2543ccec54954e3063a9af7af09128196a4df
3
+ size 273458112
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf9f0d38fccb6a09711116b838a1c7bab7ff9aec6253c83fa91e5984460cb1e
3
+ size 268305344
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0cdee2e6c69af5a6d3f814b147a6dad9f19a454dc804b873d3dd3e5c3cd228
3
+ size 306463680
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2662a1e67b66080094709eeaed2ee07b14678e9314cc38efcc83c51019eeab7
3
+ size 396020192
ur/kani-tts-400m-ur-GGUF/kani-tts-400m-ur.f16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b9c02f581aeea5a82aae7f6389b08252b87f0f821911d4e2fbe501f696fe74
3
+ size 742690592
ur/kani-tts-400m-ur-GGUF/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/mradermacher/kani-tts-400m-ur-GGUF
vi/kani-tts-370m-vi/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
vi/kani-tts-370m-vi/README.md ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - pnnbao-ump/VieNeu-TTS-140h
5
+ - pnnbao-ump/VieNeu-TTS-140h-nanocodec
6
+ - pnnbao-ump/VieNeu-TTS-500h-dialects
7
+ language:
8
+ - vi
9
+ base_model:
10
+ - nineninesix/kani-tts-370m
11
+ pipeline_tag: text-to-speech
12
+ ---
13
+ # 😻 Kani TTS Vie
14
+
15
+ [![GitHub](https://img.shields.io/badge/GitHub-Repository-blue)](https://github.com/pnnbao97/Kani-TTS-Vie)
16
+ [![Hugging Face](https://img.shields.io/badge/Hugging%20Face-Model-yellow)](https://huggingface.co/pnnbao-ump/kani-tts-370m-vie)
17
+
18
+ **Fast and Expressive Vietnamese Text-to-Speech Model**
19
+
20
+ ![logo-removebg-preview](https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/s83RYjg6BCrSPTNNXMu4u.png)
21
+
22
+ **Kani TTS Vie** là mô hình chuyển văn bản thành giọng nói tiếng Việt tốc độ cao, hỗ trợ streaming, rất phù hợp cho ứng dụng thời gian thực, được fine-tune từ mô hình nền [kani-tts-370m](https://huggingface.co/nineninesix/kani-tts-370m) trên dữ liệu:
23
+ - **VieNeu-TTS-500h-dialects** (đa vùng miền)
24
+ - **VieNeu-TTS-140h** + **140h-nanocodec** (giọng phổ thông & dữ liệu nén cao chất lượng)
25
+
26
+ ## ✨ Tính năng
27
+
28
+ - 🚀 **Siêu nhanh**: Inference chỉ ~3 giây cho đoạn văn ngắn
29
+ - 🎭 **Đa giọng**: Hỗ trợ nhiều giọng đọc tiếng Việt (Nam/Nữ, Bắc/Nam). Lưu ý: các bạn hoàn toàn có thể sử dụng giọng của các ngôn ngữ khác để inference tiếng Việt.
30
+ - 📝 **Chuẩn hóa văn bản**: Tự động chuẩn hóa số, ký hiệu, từ viết tắt
31
+ - 🎯 **Chất lượng cao**: Âm thanh tự nhiên, rõ ràng với sample rate 22.05kHz
32
+
33
+ ## 🎤 Giọng đọc hỗ trợ
34
+
35
+ ### Tiếng Việt
36
+ > **Tất cả các mẫu âm thanh tiếng Việt dưới đây đều được sinh ra (inferred) từ mô hình Kani TTS Vie.**
37
+
38
+ - **Khoa** – Nam miền Bắc
39
+ > "Cũng trong thập niên 1960, Jones quyết định đương đầu với một thử thách mới, viết nhạc phim."
40
+
41
+ <audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/rDS7z178YfiE3O68uq0q9.wav"></audio>
42
+
43
+ - **Hùng** – Nam miền Nam
44
+ > "Ông biết hiện giờ nhiều người không còn thích đọc sách nữa, thế nên dù ai đó chỉ vô tình ghé hiệu sách, ông cũng đều trân trọng cả."
45
+
46
+ <audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/v7JvkkN2mhMjDsmHGkJ4Z.wav"></audio>
47
+
48
+ - **Trinh** – Nữ miền Nam
49
+ > "Đi vào chi tiết Làm việc nhóm và tính cứng nhắc cá nhân là hai điều không thể nào tương thích với nhau."
50
+
51
+ <audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/62AGPQEbFcVK4aZJnRgdX.wav"></audio>
52
+
53
+ ---
54
+
55
+ ### Tiếng Anh
56
+ > **💡 Lưu ý:** Các bạn hoàn toàn có thể dùng giọng của những ngôn ngữ khác để **inference cho tiếng Việt**!
57
+
58
+ - **David** – British
59
+ > "Ngược lại, những người không thể đào tạo sẽ gặp khó khăn với sự thay đổi và kết quả là họ không thể thích nghi."
60
+
61
+ <audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/jY7B-tjuzSYo7O-F8V4-3.wav"></audio>
62
+ - **Katie** – Irish
63
+ > "Những người này sẽ vò đầu bứt tai, chịu đựng nỗi đau thể chất khi nghĩ đến chuyện làm những điều khác biệt."
64
+
65
+ <audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68b923a86c86c127a1975eda/3Uaqx-Dj4lxtq-ChYUTq_.wav"></audio>
66
+
67
+ ---
68
+
69
+ ### Các Giọng Khác để Test (Cross-lingual)
70
+
71
+ **(Sử dụng các giọng này với tiếng Việt để tạo hiệu ứng giọng nước ngoài đọc tiếng Việt)**
72
+
73
+ * **English:** Puck (Gemini), Kore (Gemini), Andrew, Jenny (Irish), Simon
74
+ * **Korean:** Seulgi
75
+ * **German:** Bert, Thorsten (Hessisch)
76
+ * **Spanish:** Maria
77
+ * **Chinese:** Mei (Cantonese), Ming (Shanghai)
78
+ * **Arabic:** Karim, Nur
79
+
80
+ ### Cần finetune giọng của riêng bạn?
81
+ Bạn hoàn toàn có thể **tinh chỉnh (fine-tuning) mô hình Kani TTS Vie** trên tập dữ liệu cá nhân của mình bằng phương pháp LoRA.
82
+
83
+ Vui lòng truy cập thư mục `finetune/` trong repository GitHub dưới đây để tìm các Notebook hướng dẫn chi tiết:
84
+ 🔗 [https://github.com/pnnbao97/Kani-TTS-Vie](https://github.com/pnnbao97/Kani-TTS-Vie)
85
+
86
+ ## 🔧 Sử dụng
87
+
88
+ ### Trên Hugging Face Space
89
+
90
+ Truy cập trực tiếp tại: [pnnbao-ump/Kani-TTS-Vie](https://huggingface.co/spaces/pnnbao-ump/Kani-TTS-Vie)
91
+
92
+ ### Local Installation
93
+
94
+ ```bash
95
+ # Clone repository
96
+ git clone https://github.com/pnnbao97/Kani-TTS-Vie.git
97
+ cd Kani-TTS-Vie
98
+
99
+ # Cài đặt dependencies
100
+ uv sync
101
+
102
+ # Chạy ứng dụng
103
+ uv run uvicorn server:app
104
+ ```
105
+
106
+ ### Python API
107
+
108
+ ```python
109
+ from kani_vie.tts_core import Config, KaniModel, NemoAudioPlayer
110
+ from utils.normalize_text import VietnameseTTSNormalizer
111
+
112
+ # Khởi tạo model
113
+ config = Config()
114
+ player = NemoAudioPlayer(config)
115
+ kani = KaniModel(config, player)
116
+ normalizer = VietnameseTTSNormalizer()
117
+
118
+ # Tạo giọng nói
119
+ text = "Xin chào! Tôi là Kani TTS."
120
+ processed_text = normalizer.normalize(text)
121
+ audio, _ = kani.run_model(processed_text, speaker_id="nam-mien-nam")
122
+
123
+ # Lưu file
124
+ import soundfile as sf
125
+ sf.write("output.wav", audio, 22050)
126
+ ```
127
+
128
+ ## 📊 Thông số kỹ thuật
129
+
130
+ | Thông số | Giá trị |
131
+ |----------|---------|
132
+ | **Model size** | 370M parameters |
133
+ | **Sample rate** | 22,050 Hz |
134
+ | **Inference time** | ~3s cho văn bản ngắn |
135
+ | **RTF** | ~0.1-0.3x (real-time factor) |
136
+ | **Base model** | [nineninesix/kani-tts-370m](https://huggingface.co/nineninesix/kani-tts-370m) |
137
+
138
+ ## 📚 Datasets
139
+
140
+ Model được fine-tune trên:
141
+ - [VieNeu-TTS-140h](https://huggingface.co/datasets/pnnbao-ump/VieNeu-TTS-140h)
142
+ - [VieNeu-TTS-140h-nanocodec](https://huggingface.co/datasets/pnnbao-ump/VieNeu-TTS-140h-nanocodec)
143
+ - [VieNeu-TTS-500h-dialects](https://huggingface.co/datasets/pnnbao-ump/VieNeu-TTS-500h-dialects)
vi/kani-tts-370m-vi/chat_template.jinja ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token -}}
2
+ {%- set system_prompt = "" -%}
3
+ {%- set ns = namespace(system_prompt="") -%}
4
+ {%- if messages[0]["role"] == "system" -%}
5
+ {%- set ns.system_prompt = messages[0]["content"] -%}
6
+ {%- set messages = messages[1:] -%}
7
+ {%- endif -%}
8
+ {%- if tools -%}
9
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%}
10
+ {%- for tool in tools -%}
11
+ {%- if tool is not string -%}
12
+ {%- set tool = tool | tojson -%}
13
+ {%- endif -%}
14
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
15
+ {%- if not loop.last -%}
16
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
17
+ {%- endif -%}
18
+ {%- endfor -%}
19
+ {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
20
+ {%- endif -%}
21
+ {%- if ns.system_prompt -%}
22
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
23
+ {%- endif -%}
24
+ {%- for message in messages -%}
25
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
26
+ {%- set content = message["content"] -%}
27
+ {%- if content is not string -%}
28
+ {%- set content = content | tojson -%}
29
+ {%- endif -%}
30
+ {%- if message["role"] == "tool" -%}
31
+ {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
32
+ {%- endif -%}
33
+ {{- content + "<|im_end|>\n" -}}
34
+ {%- endfor -%}
35
+ {%- if add_generation_prompt -%}
36
+ {{- "<|im_start|>assistant\n" -}}
37
+ {%- endif -%}
vi/kani-tts-370m-vi/config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Lfm2ForCausalLM"
4
+ ],
5
+ "block_auto_adjust_ff_dim": true,
6
+ "block_dim": 1024,
7
+ "block_ff_dim": 6656,
8
+ "block_ffn_dim_multiplier": 1.0,
9
+ "block_mlp_init_scale": 1.0,
10
+ "block_multiple_of": 256,
11
+ "block_norm_eps": 1e-05,
12
+ "block_out_init_scale": 1.0,
13
+ "block_use_swiglu": true,
14
+ "block_use_xavier_init": true,
15
+ "bos_token_id": 1,
16
+ "conv_L_cache": 3,
17
+ "conv_bias": false,
18
+ "conv_dim": 1024,
19
+ "conv_dim_out": 1024,
20
+ "conv_use_xavier_init": true,
21
+ "dtype": "bfloat16",
22
+ "eos_token_id": 7,
23
+ "hidden_size": 1024,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 6656,
26
+ "layer_types": [
27
+ "conv",
28
+ "conv",
29
+ "full_attention",
30
+ "conv",
31
+ "conv",
32
+ "full_attention",
33
+ "conv",
34
+ "conv",
35
+ "full_attention",
36
+ "conv",
37
+ "full_attention",
38
+ "conv",
39
+ "full_attention",
40
+ "conv",
41
+ "full_attention",
42
+ "conv"
43
+ ],
44
+ "max_position_embeddings": 128000,
45
+ "model_type": "lfm2",
46
+ "norm_eps": 1e-05,
47
+ "num_attention_heads": 16,
48
+ "num_heads": 16,
49
+ "num_hidden_layers": 16,
50
+ "num_key_value_heads": 8,
51
+ "pad_token_id": 0,
52
+ "rope_theta": 1000000.0,
53
+ "speaker_settings": {
54
+ "speaker_list": [
55
+ "david",
56
+ "puck",
57
+ "kore",
58
+ "andrew",
59
+ "jenny",
60
+ "simon",
61
+ "katie",
62
+ "seulgi",
63
+ "bert",
64
+ "thorsten",
65
+ "maria",
66
+ "mei",
67
+ "ming",
68
+ "karim",
69
+ "nur"
70
+ ],
71
+ "status": "multispeaker"
72
+ },
73
+ "transformers_version": "4.56.0",
74
+ "use_cache": true,
75
+ "use_pos_enc": true,
76
+ "vocab_size": 80539
77
+ }
vi/kani-tts-370m-vi/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 7
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.56.0"
9
+ }
vi/kani-tts-370m-vi/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d7bfa5cdcde2016acd47384e8d39fd1239ee481e6085f5128639240abce424
3
+ size 739710608
vi/kani-tts-370m-vi/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/pnnbao-ump/kani-tts-370m-vie
vi/kani-tts-370m-vi/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|pad|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
vi/kani-tts-370m-vi/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vi/kani-tts-370m-vi/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff