niobures commited on
Commit
3a7231d
·
verified ·
1 Parent(s): 8bee564

Chatterbox TTS (cs, fa, fr, multi)

Browse files
Files changed (40) hide show
  1. .gitattributes +1 -0
  2. cs/Chatterbox-TTS-Czech/.gitattributes +35 -0
  3. cs/Chatterbox-TTS-Czech/README.md +66 -0
  4. cs/Chatterbox-TTS-Czech/source.txt +1 -0
  5. cs/Chatterbox-TTS-Czech/t3_cs.safetensors +3 -0
  6. fa/Chatterbox-TTS-Persian-Farsi/.gitattributes +35 -0
  7. fa/Chatterbox-TTS-Persian-Farsi/README.md +101 -0
  8. fa/Chatterbox-TTS-Persian-Farsi/source.txt +1 -0
  9. fa/Chatterbox-TTS-Persian-Farsi/t3_fa.safetensors +3 -0
  10. fr/Chatterbox-TTS-French/.gitattributes +36 -0
  11. fr/Chatterbox-TTS-French/README.md +115 -0
  12. fr/Chatterbox-TTS-French/demo_audios/0.mp3 +0 -0
  13. fr/Chatterbox-TTS-French/demo_audios/1.mp3 +0 -0
  14. fr/Chatterbox-TTS-French/demo_audios/2.mp3 +0 -0
  15. fr/Chatterbox-TTS-French/demo_audios/3.mp3 +0 -0
  16. fr/Chatterbox-TTS-French/demo_audios/4.mp3 +0 -0
  17. fr/Chatterbox-TTS-French/demo_audios/5.mp3 +0 -0
  18. fr/Chatterbox-TTS-French/demo_audios/cs_0.mp3 +0 -0
  19. fr/Chatterbox-TTS-French/demo_audios/cs_1.mp3 +0 -0
  20. fr/Chatterbox-TTS-French/demo_audios/cs_2.mp3 +0 -0
  21. fr/Chatterbox-TTS-French/demo_audios/cs_3.mp3 +0 -0
  22. fr/Chatterbox-TTS-French/demo_audios/cs_4.mp3 +0 -0
  23. fr/Chatterbox-TTS-French/demo_audios/cs_5.mp3 +0 -0
  24. fr/Chatterbox-TTS-French/demo_audios/fa_0.mp3 +0 -0
  25. fr/Chatterbox-TTS-French/demo_audios/fa_1.mp3 +0 -0
  26. fr/Chatterbox-TTS-French/demo_audios/fa_2.mp3 +0 -0
  27. fr/Chatterbox-TTS-French/demo_audios/fa_3.mp3 +0 -0
  28. fr/Chatterbox-TTS-French/demo_audios/fa_4.mp3 +0 -0
  29. fr/Chatterbox-TTS-French/demo_audios/fa_5.mp3 +0 -0
  30. fr/Chatterbox-TTS-French/example.wav +3 -0
  31. fr/Chatterbox-TTS-French/source.txt +1 -0
  32. fr/Chatterbox-TTS-French/t3_cfg.safetensors +3 -0
  33. multi/Chatterbox-TTS-Server-Multilingual/.gitattributes +35 -0
  34. multi/Chatterbox-TTS-Server-Multilingual/Chatterbox-TTS-Server-Multilingual.zip +3 -0
  35. multi/Chatterbox-TTS-Server-Multilingual/NZG_ToolkitUI.zip +3 -0
  36. multi/Chatterbox-TTS-Server-Multilingual/README.md +3 -0
  37. multi/Chatterbox-TTS-Server-Multilingual/VibeVoice-Multi-Speaker-main.zip +3 -0
  38. multi/Chatterbox-TTS-Server-Multilingual/chatterbox-main.zip +3 -0
  39. multi/Chatterbox-TTS-Server-Multilingual/source.txt +1 -0
  40. multi/Chatterbox-TTS-Server-Multilingual/v3.rar +3 -0
.gitattributes CHANGED
@@ -90,3 +90,4 @@ multi/chatterbox-multilingual-ONNX/onnx/speech_encoder.onnx_data filter=lfs diff
90
  dv,en/chatterbox-tts-dhivehi/samples/no_ref_out.wav filter=lfs diff=lfs merge=lfs -text
91
  dv,en/chatterbox-tts-dhivehi/samples/reference_audio.wav filter=lfs diff=lfs merge=lfs -text
92
  dv,en/chatterbox-tts-dhivehi/samples/with_ref.wav filter=lfs diff=lfs merge=lfs -text
 
 
90
  dv,en/chatterbox-tts-dhivehi/samples/no_ref_out.wav filter=lfs diff=lfs merge=lfs -text
91
  dv,en/chatterbox-tts-dhivehi/samples/reference_audio.wav filter=lfs diff=lfs merge=lfs -text
92
  dv,en/chatterbox-tts-dhivehi/samples/with_ref.wav filter=lfs diff=lfs merge=lfs -text
93
+ fr/Chatterbox-TTS-French/example.wav filter=lfs diff=lfs merge=lfs -text
cs/Chatterbox-TTS-Czech/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
cs/Chatterbox-TTS-Czech/README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc0-1.0
3
+ language:
4
+ - cs
5
+ base_model:
6
+ - ResembleAI/chatterbox
7
+ pipeline_tag: text-to-speech
8
+ ---
9
+
10
+ # Chatterbox Czech
11
+ ## **Training quality TTS with low-resource data**
12
+
13
+ <div align="center"><img width="400px" src="https://www.shutterstock.com/image-vector/travel-czech-republic-culture-elements-600nw-2588019031.jpg" alt="Czech-image" /></div>
14
+
15
+ ## access the model :
16
+
17
+ The model is not open access. To request access, contact me at the following email address:
18
+ cyprienoucortex@gmail.com
19
+
20
+ ### demo audios:
21
+
22
+ "Dobrý den, vítáme vás v našem testu syntézy řeči"
23
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_0.mp3">Your browser does not support audio.</audio>
24
+
25
+ "Tři sta třiatřicet stříbrných křepelek přeletělo přes tři stříbrné střechy"
26
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_1.mp3">Your browser does not support audio.</audio>
27
+
28
+ "Kolik stojí devět tisíc osm set sedmdesát pět korun ?"
29
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_2.mp3">Your browser does not support audio.</audio>
30
+
31
+ "Prosím, nastav hlasitost na sedmdesát procent a přehraj znovu"
32
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_3.mp3">Your browser does not support audio.</audio>
33
+
34
+ "Doktor Křivohlavý napsal článek o umělé inteligenci"
35
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_4.mp3">Your browser does not support audio.</audio>
36
+
37
+ "Zvon zvoní, z dálky zní, ozvěna se vrací do údolí"
38
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/cs_5.mp3">Your browser does not support audio.</audio>
39
+
40
+ ### 💻 Inference Code
41
+
42
+ First, download the file from huggingface and place it in the current directory.
43
+
44
+ ```python
45
+ from chatterbox import mtl_tts
46
+ import torchaudio as ta
47
+ from safetensors.torch import load_file as load_safetensors
48
+
49
+ device = "cpu" # or mps or cuda
50
+
51
+ multilingual_model = mtl_tts.ChatterboxMultilingualTTS.from_pretrained(device=device)
52
+
53
+ # ----
54
+ # Then download the file from huggingface and place it in the current directory.
55
+ # ----
56
+
57
+
58
+
59
+ t3_state = load_safetensors("t3_cs.safetensors", device="cpu")
60
+ multilingual_model.t3.load_state_dict(t3_state)
61
+ multilingual_model.t3.to(device).eval()
62
+
63
+ czech_text = "Dobrý den, vítáme vás v našem testu syntézy řeči"
64
+ wav_czech = multilingual_model.generate(czech_text, language_id="cs")
65
+ ta.save("test-cs.wav", wav_czech, multilingual_model.sr)
66
+ ```
cs/Chatterbox-TTS-Czech/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Thomcles/Chatterbox-TTS-Czech
cs/Chatterbox-TTS-Czech/t3_cs.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f7cd33428bf610cce6e906ceaf6b076d9e35934f2b89a25d36a89e66f0217f3
3
+ size 2143989752
fa/Chatterbox-TTS-Persian-Farsi/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
fa/Chatterbox-TTS-Persian-Farsi/README.md ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc0-1.0
3
+ language:
4
+ - fa
5
+ base_model:
6
+ - ResembleAI/chatterbox
7
+ - speechbrain/sepformer-wham16k-enhancement
8
+ tags:
9
+ - text-to-speech
10
+ - Farsi
11
+ - Persian
12
+ - voice-cloning
13
+ datasets:
14
+ - Thomcles/Persian-Farsi-Speech
15
+ ---
16
+
17
+ # Chatterbox Persian-Farsi
18
+ ## **Training high-quality TTS with low-resource data**
19
+
20
+ **Chatterbox-TTS-Persian-Farsi** is a TTS trained on data that I cleaned, denoised, and filtered.
21
+
22
+ The total cost of training this TTS model was **$150** on my cloud hardware.
23
+
24
+ If you find this model useful and high-quality, and would like to support my work, you can send me money via ko-fi, or like it on huggingface.
25
+
26
+
27
+
28
+ Dataset : [Thomcles/Persian-Farsi-Speech](https://huggingface.co/datasets/Thomcles/Persian-Farsi-Speech)
29
+
30
+ ---
31
+
32
+ <div align="center"><img width="400px" src="https://www.shutterstock.com/image-vector/persian-typography-iranian-art-translated-600nw-2053950194.jpg" alt="Iranian art" /></div>
33
+
34
+ ---
35
+
36
+
37
+ ### demo audios:
38
+
39
+ "سلام! به آزمایش تبدیل متن به گفتار خوش آمدید."
40
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_0.mp3">Your browser does not support audio.</audio>
41
+
42
+ "سه سیب سرخ روی سینی سیمی است"
43
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_1.mp3">Your browser does not support audio.</audio>
44
+
45
+ "دیروز در تهران باران شد، امروز آفتابی است"
46
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_2.mp3">Your browser does not support audio.</audio>
47
+
48
+ "قیمت لپ‌تاپ جدید من پنجاه میلیون تومان است."
49
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_3.mp3">Your browser does not support audio.</audio>
50
+
51
+ "علی، نرگس و یوسف به دانشگاه شیراز رفتند."
52
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_4.mp3">Your browser does not support audio.</audio>
53
+
54
+ "لطفاً جملهٔ قبل را دوباره تکرار کن، دوباره تکرار کن، دوباره تکرار کن!"
55
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/demo_audios/fa_5.mp3">Your browser does not support audio.</audio>
56
+
57
+ ### 💻 Inference Code
58
+
59
+ First, download the file from huggingface and place it in the current directory.
60
+
61
+ ```python
62
+ from chatterbox import mtl_tts
63
+ import torchaudio as ta
64
+ from safetensors.torch import load_file as load_safetensors
65
+
66
+ device = "cpu" # or mps or cuda
67
+
68
+ multilingual_model = mtl_tts.ChatterboxMultilingualTTS.from_pretrained(device=device)
69
+
70
+ # ----
71
+ # Then download the file from huggingface and place it in the current directory.
72
+ # ----
73
+
74
+
75
+ t3_state = load_safetensors("t3_fa.safetensors", device="cpu")
76
+ multilingual_model.t3.load_state_dict(t3_state)
77
+ multilingual_model.t3.to(device).eval()
78
+
79
+ persian_text = "سلام! به آزمایش تبدیل متن به گفتار خوش آمدید."
80
+ wav_persian = multilingual_model.generate(persian_text, language_id=None)
81
+ ta.save("test-fa.wav", wav_persian, multilingual_model.sr)
82
+ ```
83
+
84
+ ## Acknowledgements
85
+
86
+ Thanks to @phamed for the Persian training data.
87
+
88
+ ## contact :
89
+ e-mail : cyprienoucortex@gmail.com
90
+
91
+ ## ☕ Support
92
+
93
+ I trained this model from my own financial resources with the sole aim of offering it to the huggingface open source community.
94
+
95
+ This model has cost me a lot of money. If you find this checkpoint useful and would like to support my work, you can do it via Ko-fi:
96
+
97
+ <p align="center">
98
+ <a href="https://ko-fi.com/thomcles" target="_blank" rel="noopener noreferrer">
99
+ <img src="https://storage.ko-fi.com/cdn/kofi3.png?v=3" alt="Buy Me a Coffee at ko-fi.com" width="200" rel="noopener noreferrer"/>
100
+ </a>
101
+ </p>
fa/Chatterbox-TTS-Persian-Farsi/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Thomcles/Chatterbox-TTS-Persian-Farsi
fa/Chatterbox-TTS-Persian-Farsi/t3_fa.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723640dc2b1bbf8f54d471812fae3f8c72c3ed9123d8b5d5d5907d7c084bc207
3
+ size 2143989752
fr/Chatterbox-TTS-French/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example.mp3 filter=lfs diff=lfs merge=lfs -text
fr/Chatterbox-TTS-French/README.md ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-4.0
3
+ datasets:
4
+ - amphion/Emilia-Dataset
5
+ language:
6
+ - fr
7
+ base_model:
8
+ - ResembleAI/chatterbox
9
+ pipeline_tag: text-to-speech
10
+ tags:
11
+ - french
12
+ - audio
13
+ - speech
14
+ - tts
15
+ - fine-tuning
16
+ - chatterbox
17
+ - Emilia
18
+ - voice-cloning
19
+ - zero-shot
20
+ ---
21
+
22
+ # Chatterbox TTS French 🥖
23
+
24
+ **Chatterbox TTS French** is a fine-tuned text-to-speech model specialized for the French language. The model has been trained on high-quality voice data for natural and expressive speech synthesis.
25
+
26
+ <div align="center"><img width="400px" src="https://ih1.redbubble.net/image.5397735048.6235/bg,f8f8f8-flat,750x,075,f-pad,750x1000,f8f8f8.jpg" alt="baguette-france-tour-eiffel-image" /></div>
27
+
28
+ - 🔊 **Language**: French 🇫🇷
29
+ - 🗣️ **Training dataset**: [Emilia Dataset (FR branch)](https://huggingface.co/datasets/amphion/Emilia-Dataset)
30
+ - ⏱️ **Data quantity**: 1400 hours of audio
31
+
32
+ ## Usage Example
33
+
34
+ Here’s how to generate speech using Chatterbox-TTS French:
35
+
36
+ ```python
37
+ import torch
38
+ import soundfile as sf
39
+ from chatterbox.tts import ChatterboxTTS
40
+ from huggingface_hub import hf_hub_download
41
+ from safetensors.torch import load_file
42
+
43
+ # Configuration
44
+ MODEL_REPO = "Thomcles/Chatterbox-TTS-French"
45
+ CHECKPOINT_FILENAME = "t3_cfg.safetensors"
46
+ OUTPUT_PATH = "output_cloned_voice.wav"
47
+ TEXT_TO_SYNTHESIZE = "Jean-Paul Sartre laisse à la postérité une œuvre considérable, tant littéraire que philosophique, ayant influencée à la fois la vie politique française d'après-guerre et les penseurs de son temps (Merleau-Ponty et Alain Badiou notamment)."
48
+
49
+ def get_device() -> str:
50
+ return "cuda" if torch.cuda.is_available() else "cpu"
51
+
52
+ def download_checkpoint(repo: str, filename: str) -> str:
53
+ return hf_hub_download(repo_id=repo, filename=filename)
54
+
55
+ def load_tts_model(repo: str, checkpoint_file: str, device: str) -> ChatterboxTTS:
56
+ model = ChatterboxTTS.from_pretrained(device=device)
57
+ checkpoint_path = download_checkpoint(repo, checkpoint_file)
58
+ t3_state = load_file(checkpoint_path, device="cpu")
59
+ model.t3.load_state_dict(t3_state)
60
+ return model
61
+
62
+ def synthesize_speech(model: ChatterboxTTS, text: str, audio_prompt_path:str, **kwargs) -> torch.Tensor:
63
+ with torch.inference_mode():
64
+ return model.generate(
65
+ text=text,
66
+ audio_prompt_path=audio_prompt_path,
67
+ **kwargs
68
+ )
69
+
70
+ def save_audio(waveform: torch.Tensor, path: str, sample_rate: int):
71
+ sf.write(path, waveform.squeeze().cpu().numpy(), sample_rate)
72
+
73
+ def main():
74
+ print("Loading model...")
75
+ device = get_device()
76
+ model = load_tts_model(MODEL_REPO, CHECKPOINT_FILENAME, device)
77
+
78
+ print(f"Generating speech on {device}...")
79
+ wav = synthesize_speech(
80
+ model,
81
+ TEXT_TO_SYNTHESIZE,
82
+ audio_prompt_path=None,
83
+ exaggeration=0.5,
84
+ temperature=0.6,
85
+ cfg_weight=0.3
86
+ )
87
+
88
+ print(f"Saving output to: {OUTPUT_PATH}")
89
+ save_audio(wav, OUTPUT_PATH, model.sr)
90
+ print("Done.")
91
+
92
+ if __name__ == "__main__":
93
+ main()
94
+ ```
95
+
96
+ Here is the output:
97
+
98
+ <audio controls src="https://huggingface.co/Thomcles/Chatterbox-TTS-French/resolve/main/example.mp3">Your browser does not support audio.</audio>
99
+
100
+ ### Base model license
101
+
102
+ The base model is licensed under the MIT License.
103
+ Base model: [Chatterbox](https://huggingface.co/ResembleAI/chatterbox)
104
+ License: [MIT](https://choosealicense.com/licenses/mit/)
105
+
106
+ ### Training Data License
107
+
108
+ This model was fine-tuned using a dataset licensed under Creative Commons Attribution 4.0 (CC BY 4.0).
109
+ Dataset: [Emilia](https://huggingface.co/datasets/amphion/Emilia-Dataset)
110
+ License: [Creative Commons Attribution 4.0 International](https://choosealicense.com/licenses/cc-by-4.0/)
111
+
112
+
113
+ ### Contact me
114
+
115
+ Interested in fine-tuning a TTS model in a specific language or building a multilingual voice solution? Don’t hesitate to reach out.
fr/Chatterbox-TTS-French/demo_audios/0.mp3 ADDED
Binary file (26.1 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/1.mp3 ADDED
Binary file (44.7 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/2.mp3 ADDED
Binary file (31 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/3.mp3 ADDED
Binary file (35.6 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/4.mp3 ADDED
Binary file (32.4 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/5.mp3 ADDED
Binary file (31.2 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_0.mp3 ADDED
Binary file (20.6 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_1.mp3 ADDED
Binary file (35.1 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_2.mp3 ADDED
Binary file (22.7 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_3.mp3 ADDED
Binary file (32.1 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_4.mp3 ADDED
Binary file (24.5 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/cs_5.mp3 ADDED
Binary file (29.6 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_0.mp3 ADDED
Binary file (24.7 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_1.mp3 ADDED
Binary file (16.8 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_2.mp3 ADDED
Binary file (21.6 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_3.mp3 ADDED
Binary file (23.5 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_4.mp3 ADDED
Binary file (23.3 kB). View file
 
fr/Chatterbox-TTS-French/demo_audios/fa_5.mp3 ADDED
Binary file (30.6 kB). View file
 
fr/Chatterbox-TTS-French/example.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6f17fcf884ea6f0f7e8a537cba6c1b397bb11a90e6cecb739209d4c4cd02aa
3
+ size 474284
fr/Chatterbox-TTS-French/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Thomcles/Chatterbox-TTS-French
fr/Chatterbox-TTS-French/t3_cfg.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1066369c4ba2e0351199ceff2fcb5098e4b7233bba3dbbc12f1f9a78aa741c
3
+ size 2129653744
multi/Chatterbox-TTS-Server-Multilingual/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
multi/Chatterbox-TTS-Server-Multilingual/Chatterbox-TTS-Server-Multilingual.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd3a52469ca66bfe63f4676c84d4d59400887cbaa80cade3d5096d001fc76140
3
+ size 55079748
multi/Chatterbox-TTS-Server-Multilingual/NZG_ToolkitUI.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eebb636fc3322a1b1ef395ca3d9942ac4e83aaa28b7ec1f7d8e1fe9e59b686d
3
+ size 16530926
multi/Chatterbox-TTS-Server-Multilingual/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
multi/Chatterbox-TTS-Server-Multilingual/VibeVoice-Multi-Speaker-main.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20f9c8ed491066493e540013b6e955bdbacefab68b2eae5fcc1d0cc7cc96939a
3
+ size 1103689
multi/Chatterbox-TTS-Server-Multilingual/chatterbox-main.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61dbe0dcd64b4df41f96483f67214bfa88afa12b945aba85b9c2f8f1f0f62f1
3
+ size 108849
multi/Chatterbox-TTS-Server-Multilingual/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/nzgnzg73/Chatterbox-TTS-Server-Multilingual
multi/Chatterbox-TTS-Server-Multilingual/v3.rar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52c645937b0523d7cd8d87fd9bdb7e0062f8c997c21e8090209ba69dcd1d9650
3
+ size 9027335408