hootan09 committed on
Commit
555a8b4
·
verified ·
1 Parent(s): b35e476

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +88 -1
README.md CHANGED
@@ -139,4 +139,91 @@ See `example_tts.py` for more examples.
139
  Every audio file generated by Chatterbox includes [Resemble AI's Perth (Perceptual Threshold) Watermarker](https://github.com/resemble-ai/perth) - imperceptible neural watermarks that survive MP3 compression, audio editing, and common manipulations while maintaining nearly 100% detection accuracy.
140
 
141
  # Disclaimer
142
- Don't use this model to do bad things. Prompts are sourced from freely available data on the internet.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  Every audio file generated by Chatterbox includes [Resemble AI's Perth (Perceptual Threshold) Watermarker](https://github.com/resemble-ai/perth) - imperceptible neural watermarks that survive MP3 compression, audio editing, and common manipulations while maintaining nearly 100% detection accuracy.
140
 
141
  # Disclaimer
142
+ Don't use this model to do bad things. Prompts are sourced from freely available data on the internet.
143
+
144
+
145
+ # Use EN
146
+ ```sh
147
+ uv init --python 3.11
148
+ uv sync
149
+ source .venv/bin/activate
150
+ git clone https://github.com/resemble-ai/chatterbox.git
151
+ cd chatterbox
152
+ # Before installing, remove gradio, the Russian TTS dependencies, etc. from pyproject.toml
153
+ python -m pip install -e .
154
+
155
+ # Run on macOS:
156
+ python example_for_mac.py
157
+ ```
158
+
159
+ # Use FA
160
+ ```py
161
+ #example_fa.py
162
+ from chatterbox.mtl_tts import ChatterboxMultilingualTTS
163
+ import torch
164
+ import torchaudio as ta
165
+ from safetensors.torch import load_file as load_safetensors
166
+ from huggingface_hub import hf_hub_download, login
167
+ import os
168
+
169
+ # Detect device (Mac with M1/M2/M3/M4)
170
+ device = "mps" if torch.backends.mps.is_available() else "cpu"
171
+ map_location = torch.device(device)
172
+
173
+ torch_load_original = torch.load
174
+ def patched_torch_load(*args, **kwargs):
175
+ if 'map_location' not in kwargs:
176
+ kwargs['map_location'] = map_location
177
+ return torch_load_original(*args, **kwargs)
178
+
179
+ torch.load = patched_torch_load
180
+
181
+ # Load the multilingual TTS model on the detected device (MPS if available, otherwise CPU)
182
+ multilingual_model = ChatterboxMultilingualTTS.from_pretrained(device)
183
+
184
+ # read token
185
+ token = "YOUR_TOKEN"
186
+ login(token)
187
+
188
+ # Define the model repo and file path
189
+ model_repo = "Thomcles/Chatterbox-TTS-Persian-Farsi"
190
+ file_name = "t3_fa.safetensors"
191
+
192
+ # Define the cache directory (your custom local folder)
193
+ cache_dir = "./cacheModel"
194
+
195
+ # Create the cache directory if it doesn't exist
196
+ os.makedirs(cache_dir, exist_ok=True)
197
+
198
+ # Download the model weights to the specified cache directory
199
+ file_path = hf_hub_download(repo_id=model_repo, filename=file_name, cache_dir=cache_dir)
200
+
201
+ print(f"Model weights downloaded to: {file_path}")
202
+
203
+ # Load the T3 model state dict for Persian, explicitly mapping to CPU
204
+ # `load_safetensors` is called with device='cpu' so the weights are first loaded on the CPU
205
+ t3_state = load_safetensors(file_path, device='cpu')
206
+
207
+ # Load the T3 model's state dict into the multilingual model and move it to the selected device
208
+ multilingual_model.t3.load_state_dict(t3_state)
209
+ multilingual_model.t3.to(device).eval() # Ensure it's on the selected device and in eval mode
210
+
211
+ # Define the Persian text you want to convert to speech
212
+ persian_text = "سلام! به آزمایش تبدیل متن به گفتار خوش آمدید."
213
+
214
+
215
+ # Generate the speech for the provided Persian text
216
+ AUDIO_PROMPT_PATH = "target_voice.wav"
217
+ wav_persian = multilingual_model.generate(
218
+ persian_text,
219
+ language_id=None,
220
+ audio_prompt_path=AUDIO_PROMPT_PATH,
221
+ exaggeration=0.5,
222
+ cfg_weight=0.5
223
+ )
224
+
225
+ # Save the generated speech to a WAV file
226
+ ta.save("test-fa.wav", wav_persian, multilingual_model.sr)
227
+
228
+ print("Speech synthesis complete, saved as 'test-fa.wav'")
229
+ ```