"""Synthesize a Persian test sentence with Chatterbox multilingual TTS.

The script loads the pretrained ``ChatterboxMultilingualTTS`` model, replaces
the weights of its ``t3`` sub-module with the ``t3_fa.safetensors`` checkpoint
downloaded from the ``Thomcles/Chatterbox-TTS-Persian-Farsi`` Hugging Face
repository, generates audio for one Persian sentence, and writes the result to
``test-fa.wav``.

The Hugging Face access token is read from the ``HF_TOKEN`` environment
variable and passed to the download call as-is (it may be ``None``).
"""

import os

import torchaudio as ta
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

from src.chatterbox.mtl_tts import ChatterboxMultilingualTTS

# Location of the replacement ``t3`` checkpoint on the Hugging Face Hub.
T3_REPO_ID = "Thomcles/Chatterbox-TTS-Persian-Farsi"
T3_FILENAME = "t3_fa.safetensors"

# Where the synthesized audio is written.
OUTPUT_PATH = "test-fa.wav"

device = "cpu"

my_token = os.getenv("HF_TOKEN")
# Report only whether a token exists; never print the token itself.
print(f"Token found: {my_token is not None}")

multilingual_model = ChatterboxMultilingualTTS.from_pretrained(device=device)

# Fetch the checkpoint file and get its local path.
model_path = hf_hub_download(
    repo_id=T3_REPO_ID,
    filename=T3_FILENAME,
    token=my_token,
)

# Load the tensors on CPU first; the module is moved to ``device`` afterwards.
t3_state = load_safetensors(model_path, device="cpu")
multilingual_model.t3.load_state_dict(t3_state)
multilingual_model.t3.to(device).eval()

persian_text = "سلام! به آزمایش تبدیل متن به گفتار خوش آمدید."

# NOTE(review): ``language_id=None`` is kept exactly as in the original script;
# presumably the swapped-in checkpoint does not need a language tag — confirm
# against the checkpoint's model card before changing it.
wav_persian = multilingual_model.generate(persian_text, language_id=None)

ta.save(OUTPUT_PATH, wav_persian, multilingual_model.sr)