import warnings

import soundfile as sf
import torch
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
from safetensors.torch import load_file


def run_parambe_tts(
    text,
    voice_prompt_path=None,
    *,
    checkpoint_path="parambe_swedish_v1.safetensors",
    output_path="parambe_output.wav",
    device=None,
):
    """Synthesize *text* as Swedish speech and write it to a WAV file.

    Loads the pretrained ``ChatterboxMultilingualTTS`` model, overlays the
    ``t3.``-prefixed tensors from a safetensors checkpoint onto ``model.t3``,
    generates audio with ``language_id="sv"`` and writes the result with
    ``soundfile``.

    Args:
        text: The sentence to speak. Must be a ``str``.
        voice_prompt_path: Optional path forwarded to ``model.generate`` as
            ``audio_prompt_path``; ``None`` forwards no prompt.
        checkpoint_path: Safetensors file holding the fine-tuned weights.
        output_path: Destination of the generated WAV file.
        device: Device handed to ``from_pretrained``. ``None`` selects
            ``"cuda"`` when CUDA is available and ``"cpu"`` otherwise.

    Returns:
        ``output_path``, so callers know where the audio was written.

    Raises:
        TypeError: If ``text`` is not a string.
        ValueError: If the checkpoint contains no ``t3.``-prefixed tensors.
    """
    # Validate before any expensive work (checkpoint read, model load).
    if not isinstance(text, str):
        raise TypeError(f"text must be a str, got {type(text).__name__}")

    # Read the checkpoint first: it is cheap compared with loading the base
    # model, so a missing or unusable checkpoint fails fast.
    prefix = "t3."
    state_dict = load_file(checkpoint_path)
    # Slice off only the leading prefix; str.replace would also delete any
    # later "t3." occurrence inside a key and corrupt the parameter name.
    t3_weights = {
        key[len(prefix):]: tensor
        for key, tensor in state_dict.items()
        if key.startswith(prefix)
    }
    if not t3_weights:
        # With strict=False an empty mapping loads "successfully" and the
        # base weights would be used without any indication.
        raise ValueError(
            f"no '{prefix}'-prefixed tensors found in checkpoint "
            f"{checkpoint_path!r}"
        )

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = ChatterboxMultilingualTTS.from_pretrained(device=device)

    # strict=False tolerates a partial checkpoint, but keys the module does
    # not recognise usually mean a mismatched checkpoint, so surface them.
    load_result = model.t3.load_state_dict(t3_weights, strict=False)
    if load_result.unexpected_keys:
        warnings.warn(
            f"{len(load_result.unexpected_keys)} checkpoint tensor(s) did not "
            f"match any T3 parameter and were ignored, e.g. "
            f"{load_result.unexpected_keys[0]!r}",
            stacklevel=2,
        )

    with torch.inference_mode():
        wav = model.generate(
            text=text,
            language_id="sv",
            audio_prompt_path=voice_prompt_path,
        )

    # model.sr is passed to soundfile as the sample rate of the written file.
    sf.write(output_path, wav.squeeze().cpu().numpy(), model.sr)
    return output_path
# Demo run: synthesize one Swedish sample sentence without a voice prompt.
run_parambe_tts(text="Välkommen till framtidens svenska röstteknologi.")