from typing import Optional

import soundfile as sf
import torch
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
from safetensors.torch import load_file

# Prefix that marks the T3 sub-model's tensors inside the fine-tuned checkpoint.
_T3_PREFIX = "t3."


def run_parambe_tts(
    text: str,
    voice_prompt_path: Optional[str] = None,
    *,
    checkpoint_path: str = "parambe_swedish_v1.safetensors",
    output_path: str = "parambe_output.wav",
    device: Optional[str] = None,
) -> None:
    """Synthesize *text* with the Parambe fine-tuned T3 weights and save a WAV.

    Loads the pretrained ``ChatterboxMultilingualTTS`` model, overlays the
    ``t3.``-prefixed tensors from *checkpoint_path* onto ``model.t3``, runs
    ``model.generate`` with ``language_id="sv"``, and writes the result to
    *output_path* at the model's sample rate (``model.sr``).

    Args:
        text: Text to synthesize.
        voice_prompt_path: Optional path to a reference recording, forwarded
            to ``generate`` as ``audio_prompt_path``. ``None`` is forwarded
            unchanged (per the original author's note, the default voice
            style is then used).
        checkpoint_path: Safetensors file holding the fine-tuned weights.
        output_path: Destination of the generated WAV file.
        device: Torch device string passed to ``from_pretrained``. When
            ``None``, ``"cuda"`` is used if available, otherwise ``"cpu"``.
            NOTE(review): CPU loading depends on the installed chatterbox
            version handling non-CUDA devices -- confirm.

    Raises:
        ValueError: If the checkpoint contains no ``t3.``-prefixed tensors,
            which would otherwise leave the base model silently unmodified.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = ChatterboxMultilingualTTS.from_pretrained(device=device)

    state_dict = load_file(checkpoint_path)
    # Strip only the leading prefix; str.replace would also rewrite any
    # later occurrence of "t3." inside a parameter name.
    t3_dict = {
        key[len(_T3_PREFIX):]: tensor
        for key, tensor in state_dict.items()
        if key.startswith(_T3_PREFIX)
    }
    if not t3_dict:
        # With strict=False an empty dict loads "successfully" and the
        # fine-tuned weights would be ignored without any indication.
        raise ValueError(
            f"No '{_T3_PREFIX}'-prefixed tensors found in {checkpoint_path!r}"
        )
    # strict=False is kept from the original: the checkpoint is allowed to
    # cover only part of the T3 parameters.
    model.t3.load_state_dict(t3_dict, strict=False)

    with torch.inference_mode():
        wav = model.generate(
            text=text,
            language_id="sv",
            audio_prompt_path=voice_prompt_path,
        )

    sf.write(output_path, wav.squeeze().cpu().numpy(), model.sr)


if __name__ == "__main__":
    # Default usage (standard Parambe voice):
    run_parambe_tts("Välkommen till framtidens svenska röstteknologi.")

    # Voice cloning usage (optional):
    # run_parambe_tts("Jag pratar nu med din röst.", "din_inspelning.wav")