# Hugging Face page residue (not part of the script): Parambe's picture
# Commit message: Upload inference.py
# Commit: f58d506 (verified)
import torch
import soundfile as sf
from safetensors.torch import load_file
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
def run_parambe_tts(
    text,
    voice_prompt_path=None,
    *,
    weights_path="parambe_swedish_v1.safetensors",
    output_path="parambe_output.wav",
    device=None,
):
    """Synthesize Swedish speech for *text* and write it to a WAV file.

    Loads the pretrained multilingual Chatterbox model, overlays the
    ``t3.*`` tensors found in *weights_path* onto ``model.t3``, generates
    audio with ``language_id="sv"`` and writes it to *output_path*.

    Args:
        text: The text to speak. Must be a non-empty string.
        voice_prompt_path: Optional path to a reference recording, forwarded
            to ``model.generate`` as ``audio_prompt_path``. ``None`` is
            forwarded unchanged.
        weights_path: Safetensors checkpoint holding the ``t3.``-prefixed
            tensors to load.
        output_path: Destination of the generated WAV file.
        device: Device passed to ``from_pretrained``. ``None`` selects
            ``"cuda"`` when available and ``"cpu"`` otherwise.

    Returns:
        *output_path*, so callers know where the audio was written.

    Raises:
        TypeError: If *text* is not a string.
        ValueError: If *text* is blank, or if the checkpoint contains no
            ``t3.``-prefixed tensors (nothing would be applied).
    """
    # Validate before the expensive model load so bad input fails fast.
    if not isinstance(text, str):
        raise TypeError(f"text must be a str, got {type(text).__name__}")
    if not text.strip():
        raise ValueError("text must be a non-empty string")

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = ChatterboxMultilingualTTS.from_pretrained(device=device)

    state_dict = load_file(weights_path)
    prefix = "t3."
    # Strip only the *leading* prefix; str.replace would also rewrite any
    # later "t3." occurrence inside a key and corrupt the parameter name.
    t3_dict = {
        key[len(prefix):]: tensor
        for key, tensor in state_dict.items()
        if key.startswith(prefix)
    }
    if not t3_dict:
        # With strict=False an empty dict loads "successfully" and the
        # unmodified base model would be used without any indication.
        raise ValueError(
            f"no '{prefix}'-prefixed tensors found in {weights_path!r}"
        )

    # strict=False is kept so a partial checkpoint still loads, but keys the
    # model does not recognise are reported instead of silently dropped.
    load_result = model.t3.load_state_dict(t3_dict, strict=False)
    unexpected = getattr(load_result, "unexpected_keys", None)
    if unexpected:
        import warnings

        warnings.warn(
            f"{len(unexpected)} tensor(s) in {weights_path!r} did not match "
            f"any t3 parameter and were ignored, e.g. {unexpected[0]!r}",
            stacklevel=2,
        )

    with torch.inference_mode():
        wav = model.generate(
            text=text,
            language_id="sv",
            audio_prompt_path=voice_prompt_path,
        )

    sf.write(output_path, wav.squeeze().cpu().numpy(), model.sr)
    return output_path
# Example 1 - standard Parambe voice (no reference recording supplied):
run_parambe_tts(text="Välkommen till framtidens svenska röstteknologi.")

# Example 2 - optional voice cloning: pass the path to a reference
# recording as the second argument.
# run_parambe_tts("Jag pratar nu med din röst.", "din_inspelning.wav")