Update README.md
Browse files
README.md
CHANGED
|
@@ -40,29 +40,54 @@ import soundfile as sf
|
|
| 40 |
from chatterbox.tts import ChatterboxTTS
|
| 41 |
from huggingface_hub import hf_hub_download
|
| 42 |
from safetensors.torch import load_file
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
-
|
| 65 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
```
|
| 67 |
|
| 68 |
|
|
|
|
| 40 |
from chatterbox.tts import ChatterboxTTS
|
| 41 |
from huggingface_hub import hf_hub_download
|
| 42 |
from safetensors.torch import load_file
|
| 43 |
+
|
| 44 |
+
# --- Configuration ----------------------------------------------------------
# Hugging Face repo hosting the French-finetuned T3 weights.
MODEL_REPO = "Thomcles/Chatterbox-TTS-French"
# Checkpoint file to pull from that repo.
CHECKPOINT_FILENAME = "t3_cfg.safetensors"
# Where the synthesized audio is written.
OUTPUT_PATH = "output_cloned_voice.wav"
# Demo sentence to synthesize (French).
TEXT_TO_SYNTHESIZE = "Jean-Paul Sartre laisse à la postérité une œuvre considérable, tant littéraire que philosophique, ayant influencée à la fois la vie politique française d'après-guerre et les penseurs de son temps (Merleau-Ponty et Alain Badiou notamment)."
|
| 49 |
+
|
| 50 |
+
def get_device() -> str:
    """Return the torch device string: "cuda" when a GPU is available, else "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
|
| 52 |
+
|
| 53 |
+
def download_checkpoint(repo: str, filename: str) -> str:
    """Fetch *filename* from the Hub repo *repo* and return the local file path."""
    local_path = hf_hub_download(repo_id=repo, filename=filename)
    return local_path
|
| 55 |
+
|
| 56 |
+
def load_tts_model(repo: str, checkpoint_file: str, device: str) -> ChatterboxTTS:
    """Build the base Chatterbox model, then swap in the finetuned T3 weights.

    The checkpoint is loaded onto CPU first; the model itself lives on *device*.
    """
    tts = ChatterboxTTS.from_pretrained(device=device)
    ckpt_path = download_checkpoint(repo, checkpoint_file)
    state = load_file(ckpt_path, device="cpu")
    tts.t3.load_state_dict(state)
    return tts
|
| 62 |
+
|
| 63 |
+
def synthesize_speech(model: ChatterboxTTS, text: str, audio_prompt_path: str, **kwargs) -> torch.Tensor:
    """Generate a waveform for *text*, skipping autograd bookkeeping.

    Extra keyword arguments (e.g. temperature, cfg_weight) are forwarded
    straight to ``model.generate``.
    """
    with torch.inference_mode():
        waveform = model.generate(text, audio_prompt_path, **kwargs)
        return waveform
|
| 66 |
+
|
| 67 |
+
def save_audio(waveform: torch.Tensor, path: str, sample_rate: int):
    """Write *waveform* to *path* as an audio file at *sample_rate* Hz."""
    # Drop singleton dims and move to host memory before handing off to soundfile.
    samples = waveform.squeeze().cpu().numpy()
    sf.write(path, samples, sample_rate)
|
| 69 |
+
|
| 70 |
+
def main():
    """Load the French Chatterbox checkpoint, synthesize the demo text, save a WAV.

    End-to-end demo: picks a device, builds the model with the finetuned T3
    weights, generates speech for TEXT_TO_SYNTHESIZE, and writes the result
    to OUTPUT_PATH at the model's native sample rate.
    """
    print("Loading model...")
    device = get_device()
    model = load_tts_model(MODEL_REPO, CHECKPOINT_FILENAME, device)

    print(f"Generating speech on {device}...")
    wav = synthesize_speech(
        model,
        TEXT_TO_SYNTHESIZE,
        # FIX: the original example omitted the comma after this argument,
        # which made the published snippet a SyntaxError.
        audio_prompt_path=None,  # no reference voice: use the model's default
        exaggeration=0.5,
        temperature=0.6,
        cfg_weight=0.3,
    )

    print(f"Saving output to: {OUTPUT_PATH}")
    save_audio(wav, OUTPUT_PATH, model.sr)
    print("Done.")
|
| 88 |
+
|
| 89 |
+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
| 91 |
```
|
| 92 |
|
| 93 |
|