import os
import time
from pathlib import Path
from typing import Optional

import gradio as gr
import torch
import torchaudio
from chatterbox.tts import ChatterboxTTS

# Initialize the output folder
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

# Load the model
model = ChatterboxTTS.from_pretrained(device="cpu")


def _next_output_path() -> Path:
    """Return a not-yet-existing ``tts_<timestamp>.wav`` path in OUTPUT_DIR."""
    stamp = time.strftime("%Y%m%d_%H%M%S")
    candidate = OUTPUT_DIR / f"tts_{stamp}.wav"
    # The timestamp only has one-second resolution, so two generations within
    # the same second would otherwise overwrite each other.
    counter = 1
    while candidate.exists():
        candidate = OUTPUT_DIR / f"tts_{stamp}_{counter}.wav"
        counter += 1
    return candidate


def tts_and_save(
    text: str,
    ref_wav: Optional[str],
    exaggeration: float,
    temperature: float,
    seed: Optional[float],
    cfg_weight: float,
):
    """Synthesize speech for *text*, save it as a WAV file, and return both.

    Args:
        text: Text passed to ``model.generate``.
        ref_wav: File path of the reference audio from the UI (or ``None``);
            forwarded as ``audio_prompt_path``.
        exaggeration: Forwarded unchanged to ``model.generate``.
        temperature: Forwarded unchanged to ``model.generate``.
        seed: Seed for ``torch.manual_seed``. ``0`` (or an empty field, which
            arrives as ``None``) leaves the RNG unseeded.
        cfg_weight: Forwarded unchanged to ``model.generate``.

    Returns:
        A pair ``((sample_rate, samples), saved_path)``: the first element is
        the tuple handed to the preview ``gr.Audio`` component, the second is
        the path of the written file as a string.
    """
    # A falsy seed covers both the documented "0 = random" case and a cleared
    # number field (None), which would otherwise crash in int(None).
    if seed:
        torch.manual_seed(int(seed))

    wav = model.generate(
        text,
        audio_prompt_path=ref_wav,
        exaggeration=exaggeration,
        temperature=temperature,
        cfg_weight=cfg_weight,
    )

    # Normalize once so that both the file write and the NumPy conversion work
    # regardless of the device the model produced the tensor on.
    wav = wav.detach().cpu()

    filename = _next_output_path()
    torchaudio.save(str(filename), wav, model.sr)

    # NOTE(review): squeeze(0) assumes the model returns a (1, samples)
    # tensor -- confirm against ChatterboxTTS.generate.
    return (model.sr, wav.squeeze(0).numpy()), str(filename)


with gr.Blocks() as demo:
    text = gr.Textbox(label="輸入文字")
    ref_wav = gr.Audio(
        label="參考語音(可選)",
        sources=["upload", "microphone"],
        type="filepath",
    )
    exaggeration = gr.Slider(0.25, 2, value=0.5, step=0.05, label="Exaggeration")
    cfg_weight = gr.Slider(0.2, 1, value=0.5, step=0.05, label="CFG/Pace")
    temperature = gr.Slider(0.05, 5, value=0.8, step=0.05, label="Temperature")
    seed = gr.Number(value=0, label="隨機種子 (0=隨機)", precision=0)
    btn = gr.Button("生成並自動儲存")
    output_audio = gr.Audio(label="語音預覽")
    saved_path = gr.Textbox(label="儲存路徑", interactive=False)

    # The order of `inputs` must match the parameter order of tts_and_save.
    btn.click(
        tts_and_save,
        inputs=[text, ref_wav, exaggeration, temperature, seed, cfg_weight],
        outputs=[output_audio, saved_path],
    )

if __name__ == "__main__":
    demo.launch()