"""Gradio front end for Kokoro TTS.

The app has two tabs: one synthesizes speech from text with a selectable
voice preset and saves the result as a WAV file in ``SAVE_DIR``; the other
lists the saved WAV files and lets the user download or delete them.
"""

import datetime
import os

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline
from tqdm import tqdm

# --- Configuration ---
SAVE_DIR = "saved_audios"
os.makedirs(SAVE_DIR, exist_ok=True)

# Sample rate handed to soundfile when saving and to the Gradio audio player.
SAMPLE_RATE = 24000

# Number of leading characters of the input text considered for the file name.
FILENAME_PREFIX_CHARS = 20

# --- Official voice list (can be extended according to the official docs) ---
VOICE_PRESETS = {
    "美式英語 (女聲)": {'lang_code': 'a', 'voice': 'af_heart'},
    "美式英語 (男聲)": {'lang_code': 'a', 'voice': 'am_adam'},
    "英式英語 (女聲)": {'lang_code': 'b', 'voice': 'bf_emma'},
    "英式英語 (男聲)": {'lang_code': 'b', 'voice': 'bm_george'},
    "簡體中文 (女聲 - 小貝)": {'lang_code': 'z', 'voice': 'zf_xiaobei'},
    "簡體中文 (女聲 - 小妮)": {'lang_code': 'z', 'voice': 'zf_xiaoni'},
    "繁體中文 (女聲 - 小貝)": {'lang_code': 'z', 'voice': 'zf_xiaobei'},
}

# --- Global state ---
# A single pipeline is kept alive and rebuilt only when the requested
# language code differs from the one it was created with.
global_pipeline = None
current_lang_code = None


def get_saved_files_dropdown_options():
    """Return the ``.wav`` file names in ``SAVE_DIR``, newest first.

    Returns:
        A list of bare file names sorted by modification time, most recently
        modified first. Empty if the directory is missing.
    """

    def _mtime(name):
        # A file may vanish between listdir() and getmtime(); sort it last
        # instead of failing the whole listing.
        try:
            return os.path.getmtime(os.path.join(SAVE_DIR, name))
        except OSError:
            return 0.0

    try:
        names = [name for name in os.listdir(SAVE_DIR) if name.endswith(".wav")]
    except FileNotFoundError:
        return []
    return sorted(names, key=_mtime, reverse=True)


def _saved_file_path(filename):
    """Map a bare file name to its path inside ``SAVE_DIR``.

    Returns ``None`` for an empty name or for a name that carries any
    directory component, so callers never touch files outside ``SAVE_DIR``.
    """
    if not filename or os.path.basename(filename) != filename:
        return None
    return os.path.join(SAVE_DIR, filename)


def _build_filename(text, timestamp):
    """Build the WAV file name for ``text`` generated at ``timestamp``.

    Only alphanumeric characters and whitespace from the first
    ``FILENAME_PREFIX_CHARS`` characters are kept, and every whitespace run
    (including newlines and tabs from multi-line input) is collapsed to one
    space so the name never contains control whitespace.
    """
    kept = "".join(
        c for c in text[:FILENAME_PREFIX_CHARS] if c.isalnum() or c.isspace()
    )
    safe_text_prefix = " ".join(kept.split())
    if safe_text_prefix:
        return f"kokoro_{safe_text_prefix}_{timestamp}.wav"
    return f"kokoro_audio_{timestamp}.wav"


def _synthesis_outputs(status_message, audio=None):
    """Assemble the four outputs wired to the generate button.

    Order: status text, audio value, refreshed saved-files dropdown, cleared
    download file component.
    """
    return (
        status_message,
        audio,
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


def synthesize_speech_gradio(text, preset_name):
    """Synthesize ``text`` with the chosen preset and save it as a WAV file.

    Args:
        text: The text to convert to speech.
        preset_name: A key of ``VOICE_PRESETS``.

    Returns:
        A 4-tuple ``(status_message, audio, dropdown_update, file_update)``
        where ``audio`` is ``(SAMPLE_RATE, samples)`` on success and ``None``
        on failure.

    Failures are reported through the returned status message and a toast
    rather than by raising, so the status box is always updated.
    """
    global global_pipeline, current_lang_code

    if not text or not text.strip():
        status_message = "請輸入文字!"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    # The dropdown can be cleared by the user, in which case no preset name
    # arrives here; report that instead of failing with KeyError.
    preset = VOICE_PRESETS.get(preset_name)
    if preset is None:
        status_message = "請選擇語音預設!"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    lang_code = preset['lang_code']
    voice = preset['voice']

    if global_pipeline is None or current_lang_code != lang_code:
        try:
            global_pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
            current_lang_code = lang_code
        except Exception as e:
            status_message = f"模型初始化失敗: {e}"
            # gr.Error only shows up when raised; gr.Warning displays a toast
            # when called, which lets us still return the status message.
            gr.Warning(status_message)
            return _synthesis_outputs(status_message)

    audio_segments = []
    try:
        generator = global_pipeline(text, voice=voice)
        print("語音片段生成進度:")
        for i, (graphemes, phonemes, audio) in enumerate(
            tqdm(generator, desc="生成中", unit="段")
        ):
            # Show each segment's graphemes and phonemes as it is produced.
            print(f"片段 {i+1}:")
            print(f" Graphemes: {graphemes}")
            print(f" Phonemes: {phonemes}")
            # Defensive: a segment without audio would break concatenation.
            if audio is not None:
                audio_segments.append(audio)
        print()  # Line break after the progress bar finishes.
    except Exception as e:
        status_message = f"語音生成失敗,請檢查 voice 名稱是否正確: {e}"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    if not audio_segments:
        status_message = "沒有生成任何音訊片段。"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    full_audio = np.concatenate(audio_segments)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = _build_filename(text, timestamp)
    file_path = os.path.join(SAVE_DIR, filename)

    try:
        sf.write(file_path, full_audio, SAMPLE_RATE)
    except Exception as e:
        # The audio itself is fine, so it is still returned for playback.
        status_message = f"語音生成成功但保存失敗: {e}"
        gr.Warning(status_message)
    else:
        status_message = f"語音生成成功並已保存為 {filename}。"
        gr.Info(status_message)

    return _synthesis_outputs(status_message, (SAMPLE_RATE, full_audio))


def select_file_for_download(filename):
    """Return the path of the selected saved file, or ``None``.

    ``None`` is returned when nothing is selected, when the name contains a
    directory component, or when no such regular file exists in ``SAVE_DIR``.
    """
    file_path = _saved_file_path(filename)
    if file_path is not None and os.path.isfile(file_path):
        return file_path
    return None


def _deletion_outputs(status_message, clear_selection=False):
    """Assemble the three outputs wired to the delete button.

    Order: status text, refreshed saved-files dropdown, cleared download file
    component. With ``clear_selection`` the dropdown value is reset as well so
    it does not keep pointing at a file that no longer exists.
    """
    choices = get_saved_files_dropdown_options()
    if clear_selection:
        dropdown_update = gr.update(choices=choices, value=None)
    else:
        dropdown_update = gr.update(choices=choices)
    return status_message, dropdown_update, gr.update(value=None)


def delete_selected_audio(filename_to_delete):
    """Delete the selected saved file and refresh the file list.

    Args:
        filename_to_delete: Bare file name chosen in the dropdown.

    Returns:
        A 3-tuple ``(status_message, dropdown_update, file_update)``.
    """
    if not filename_to_delete:
        status_message = "請選擇要刪除的檔案。"
        gr.Warning(status_message)
        return _deletion_outputs(status_message)

    file_path = _saved_file_path(filename_to_delete)
    if file_path is None:
        # Names with a directory component are treated as nonexistent.
        status_message = f"檔案 {filename_to_delete} 不存在。"
        gr.Warning(status_message)
        return _deletion_outputs(status_message, clear_selection=True)

    # Attempt the removal directly instead of checking existence first, which
    # avoids a race and lets other OS errors be reported instead of crashing.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        status_message = f"檔案 {filename_to_delete} 不存在。"
        gr.Warning(status_message)
        return _deletion_outputs(status_message, clear_selection=True)
    except OSError as e:
        status_message = f"刪除 {filename_to_delete} 失敗: {e}"
        gr.Warning(status_message)
        return _deletion_outputs(status_message)

    status_message = f"已刪除 {filename_to_delete}"
    gr.Info(status_message)
    return _deletion_outputs(status_message, clear_selection=True)


def clear_interface():
    """Reset the generation tab.

    Returns values for: text input, audio output, status box, saved-files
    dropdown (choices refreshed) and download file component.
    """
    return (
        "",
        None,
        "",
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


def refresh_saved_files():
    """Clear the delete status and reload the saved-files dropdown choices."""
    return (
        "",
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


with gr.Blocks() as demo:
    gr.Markdown("""
    # Kokoro TTS 語音合成器

    在 Hugging Face Spaces 上使用 Kokoro TTS 生成高品質語音。

    **注意:繁體中文選項使用簡體中文模型,發音可能不完全符合台灣國語腔調。**

    **重要提示:此應用程式在 Hugging Face Spaces 上運行,生成的音檔會保存到伺服器,但 Space 重啟後可能會丟失。**
    """)

    with gr.Tab("語音生成"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=5,
                    label="輸入文字",
                    placeholder="請輸入您想轉換為語音的文字...",
                )
                voice_dropdown = gr.Dropdown(
                    list(VOICE_PRESETS.keys()),
                    label="選擇語音預設",
                    value="美式英語 (女聲)",
                )
                with gr.Row():
                    generate_btn = gr.Button("生成語音")
                    clear_btn = gr.Button("清除")
            with gr.Column():
                audio_output = gr.Audio(label="生成的語音", type="numpy")
                gr.Markdown("**提示:** 音訊播放器右下角有下載按鈕,點擊即可保存音檔到您的電腦。")
                info_output = gr.Textbox(label="狀態/提示", interactive=False)

    with gr.Tab("管理已保存音檔"):
        gr.Markdown("### 已保存的音檔列表")
        saved_files_dropdown = gr.Dropdown(
            choices=get_saved_files_dropdown_options(),
            label="選擇音檔",
            info="選擇一個音檔進行下載或刪除",
        )
        download_file_output = gr.File(label="下載選定音檔", interactive=False)
        with gr.Row():
            refresh_files_btn = gr.Button("刷新列表")
            delete_btn = gr.Button("刪除選定音檔")
        delete_info_output = gr.Textbox(label="刪除狀態", interactive=False)

    generate_btn.click(
        fn=synthesize_speech_gradio,
        inputs=[text_input, voice_dropdown],
        outputs=[info_output, audio_output, saved_files_dropdown, download_file_output],
    )
    clear_btn.click(
        fn=clear_interface,
        inputs=[],
        outputs=[text_input, audio_output, info_output, saved_files_dropdown, download_file_output],
    )
    saved_files_dropdown.change(
        fn=select_file_for_download,
        inputs=[saved_files_dropdown],
        outputs=[download_file_output],
    )
    refresh_files_btn.click(
        fn=refresh_saved_files,
        inputs=[],
        outputs=[delete_info_output, saved_files_dropdown, download_file_output],
    )
    delete_btn.click(
        fn=delete_selected_audio,
        inputs=[saved_files_dropdown],
        outputs=[delete_info_output, saved_files_dropdown, download_file_output],
    )

# Start the server only when run as a script, so importing this module (for
# example to reuse ``demo``) does not block on launch().
if __name__ == "__main__":
    demo.launch()