# Kokorotts / app.py
# Ryanus's picture
# Update app.py
# 07f3018 verified
import gradio as gr
from kokoro import KPipeline
import soundfile as sf
import numpy as np
import os
import datetime
from tqdm import tqdm
# --- Configuration ---
# Directory that receives every generated .wav file; created at import time.
SAVE_DIR = "saved_audios"
os.makedirs(SAVE_DIR, exist_ok=True)

# --- Official voice list (can be extended following the official docs) ---
# Maps the label shown in the UI to the pipeline language code and voice id.
# The Traditional Chinese entry reuses the same voice as the first
# Simplified Chinese entry.
VOICE_PRESETS = {
    label: {'lang_code': lang_code, 'voice': voice}
    for label, lang_code, voice in (
        ("美式英語 (女聲)", 'a', 'af_heart'),
        ("美式英語 (男聲)", 'a', 'am_adam'),
        ("英式英語 (女聲)", 'b', 'bf_emma'),
        ("英式英語 (男聲)", 'b', 'bm_george'),
        ("簡體中文 (女聲 - 小貝)", 'z', 'zf_xiaobei'),
        ("簡體中文 (女聲 - 小妮)", 'z', 'zf_xiaoni'),
        ("繁體中文 (女聲 - 小貝)", 'z', 'zf_xiaobei'),
    )
}

# --- Global state ---
# The pipeline is created on first use and remembered together with the
# language code it was built for, so it is only rebuilt when the language
# changes.
global_pipeline = None
current_lang_code = None
def get_saved_files_dropdown_options():
    """Return the names of the ``.wav`` files in ``SAVE_DIR``, newest first.

    Ordering is by file modification time, most recently modified first.
    Only bare file names are returned, not full paths.
    """

    def modified_at(name):
        return os.path.getmtime(os.path.join(SAVE_DIR, name))

    wav_names = [name for name in os.listdir(SAVE_DIR) if name.endswith(".wav")]
    return sorted(wav_names, key=modified_at, reverse=True)
def _synthesis_outputs(status_message, audio=None):
    """Bundle the four values ``synthesize_speech_gradio`` hands back to Gradio."""
    return (
        status_message,
        audio,
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


def _build_audio_filename(text, timestamp):
    """Return a ``.wav`` file name derived from the first 20 characters of *text*.

    Only alphanumeric and whitespace characters are kept. Runs of whitespace
    (including newlines and tabs from multi-line input) collapse to a single
    space so the resulting name never contains a line break.
    """
    kept = "".join(c for c in text[:20] if c.isalnum() or c.isspace())
    safe_text_prefix = " ".join(kept.split())
    if safe_text_prefix:
        return f"kokoro_{safe_text_prefix}_{timestamp}.wav"
    return f"kokoro_audio_{timestamp}.wav"


def synthesize_speech_gradio(text, preset_name):
    """Synthesize *text* with the chosen voice preset and save it as a WAV file.

    The Kokoro pipeline is created on first use and re-created whenever the
    preset's language code differs from the one the cached pipeline was built
    for.

    Args:
        text: Text to convert to speech. Empty or whitespace-only text is
            rejected with a warning.
        preset_name: A key of ``VOICE_PRESETS``.

    Returns:
        A 4-tuple ``(status_message, audio, dropdown_update, file_update)``
        where ``audio`` is ``(sample_rate, samples)`` on success and ``None``
        on failure, ``dropdown_update`` refreshes the saved-file choices and
        ``file_update`` clears the download widget.
    """
    global global_pipeline, current_lang_code

    if not text or not text.strip():
        status_message = "請輸入文字!"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    preset = VOICE_PRESETS.get(preset_name)
    if preset is None:
        status_message = f"未知的語音預設: {preset_name}"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)
    lang_code = preset['lang_code']
    voice = preset['voice']

    # NOTE: failures below are surfaced with gr.Warning. gr.Error is an
    # exception type and shows nothing unless raised, and raising would
    # prevent the status box and file list from being updated.
    if global_pipeline is None or current_lang_code != lang_code:
        try:
            global_pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
            current_lang_code = lang_code
        except Exception as e:
            status_message = f"模型初始化失敗: {e}"
            gr.Warning(status_message)
            return _synthesis_outputs(status_message)

    audio_segments = []
    sample_rate = 24000
    try:
        generator = global_pipeline(text, voice=voice)
        print("語音片段生成進度:")
        segments = enumerate(tqdm(generator, desc="生成中", unit="段"), start=1)
        for index, (graphemes, phonemes, audio) in segments:
            audio_segments.append(audio)
            # Log the graphemes and phonemes of each segment as it arrives.
            print(f"片段 {index}:")
            print(f" Graphemes: {graphemes}")
            print(f" Phonemes: {phonemes}")
        print()  # Line break after the progress bar finishes.
    except Exception as e:
        status_message = f"語音生成失敗,請檢查 voice 名稱是否正確: {e}"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    if not audio_segments:
        status_message = "沒有生成任何音訊片段。"
        gr.Warning(status_message)
        return _synthesis_outputs(status_message)

    full_audio = np.concatenate(audio_segments)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = _build_audio_filename(text, timestamp)
    file_path = os.path.join(SAVE_DIR, filename)
    try:
        sf.write(file_path, full_audio, sample_rate)
        status_message = f"語音生成成功並已保存為 {filename}。"
        gr.Info(status_message)
    except Exception as e:
        # The audio itself is fine, so it is still returned for playback.
        status_message = f"語音生成成功但保存失敗: {e}"
        gr.Warning(status_message)
    return _synthesis_outputs(status_message, (sample_rate, full_audio))
def select_file_for_download(filename):
    """Return the path of a saved audio file so it can be offered for download.

    Args:
        filename: Bare file name as listed in the saved-files dropdown.

    Returns:
        ``os.path.join(SAVE_DIR, filename)`` when that path is an existing
        regular file, otherwise ``None``. Names that contain a directory
        component (for example ``../x`` or an absolute path) are rejected so
        the result can never point outside ``SAVE_DIR``.
    """
    if not filename or os.path.basename(filename) != filename:
        return None
    file_path = os.path.join(SAVE_DIR, filename)
    # isfile rather than exists: "." and ".." pass the basename check above
    # but are directories, not downloadable files.
    if os.path.isfile(file_path):
        return file_path
    return None
def delete_selected_audio(filename_to_delete):
    """Delete one saved audio file and refresh the file-management widgets.

    Args:
        filename_to_delete: Bare file name as listed in the saved-files
            dropdown. Names containing a directory component are treated as
            non-existent so nothing outside ``SAVE_DIR`` can be removed.

    Returns:
        A 3-tuple ``(status_message, dropdown_update, file_update)`` where
        ``dropdown_update`` reloads the saved-file choices and ``file_update``
        clears the download widget.
    """

    def outputs(message):
        return (
            message,
            gr.update(choices=get_saved_files_dropdown_options()),
            gr.update(value=None),
        )

    if not filename_to_delete:
        status_message = "請選擇要刪除的檔案。"
        gr.Warning(status_message)
        return outputs(status_message)

    file_path = os.path.join(SAVE_DIR, filename_to_delete)
    is_bare_name = os.path.basename(filename_to_delete) == filename_to_delete
    if not (is_bare_name and os.path.isfile(file_path)):
        status_message = f"檔案 {filename_to_delete} 不存在。"
        gr.Warning(status_message)
        return outputs(status_message)

    try:
        os.remove(file_path)
    except OSError as e:
        # The file may have vanished or be locked or unwritable; report it
        # instead of letting the callback crash.
        status_message = f"刪除 {filename_to_delete} 失敗: {e}"
        gr.Warning(status_message)
        return outputs(status_message)

    status_message = f"已刪除 {filename_to_delete}"
    gr.Info(status_message)
    return outputs(status_message)
def clear_interface():
    """Reset the generation tab and reload the saved-file list.

    Returns:
        A 5-tuple: empty text input, no audio, empty status text, an update
        that reloads the saved-file dropdown choices, and an update that
        clears the download widget.
    """
    dropdown_refresh = gr.update(choices=get_saved_files_dropdown_options())
    download_reset = gr.update(value=None)
    return "", None, "", dropdown_refresh, download_reset
# Introductory Markdown rendered at the top of the page.
INTRO_MARKDOWN = """
# Kokoro TTS 語音合成器
在 Hugging Face Spaces 上使用 Kokoro TTS 生成高品質語音。
**注意:繁體中文選項使用簡體中文模型,發音可能不完全符合台灣國語腔調。**
**重要提示:此應用程式在 Hugging Face Spaces 上運行,生成的音檔會保存到伺服器,但 Space 重啟後可能會丟失。**
"""


def _refresh_saved_files():
    """Clear the delete status, reload the file list and reset the download widget."""
    return (
        "",
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # --- Tab 1: speech generation ---
    with gr.Tab("語音生成"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    lines=5,
                    label="輸入文字",
                    placeholder="請輸入您想轉換為語音的文字...",
                )
                voice_dropdown = gr.Dropdown(
                    list(VOICE_PRESETS),
                    label="選擇語音預設",
                    value="美式英語 (女聲)",
                )
                with gr.Row():
                    generate_btn = gr.Button("生成語音")
                    clear_btn = gr.Button("清除")
            with gr.Column():
                audio_output = gr.Audio(label="生成的語音", type="numpy")
                gr.Markdown("**提示:** 音訊播放器右下角有下載按鈕,點擊即可保存音檔到您的電腦。")
                # NOTE(review): assumed to sit in the right-hand column under
                # the player -- confirm against the intended layout.
                info_output = gr.Textbox(label="狀態/提示", interactive=False)

    # --- Tab 2: manage files saved on the server ---
    with gr.Tab("管理已保存音檔"):
        gr.Markdown("### 已保存的音檔列表")
        saved_files_dropdown = gr.Dropdown(
            choices=get_saved_files_dropdown_options(),
            label="選擇音檔",
            info="選擇一個音檔進行下載或刪除",
        )
        download_file_output = gr.File(label="下載選定音檔", interactive=False)
        with gr.Row():
            refresh_files_btn = gr.Button("刷新列表")
            delete_btn = gr.Button("刪除選定音檔")
        delete_info_output = gr.Textbox(label="刪除狀態", interactive=False)

    # --- Event wiring ---
    # Refresh and delete update the same three widgets in the manage tab.
    manage_tab_outputs = [delete_info_output, saved_files_dropdown, download_file_output]

    generate_btn.click(
        fn=synthesize_speech_gradio,
        inputs=[text_input, voice_dropdown],
        outputs=[info_output, audio_output, saved_files_dropdown, download_file_output],
    )
    clear_btn.click(
        fn=clear_interface,
        inputs=[],
        outputs=[text_input, audio_output, info_output, saved_files_dropdown, download_file_output],
    )
    # Picking a file in the dropdown exposes it through the download widget.
    saved_files_dropdown.change(
        fn=select_file_for_download,
        inputs=[saved_files_dropdown],
        outputs=[download_file_output],
    )
    refresh_files_btn.click(
        fn=_refresh_saved_files,
        inputs=[],
        outputs=manage_tab_outputs,
    )
    delete_btn.click(
        fn=delete_selected_audio,
        inputs=[saved_files_dropdown],
        outputs=manage_tab_outputs,
    )

demo.launch()