|
|
import gradio as gr |
|
|
from kokoro import KPipeline |
|
|
import soundfile as sf |
|
|
import numpy as np |
|
|
import os |
|
|
import datetime |
|
|
from tqdm import tqdm |
|
|
|
|
|
|
|
|
# Folder that receives every generated .wav file. The path is relative, so it
# resolves against the process working directory; it is created on import.
SAVE_DIR = "saved_audios"
os.makedirs(SAVE_DIR, exist_ok=True)

# Maps the label shown in the voice dropdown to the settings handed to Kokoro:
# ``lang_code`` selects the pipeline language and ``voice`` the speaker.
# Row order is the order in which the presets are listed in the dropdown.
# The Traditional Chinese entry deliberately reuses the zf_xiaobei voice of the
# Simplified Chinese preset (the UI text warns users about this).
VOICE_PRESETS = {
    label: {"lang_code": lang_code, "voice": voice}
    for label, lang_code, voice in (
        ("美式英語 (女聲)", "a", "af_heart"),
        ("美式英語 (男聲)", "a", "am_adam"),
        ("英式英語 (女聲)", "b", "bf_emma"),
        ("英式英語 (男聲)", "b", "bm_george"),
        ("簡體中文 (女聲 - 小貝)", "z", "zf_xiaobei"),
        ("簡體中文 (女聲 - 小妮)", "z", "zf_xiaoni"),
        ("繁體中文 (女聲 - 小貝)", "z", "zf_xiaobei"),
    )
}

# Lazily created Kokoro pipeline shared by all requests, together with the
# language code it was built for. Both are (re)assigned by
# synthesize_speech_gradio when the requested language changes.
global_pipeline = None
current_lang_code = None
|
|
|
|
|
def get_saved_files_dropdown_options():
    """Return the ``.wav`` file names in SAVE_DIR, most recently modified first.

    Only bare file names are returned (no directory part); they are used as
    the choices of the saved-files dropdown.
    """

    def modified_at(name):
        # Modification time of the entry inside SAVE_DIR, used as the sort key.
        return os.path.getmtime(os.path.join(SAVE_DIR, name))

    wav_names = filter(lambda name: name.endswith(".wav"), os.listdir(SAVE_DIR))
    return sorted(wav_names, key=modified_at, reverse=True)
|
|
|
|
|
def _synthesis_result(message, audio=None):
    """Assemble the four outputs of the generate-button callback.

    Returns the status text, the audio payload (``None`` when there is nothing
    to play), a dropdown update carrying the current saved-file list, and an
    update that clears the download slot.
    """
    return (
        message,
        audio,
        gr.update(choices=get_saved_files_dropdown_options()),
        gr.update(value=None),
    )


def _build_output_filename(text, timestamp):
    """Derive the .wav file name from the first 20 characters of ``text``.

    Only alphanumeric characters and whitespace are kept. Runs of whitespace
    (including newlines and tabs from multi-line input) are collapsed to a
    single space so the name never contains control characters.
    """
    kept = "".join(c for c in text[:20] if c.isalnum() or c.isspace())
    prefix = " ".join(kept.split())
    if prefix:
        return f"kokoro_{prefix}_{timestamp}.wav"
    return f"kokoro_audio_{timestamp}.wav"


def synthesize_speech_gradio(text, preset_name):
    """Synthesize ``text`` with the chosen voice preset and save it as a .wav file.

    The shared Kokoro pipeline is created on first use and rebuilt whenever
    the preset's language code differs from the one it was built for.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        preset_name: A key of VOICE_PRESETS.

    Returns:
        A 4-tuple for the generate-button outputs: the status message, the
        audio as ``(sample_rate, samples)`` or ``None`` on failure, an update
        refreshing the saved-files dropdown, and an update clearing the
        download slot.
    """
    global global_pipeline, current_lang_code

    # NOTE: failures are surfaced with gr.Warning instead of gr.Error.
    # gr.Error is an exception class that only produces an alert when raised,
    # and raising would discard the status text (and, for a failed save, the
    # audio) that this callback returns.

    if not text or not text.strip():
        message = "請輸入文字!"
        gr.Warning(message)
        return _synthesis_result(message)

    preset = VOICE_PRESETS.get(preset_name)
    if preset is None:
        # Reached when the dropdown value is missing or not a known preset.
        message = f"未知的語音預設: {preset_name}"
        gr.Warning(message)
        return _synthesis_result(message)

    lang_code = preset['lang_code']
    voice = preset['voice']

    if global_pipeline is None or current_lang_code != lang_code:
        # Broad catch on purpose: this is the UI boundary and any failure
        # should be reported to the user rather than propagate.
        try:
            pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
        except Exception as e:
            message = f"模型初始化失敗: {e}"
            gr.Warning(message)
            return _synthesis_result(message)
        # Only replace the shared pipeline once construction has succeeded.
        global_pipeline = pipeline
        current_lang_code = lang_code

    audio_segments = []
    sample_rate = 24000

    try:
        generator = global_pipeline(text, voice=voice)
        print("語音片段生成進度:")
        for index, (graphemes, phonemes, audio) in enumerate(
            tqdm(generator, desc="生成中", unit="段"), start=1
        ):
            audio_segments.append(audio)
            print(f"片段 {index}:")
            print(f" Graphemes: {graphemes}")
            print(f" Phonemes: {phonemes}")
            print()
    except Exception as e:
        message = f"語音生成失敗,請檢查 voice 名稱是否正確: {e}"
        gr.Warning(message)
        return _synthesis_result(message)

    if not audio_segments:
        message = "沒有生成任何音訊片段。"
        gr.Warning(message)
        return _synthesis_result(message)

    full_audio = np.concatenate(audio_segments)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = _build_output_filename(text, timestamp)
    file_path = os.path.join(SAVE_DIR, filename)

    # A failed save is reported but does not discard the audio: the user can
    # still listen to (and download) the result from the player.
    try:
        sf.write(file_path, full_audio, sample_rate)
    except Exception as e:
        message = f"語音生成成功但保存失敗: {e}"
        gr.Warning(message)
    else:
        message = f"語音生成成功並已保存為 {filename}。"
        gr.Info(message)

    return _synthesis_result(message, (sample_rate, full_audio))
|
|
|
|
|
def select_file_for_download(filename):
    """Resolve a saved-file name to its path inside SAVE_DIR.

    Args:
        filename: Name picked in the saved-files dropdown; may be empty/None.

    Returns:
        The joined path when something exists at that location, otherwise
        ``None`` (also returned when no name was given).
    """
    if not filename:
        return None
    candidate = os.path.join(SAVE_DIR, filename)
    return candidate if os.path.exists(candidate) else None
|
|
|
|
|
def delete_selected_audio(filename_to_delete):
    """Delete one saved audio file and refresh the file-management widgets.

    Args:
        filename_to_delete: Bare file name chosen in the saved-files dropdown;
            may be empty/None when nothing is selected.

    Returns:
        A 3-tuple: the status message, an update refreshing the saved-files
        dropdown choices, and an update clearing the download slot.
    """

    def outputs(message):
        # The file list is read here, i.e. after any deletion has happened.
        return (
            message,
            gr.update(choices=get_saved_files_dropdown_options()),
            gr.update(value=None),
        )

    if not filename_to_delete:
        message = "請選擇要刪除的檔案。"
        gr.Warning(message)
        return outputs(message)

    missing_message = f"檔案 {filename_to_delete} 不存在。"

    # The name arrives from the client; anything carrying a directory part
    # could point outside SAVE_DIR, so treat it as a file that does not exist.
    if os.path.basename(filename_to_delete) != filename_to_delete:
        gr.Warning(missing_message)
        return outputs(missing_message)

    file_path = os.path.join(SAVE_DIR, filename_to_delete)
    # Attempt the removal directly instead of checking existence first: the
    # file may vanish between a check and the call, and other OS-level
    # failures (permissions, path is a directory) must not crash the callback.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        message = missing_message
        gr.Warning(message)
    except OSError as e:
        message = f"刪除 {filename_to_delete} 失敗: {e}"
        gr.Warning(message)
    else:
        message = f"已刪除 {filename_to_delete}"
        gr.Info(message)
    return outputs(message)
|
|
|
|
|
def clear_interface():
    """Reset the generation tab and refresh the saved-files widgets.

    Returns:
        A 5-tuple, in the order of the clear button's outputs: empty input
        text, no audio, empty status text, an update refreshing the
        saved-files dropdown choices, and an update clearing the download slot.
    """
    empty_text = ""
    empty_status = ""
    refreshed_choices = gr.update(choices=get_saved_files_dropdown_options())
    cleared_download = gr.update(value=None)
    return empty_text, None, empty_status, refreshed_choices, cleared_download
|
|
|
|
|
# Gradio UI: one tab for generating speech, one for managing saved files.
# NOTE(review): the source carried no indentation, so the nesting of the
# layout containers (and the indentation inside the intro text) is
# reconstructed from context — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(value="""
    # Kokoro TTS 語音合成器
    在 Hugging Face Spaces 上使用 Kokoro TTS 生成高品質語音。
    **注意:繁體中文選項使用簡體中文模型,發音可能不完全符合台灣國語腔調。**
    **重要提示:此應用程式在 Hugging Face Spaces 上運行,生成的音檔會保存到伺服器,但 Space 重啟後可能會丟失。**
    """)

    # --- Tab 1: text in, audio out -----------------------------------------
    with gr.Tab(label="語音生成"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="輸入文字",
                    lines=5,
                    placeholder="請輸入您想轉換為語音的文字...",
                )
                voice_dropdown = gr.Dropdown(
                    choices=list(VOICE_PRESETS),
                    value="美式英語 (女聲)",
                    label="選擇語音預設",
                )
                with gr.Row():
                    generate_btn = gr.Button(value="生成語音")
                    clear_btn = gr.Button(value="清除")
            with gr.Column():
                audio_output = gr.Audio(type="numpy", label="生成的語音")
                gr.Markdown(value="**提示:** 音訊播放器右下角有下載按鈕,點擊即可保存音檔到您的電腦。")
                info_output = gr.Textbox(interactive=False, label="狀態/提示")

    # --- Tab 2: browse, download and delete saved files ----------------------
    with gr.Tab(label="管理已保存音檔"):
        gr.Markdown(value="### 已保存的音檔列表")
        saved_files_dropdown = gr.Dropdown(
            label="選擇音檔",
            info="選擇一個音檔進行下載或刪除",
            choices=get_saved_files_dropdown_options(),
        )
        download_file_output = gr.File(interactive=False, label="下載選定音檔")
        with gr.Row():
            refresh_files_btn = gr.Button(value="刷新列表")
            delete_btn = gr.Button(value="刪除選定音檔")
        delete_info_output = gr.Textbox(interactive=False, label="刪除狀態")

    # --- Event wiring (handler, inputs, outputs) ------------------------------
    # Generating also refreshes the saved-file list and clears the download slot.
    generate_btn.click(
        synthesize_speech_gradio,
        [text_input, voice_dropdown],
        [info_output, audio_output, saved_files_dropdown, download_file_output],
    )
    clear_btn.click(
        clear_interface,
        [],
        [text_input, audio_output, info_output, saved_files_dropdown, download_file_output],
    )
    # Picking a file in the dropdown offers it for download.
    saved_files_dropdown.change(
        select_file_for_download,
        [saved_files_dropdown],
        [download_file_output],
    )
    # Refresh: blank the delete status, reload the choices, clear the download slot.
    refresh_files_btn.click(
        lambda: ("", gr.update(choices=get_saved_files_dropdown_options()), gr.update(value=None)),
        [],
        [delete_info_output, saved_files_dropdown, download_file_output],
    )
    delete_btn.click(
        delete_selected_audio,
        [saved_files_dropdown],
        [delete_info_output, saved_files_dropdown, download_file_output],
    )

# Start the web server as soon as the module is executed.
demo.launch()
|
|
|