# 9yearvoice / gradio_app.py
# invokerx's picture
# Update gradio_app.py
# 0d296b1 verified
"""
九周年纪念语音克隆应用 - Gradio 版本
用于部署到 Hugging Face Spaces
"""
import gradio as gr
import os
import tempfile
import io
import time
import threading
from pathlib import Path
# Environment defaults, applied with setdefault so values that are already
# present in the environment are left untouched.
_ENV_DEFAULTS = (
    # Set environment variables to avoid blocking operations.
    ("MPLBACKEND", "Agg"),
    ("MPLCONFIGDIR", "/tmp/matplotlib"),
    ("DISPLAY", ""),
    ("MPL_DISABLE_FONTCACHE", "1"),
    # Automatically accept the Coqui TTS license terms.
    ("COQUI_TOS_AGREED", "1"),
    ("TTS_AGREE_TO_TERMS", "1"),
)
for _env_name, _env_value in _ENV_DEFAULTS:
    os.environ.setdefault(_env_name, _env_value)
# Module-level state shared between the UI callbacks and the loader thread.
tts_model = None  # model instance; stays None until init_tts_model() succeeds
TTS = None  # TTS class, imported lazily inside init_tts_model()

# Loading progress; read and written under model_status_lock.
model_loading_status = {
    "status": "not_started",
    "message": "",
    "progress": 0,
    "start_time": None,
    "error": None,
}
model_status_lock = threading.Lock()

# Voice file paths (works with the Hugging Face Spaces file system).
voice_files = ["xiujia.wav", "xiujia_v2.mp3"]

# The two PNG images (placed in the repository root).
DECOR_IMAGES = ["couple1 (1).png", "couple (1).png"]
def find_first_existing(paths):
    """Return the first entry of ``paths`` that exists on disk, as a string.

    Falsy entries (``None``, empty string) are skipped. ``None`` is returned
    when no entry exists.
    """
    hits = (
        str(candidate)
        for candidate in paths
        if candidate and Path(candidate).exists()
    )
    return next(hits, None)
def get_available_voice_files():
    """Return the reference voice files that exist on disk, without duplicates.

    The candidate list contains several spellings of the same location
    (for example ``"xiujia.wav"`` and ``"./xiujia.wav"``). Candidates are
    compared by their resolved path so that a single file is reported only
    once; the first spelling that matched is the one returned.

    Returns:
        list[str]: Existing candidate paths, in search order.
    """
    search_paths = [
        "xiujia.wav",
        "xiujia_v2.mp3",
        "/tmp/xiujia.wav",
        "/tmp/xiujia_v2.mp3",
        "./xiujia.wav",
        "./xiujia_v2.mp3",
    ]
    available_files = []
    seen = set()
    for file_path in search_paths:
        candidate = Path(file_path)
        if not candidate.exists():
            continue
        # Resolve so that relative and "./"-prefixed spellings compare equal.
        resolved = candidate.resolve()
        if resolved in seen:
            continue
        seen.add(resolved)
        available_files.append(file_path)
    return available_files
def _set_model_status(**fields):
    """Apply ``fields`` to ``model_loading_status`` while holding the lock."""
    with model_status_lock:
        model_loading_status.update(fields)


def init_tts_model():
    """Import the TTS library and load the XTTS v2 model into ``tts_model``.

    Progress is published through ``model_loading_status`` so the UI can
    poll it. Calling this again after a successful load is a no-op.

    Returns:
        bool: ``True`` if the model is loaded (now or previously), ``False``
        if importing or loading raised; in that case the status is set to
        ``"failed"`` and the error text is recorded.
    """
    global tts_model, TTS
    if tts_model is not None:
        return True
    started_at = time.time()
    try:
        _set_model_status(
            status="loading",
            message="正在导入 TTS 库...",
            progress=10,
            start_time=started_at,
            error=None,
        )
        # Import lazily: the library is only pulled in when loading starts.
        if TTS is None:
            from TTS.api import TTS as _TTS
            TTS = _TTS
        _set_model_status(progress=20, message="TTS 库导入成功,检查模型文件...")

        # Check whether the model has already been downloaded.
        # NOTE(review): Coqui's model manager appears to store models in a
        # flat "type--lang--dataset--name" directory; the nested layout the
        # original code looked for is kept as a fallback. Confirm against the
        # installed TTS version.
        cache_dir = os.path.expanduser("~/.local/share/tts")
        model_parts = ("tts_models", "multilingual", "multi-dataset", "xtts_v2")
        candidate_dirs = (
            os.path.join(cache_dir, "--".join(model_parts)),
            os.path.join(cache_dir, *model_parts),
        )
        if any(Path(directory).exists() for directory in candidate_dirs):
            _set_model_status(message="模型文件已存在,正在加载模型...", progress=30)
            _set_model_status(progress=40, message="正在初始化模型...")
        else:
            # Leave the download notice in place: the download happens inside
            # the blocking TTS(...) call below, so this is the state the UI
            # should show for its whole duration.
            _set_model_status(
                status="downloading",
                message="正在下载模型文件(首次下载,可能需要 10-20 分钟)...",
                progress=25,
            )

        tts_model = TTS(
            model_name="tts_models/multilingual/multi-dataset/xtts_v2",
            progress_bar=True,
            gpu=False,
        )
        elapsed = time.time() - started_at
        _set_model_status(
            status="loaded",
            progress=100,
            message=f"模型加载完成!(耗时 {elapsed:.1f} 秒)",
        )
        return True
    except Exception as e:
        # Top-level boundary of the loader: log the traceback and surface the
        # failure through the status dict instead of raising.
        import traceback
        traceback.print_exc()
        _set_model_status(
            status="failed",
            error=str(e),
            message=f"模型加载失败: {e}",
        )
        return False
def synthesize_speech(text, emotion, speed):
    """Synthesize ``text`` with the cloned voice.

    ``emotion`` and ``speed`` are accepted for interface stability but are
    not used yet.

    Args:
        text: Text to read aloud; must be non-blank and at most 5000 characters.
        emotion: Reserved, currently ignored.
        speed: Reserved, currently ignored.

    Returns:
        tuple: ``((sample_rate, audio_data), None)`` on success, or
        ``(None, error_message)`` when validation or synthesis fails. No
        exception is propagated to the caller.
    """
    try:
        if tts_model is None:
            return None, "TTS 模型未加载,请等待模型加载完成"
        if not text or not text.strip():
            return None, "请输入要朗读的文本"
        if len(text) > 5000:
            return None, "文本长度不能超过5000字"
        available_files = get_available_voice_files()
        if not available_files:
            return None, "没有找到可用的声音文件(xiujia.wav / xiujia_v2.mp3)"
        # Pass a list only when there are several reference clips.
        speaker_files = available_files if len(available_files) > 1 else available_files[0]

        # mkstemp creates the file atomically (mktemp is deprecated and
        # race-prone); only the path is needed, so close the descriptor.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            tts_model.tts_to_file(
                text=text,
                file_path=output_path,
                speaker_wav=speaker_files,
                language="zh",
            )
            import soundfile as sf
            audio_data, sample_rate = sf.read(output_path)
        finally:
            # Remove the temp file on both success and failure; cleanup is
            # best-effort and must not mask the real outcome.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return (sample_rate, audio_data), None
    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"合成失败: {str(e)}"
def get_model_status():
    """Return the short, user-facing status line for the model loader.

    A snapshot of ``model_loading_status`` is taken under the lock and the
    text is chosen from its ``status`` field.
    """
    with model_status_lock:
        snapshot = model_loading_status.copy()

    state = snapshot["status"]
    if state == "loaded" and tts_model is not None:
        return "✅ 小杨一号播音员已上线"
    if state == "failed":
        return f"❌ 模型加载失败: {snapshot.get('error', '未知错误')}"
    if state == "downloading":
        percent = snapshot.get("progress", 0)
        return f"⏳ 正在下载模型... {percent}%"
    if state == "loading":
        percent = snapshot.get("progress", 0)
        return f"⏳ 正在加载模型... {percent}%"
    # Any other state, including "loaded" while tts_model is still None.
    return "⏳ 小杨播音员正在上线..."
def get_model_detail_message():
    """Return the current detail message of the loader, or ``""`` if unset."""
    with model_status_lock:
        detail = model_loading_status.get("message", "")
    return detail if detail else ""
def get_decor_images():
    """Return the paths of the two decoration images.

    Each element of the returned pair is the first existing candidate for
    that image, or ``None`` when none of its candidates exist.
    """
    first_name = DECOR_IMAGES[0]
    left_candidates = [first_name, f"./{first_name}", "couple.png", "./couple.png"]
    left = find_first_existing(left_candidates)

    second_name = DECOR_IMAGES[1]
    right_candidates = [second_name, f"./{second_name}", "couple (1).png", "./couple (1).png"]
    right = find_first_existing(right_candidates)

    return left, right
# ---------------- UI styles ----------------
# Stylesheet for the page. The selectors match the elem_id / elem_classes
# values assigned to components in the Blocks layout and the class names
# used in its inline HTML snippets.
custom_css = """
:root{
--card-bg: rgba(255,255,255,0.78);
--card-border: rgba(255,255,255,0.55);
--shadow: 0 10px 30px rgba(0,0,0,0.10);
}
.gradio-container{
background: radial-gradient(circle at 20% 10%, rgba(255,180,200,0.65), transparent 40%),
radial-gradient(circle at 90% 20%, rgba(255,230,170,0.55), transparent 45%),
radial-gradient(circle at 50% 90%, rgba(190,220,255,0.50), transparent 45%),
linear-gradient(135deg, #fff1f5 0%, #fff7ee 40%, #f2fbff 100%);
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Microsoft YaHei', '微软雅黑', Arial, sans-serif;
}
#top_wrap{
background: var(--card-bg);
border: 1px solid var(--card-border);
border-radius: 22px;
box-shadow: var(--shadow);
padding: 18px 18px 12px 18px;
}
.badge9{
display:inline-flex;
align-items:center;
justify-content:center;
width:64px;
height:64px;
border-radius:999px;
background: linear-gradient(135deg,#ffd36a 0%, #ffef9a 100%);
color:#8a4b00;
font-weight:800;
font-size:28px;
box-shadow: 0 10px 22px rgba(255, 211, 106, 0.55);
}
.title{
margin: 8px 0 2px 0;
font-size: 30px;
font-weight: 800;
color: #ff4d7d;
text-shadow: 0 2px 10px rgba(255, 77, 125, 0.15);
}
.sub{
margin: 0;
color:#6b7280;
font-size: 14px;
}
.decor{
border-radius: 18px;
overflow:hidden;
border: 1px solid rgba(255,255,255,0.6);
box-shadow: 0 10px 24px rgba(0,0,0,0.08);
}
.card{
background: var(--card-bg);
border: 1px solid var(--card-border);
border-radius: 22px;
box-shadow: var(--shadow);
padding: 14px;
}
.status-pill input{
border-radius: 999px !important;
font-weight: 700 !important;
}
.love-btn button{
border-radius: 999px !important;
font-weight: 800 !important;
font-size: 18px !important;
padding: 14px 18px !important;
}
.smallhint{
color:#6b7280;
font-size: 12px;
line-height: 1.4;
}
"""
# ---------------- Gradio App ----------------
# Builds the whole page: header, text input, status/controls, output area and
# the event wiring. Component nesting follows the order of the statements.
with gr.Blocks() as demo:
    # Header area at the top of the page.
    with gr.Column(elem_id="top_wrap"):
        p1, p2 = get_decor_images()
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                if p1:
                    gr.Image(value=p1, show_label=False, container=False, height=150, elem_classes=["decor"])
                else:
                    gr.HTML("<div class='smallhint'>(未找到 couple.png,将自动隐藏)</div>")
            with gr.Column(scale=2, min_width=320):
                gr.HTML(
                    """
<div style="display:flex; gap:14px; align-items:center;">
<div class="badge9">9</div>
<div>
<div class="title">💕 九周年纪念 · 爱的声音 💕</div>
<p class="sub">用我的声音,为你朗读每一句情话。愿我们把日子过成诗,把平凡过成浪漫。</p>
</div>
</div>
"""
                )
            with gr.Column(scale=1):
                if p2:
                    gr.Image(value=p2, show_label=False, container=False, height=150, elem_classes=["decor"])
                else:
                    gr.HTML("<div class='smallhint'>(未找到 couple (1).png,将自动隐藏)</div>")

    # Main area: text on the left, controls on the right.
    with gr.Row():
        with gr.Column(scale=2):
            with gr.Column(elem_classes=["card"]):
                gr.Markdown("### ✍️ 要我讲的话")
                text_input = gr.Textbox(
                    placeholder="在这里输入你想要我朗读的文字…(最多 5000 字)",
                    lines=10,
                    max_lines=16,
                )
                char_count = gr.HTML(
                    value="<p style='text-align:right; color:#6b7280;'>字数: 0 / 5000</p>"
                )
        with gr.Column(scale=1, min_width=320):
            with gr.Column(elem_classes=["card"]):
                gr.Markdown("### 📡 服务器状态")
                status_text = gr.Textbox(
                    value="⏳ 正在初始化...",
                    interactive=False,
                    show_label=False,
                    elem_classes=["status-pill"],
                )
                detail_text = gr.Markdown(value="")
                gr.Markdown("### 😊 情感模式")
                emotion = gr.Dropdown(
                    choices=[
                        ("温柔", "neutral"),
                        ("开心", "happy"),
                        ("激动", "excited"),
                        ("平静", "calm"),
                        ("深情", "sad"),
                    ],
                    value="neutral",
                    show_label=False,
                )
                gr.Markdown("### ⚡ 语速")
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    show_label=False,
                )
                gr.HTML("<div class='smallhint'>小提示:首次启动会下载模型,耐心等一下~</div>")

    # Generate button and output area.
    with gr.Column(elem_classes=["card"]):
        with gr.Row():
            generate_btn = gr.Button("💖 让我开说", variant="primary", elem_classes=["love-btn"])
        audio_output = gr.Audio(label="🎵 生成的语音", type="numpy")
        # Hidden until there is an error to show; generate() toggles it.
        error_output = gr.Textbox(label="错误信息", visible=False)

    # Character counter.
    def update_char_count(text):
        """Return the HTML snippet showing the current character count."""
        text = text or ""
        return f"<p style='text-align:right; color:#6b7280;'>字数: {len(text)} / 5000</p>"

    text_input.change(fn=update_char_count, inputs=text_input, outputs=char_count)

    # Speech generation.
    def _generate_reply(audio, error):
        """Build the four handler outputs for an audio/error pair.

        The error box is made visible only when there is an error message;
        previously it stayed hidden, so failures were never shown.
        """
        error_box = gr.Textbox(value=error or "", visible=bool(error))
        return audio, error_box, get_model_status(), get_model_detail_message()

    def generate(text, emotion_val, speed_val):
        """Validate the input, synthesize speech and refresh the status fields.

        Returns a 4-tuple matching ``outputs`` of the click handler: audio,
        error box update, status text and detail text.
        """
        if not text or not text.strip():
            return _generate_reply(None, "请输入要朗读的文本")
        if len(text) > 5000:
            return _generate_reply(None, "文本长度不能超过5000字")
        audio, error = synthesize_speech(text, emotion_val, speed_val)
        if error:
            return _generate_reply(None, error)
        return _generate_reply(audio, None)

    generate_btn.click(
        fn=generate,
        inputs=[text_input, emotion, speed],
        outputs=[audio_output, error_output, status_text, detail_text],
    )

    # Gradio 6+: periodic status refresh (update_status must be defined
    # before it is wired to the timer's tick event).
    def update_status():
        """Return the current status line and detail message."""
        return get_model_status(), get_model_detail_message()

    # Also refresh once on initial page load (avoids a blank status).
    demo.load(fn=update_status, outputs=[status_text, detail_text])
    status_timer = gr.Timer(5.0)
    status_timer.tick(fn=update_status, outputs=[status_text, detail_text])
# Load the model in the background.
def load_model_in_background():
    """Start a daemon thread that waits 5 seconds and then loads the model."""
    separator = "=" * 50

    def _worker():
        time.sleep(5)
        print(separator)
        print("开始加载 TTS 模型...")
        print(separator)
        init_tts_model()
        if not tts_model:
            return
        print(separator)
        print("✓ TTS 模型加载完成!")
        print(separator)

    threading.Thread(target=_worker, daemon=True).start()


load_model_in_background()
if __name__ == "__main__":
    # Report which reference voice files were found (or which are expected).
    found_voices = get_available_voice_files()
    if found_voices:
        print(f"✓ 找到 {len(found_voices)} 个声音文件:")
        listing = found_voices
    else:
        print("警告: 找不到声音文件")
        print("请确保以下文件之一存在于当前目录:")
        listing = voice_files
    for voice_path in listing:
        print(f" - {voice_path}")

    # Check the decoration images; the hint is printed if either is missing.
    if not all(get_decor_images()):
        print("提示:未找到两张装饰图 couple.png / couple (1).png(会自动隐藏,不影响运行)")

    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        css=custom_css,
        theme=gr.themes.Soft(),
    )
    demo.launch(**launch_options)