Spaces:
Sleeping
Sleeping
| """ | |
| 九周年纪念语音克隆应用 - Gradio 版本 | |
| 用于部署到 Hugging Face Spaces | |
| """ | |
| import gradio as gr | |
| import os | |
| import tempfile | |
| import io | |
| import time | |
| import threading | |
| from pathlib import Path | |
# Environment defaults, applied before the TTS stack is imported.
# The first four configure matplotlib for headless use (the original note says
# they avoid blocking operations); the last two pre-accept the Coqui TTS
# licence prompt. ``setdefault`` keeps any value already set by the host.
_ENV_DEFAULTS = (
    ("MPLBACKEND", "Agg"),
    ("MPLCONFIGDIR", "/tmp/matplotlib"),
    ("DISPLAY", ""),
    ("MPL_DISABLE_FONTCACHE", "1"),
    ("COQUI_TOS_AGREED", "1"),
    ("TTS_AGREE_TO_TERMS", "1"),
)
for _env_name, _env_value in _ENV_DEFAULTS:
    os.environ.setdefault(_env_name, _env_value)
# Module-level state
# Loaded TTS model instance; stays None until init_tts_model() assigns it.
tts_model = None
# Lazily imported ``TTS.api.TTS`` class; None until init_tts_model() imports it.
TTS = None
# Snapshot of the model-loading progress, read by the UI status helpers.
# "status" takes the values "not_started", "loading", "downloading",
# "loaded" and "failed" in this file.
model_loading_status = {
    "status": "not_started",
    "message": "",
    "progress": 0,
    "start_time": None,
    "error": None,
}
# Held while reading or writing model_loading_status.
model_status_lock = threading.Lock()
# Reference voice file names (the original note says this supports the
# Hugging Face Spaces file system).
voice_files = ["xiujia.wav", "xiujia_v2.mp3"]
# Two decorative PNGs (expected in the repository root).
DECOR_IMAGES = ["couple1 (1).png", "couple (1).png"]
def find_first_existing(paths):
    """Return the first entry of ``paths`` that exists on disk, as a string.

    Falsy entries (``None``, ``""``) are skipped. Returns ``None`` when no
    candidate exists.
    """
    return next(
        (
            str(candidate)
            for candidate in paths
            if candidate and Path(candidate).exists()
        ),
        None,
    )
def get_available_voice_files():
    """Return the reference voice recordings that exist on disk.

    The candidate locations are probed in a fixed order (working directory,
    ``/tmp``, then ``./``-prefixed working directory). Several candidates can
    name the same file (``"xiujia.wav"`` and ``"./xiujia.wav"``), so results
    are de-duplicated by resolved path; the first spelling found is kept.

    Returns:
        list[str]: Existing candidate paths in search order, one per file.
    """
    candidates = (
        "xiujia.wav",
        "xiujia_v2.mp3",
        "/tmp/xiujia.wav",
        "/tmp/xiujia_v2.mp3",
        "./xiujia.wav",
        "./xiujia_v2.mp3",
    )
    found = []
    seen = set()
    for candidate in candidates:
        path = Path(candidate)
        if not path.exists():
            continue
        # Resolve so that "x.wav" and "./x.wav" count as one recording.
        resolved = path.resolve()
        if resolved in seen:
            continue
        seen.add(resolved)
        found.append(candidate)
    return found
def _update_model_status(**fields):
    """Apply ``fields`` to ``model_loading_status`` while holding its lock."""
    with model_status_lock:
        model_loading_status.update(fields)


def _is_xtts_model_cached():
    """Return True when the XTTS v2 files appear to be in the local cache."""
    # Coqui TTS resolves its data directory from TTS_HOME, then XDG_DATA_HOME,
    # then ~/.local/share on Linux (see the ModelManager reference).
    tts_home = os.environ.get("TTS_HOME")
    xdg_data_home = os.environ.get("XDG_DATA_HOME")
    if tts_home:
        data_root = Path(tts_home).expanduser()
    elif xdg_data_home:
        data_root = Path(xdg_data_home).expanduser()
    else:
        data_root = Path.home() / ".local" / "share"
    cache_dir = data_root / "tts"
    candidates = (
        # Layout used by Coqui's ModelManager: "<type>--<lang>--<dataset>--<model>".
        cache_dir / "tts_models--multilingual--multi-dataset--xtts_v2",
        # Nested layout probed by the earlier version of this function.
        cache_dir / "tts_models" / "multilingual" / "multi-dataset" / "xtts_v2",
    )
    return any(candidate.exists() for candidate in candidates)


def init_tts_model():
    """Import Coqui TTS and load the XTTS v2 model into ``tts_model``.

    Progress is published through ``model_loading_status`` so the UI can
    poll it. The call blocks for the whole download/load.

    Returns:
        bool: True when the model is available (already loaded or loaded
        now), False when importing or loading failed. On failure the error
        text is stored in ``model_loading_status["error"]``.
    """
    global tts_model, TTS
    if tts_model is not None:
        return True
    try:
        started_at = time.time()
        _update_model_status(
            status="loading",
            message="正在导入 TTS 库...",
            progress=10,
            start_time=started_at,
            error=None,
        )
        # Import lazily: the TTS package is heavy and must not slow UI start.
        if TTS is None:
            from TTS.api import TTS as _TTS
            TTS = _TTS
        _update_model_status(
            progress=20,
            message="TTS 库导入成功,检查模型文件...",
        )
        # The message chosen here stays visible for the whole blocking
        # TTS(...) call below, so it is not overwritten afterwards.
        if _is_xtts_model_cached():
            _update_model_status(
                message="模型文件已存在,正在加载模型...",
                progress=40,
            )
        else:
            _update_model_status(
                status="downloading",
                message="正在下载模型文件(首次下载,可能需要 10-20 分钟)...",
                progress=25,
            )
        tts_model = TTS(
            model_name="tts_models/multilingual/multi-dataset/xtts_v2",
            progress_bar=True,
            gpu=False,
        )
        elapsed = time.time() - started_at
        _update_model_status(
            status="loaded",
            progress=100,
            message=f"模型加载完成!(耗时 {elapsed:.1f} 秒)",
        )
        return True
    except Exception as e:
        # Top-level boundary of the loader thread: log, record, report False.
        import traceback
        traceback.print_exc()
        _update_model_status(
            status="failed",
            error=str(e),
            message=f"模型加载失败: {e}",
        )
        return False
def synthesize_speech(text, emotion, speed):
    """Synthesize ``text`` in the cloned voice and return the audio.

    Args:
        text: Text to read aloud; must be non-blank and at most 5000 characters.
        emotion: Accepted for interface compatibility; not used yet.
        speed: Accepted for interface compatibility; not used yet.

    Returns:
        tuple: ``((sample_rate, audio_data), None)`` on success, or
        ``(None, error_message)`` when validation or synthesis fails.
        Exceptions are caught, logged and reported through the message.
    """
    output_path = None
    try:
        if tts_model is None:
            return None, "TTS 模型未加载,请等待模型加载完成"
        if not text or not text.strip():
            return None, "请输入要朗读的文本"
        if len(text) > 5000:
            return None, "文本长度不能超过5000字"
        available_files = get_available_voice_files()
        if not available_files:
            return None, "没有找到可用的声音文件(xiujia.wav / xiujia_v2.mp3)"
        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim first.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        # A single reference is passed as a plain path, several as a list.
        speaker_files = available_files if len(available_files) > 1 else available_files[0]
        tts_model.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=speaker_files,
            language="zh",
        )
        import soundfile as sf
        audio_data, sample_rate = sf.read(output_path)
        return (sample_rate, audio_data), None
    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"合成失败: {str(e)}"
    finally:
        # Remove the temp file on every path, including failures.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
def get_model_status():
    """Return a one-line, user-facing summary of the model loading state.

    Reads a copy of ``model_loading_status`` under its lock and maps the
    ``status`` field to a display string. "loaded" is only reported when
    ``tts_model`` is actually set.
    """
    with model_status_lock:
        status = model_loading_status.copy()
    state = status["status"]
    if state == "loaded" and tts_model is not None:
        return "✅ 小杨一号播音员已上线"
    if state == "failed":
        # The "error" key always exists, so a .get() default would never
        # apply; ``or`` also covers None and an empty message.
        return f"❌ 模型加载失败: {status.get('error') or '未知错误'}"
    if state == "downloading":
        progress = status.get("progress", 0)
        return f"⏳ 正在下载模型... {progress}%"
    if state == "loading":
        progress = status.get("progress", 0)
        return f"⏳ 正在加载模型... {progress}%"
    return "⏳ 小杨播音员正在上线..."
def get_model_detail_message():
    """Return the current detail message of the model load, or ``""`` if none."""
    with model_status_lock:
        detail = model_loading_status.get("message", "")
    if not detail:
        return ""
    return detail
def get_decor_images():
    """Locate the two decorative images.

    Returns:
        tuple: ``(first, second)`` paths as strings; an element is ``None``
        when none of its candidate files exists.
    """
    first_name = DECOR_IMAGES[0]
    second_name = DECOR_IMAGES[1]
    first_candidates = [first_name, f"./{first_name}", "couple.png", "./couple.png"]
    second_candidates = [
        second_name,
        f"./{second_name}",
        "couple (1).png",
        "./couple (1).png",
    ]
    return (
        find_first_existing(first_candidates),
        find_first_existing(second_candidates),
    )
# ---------------- UI styles ----------------
# Stylesheet handed to ``demo.launch(css=...)``. The selectors target the
# ``elem_id`` / ``elem_classes`` values assigned in the layout below
# (``top_wrap``, ``decor``, ``card``, ``status-pill``, ``love-btn``) and the
# classes used inside the raw HTML snippets (``badge9``, ``title``, ``sub``,
# ``smallhint``).
custom_css = """
:root{
--card-bg: rgba(255,255,255,0.78);
--card-border: rgba(255,255,255,0.55);
--shadow: 0 10px 30px rgba(0,0,0,0.10);
}
.gradio-container{
background: radial-gradient(circle at 20% 10%, rgba(255,180,200,0.65), transparent 40%),
radial-gradient(circle at 90% 20%, rgba(255,230,170,0.55), transparent 45%),
radial-gradient(circle at 50% 90%, rgba(190,220,255,0.50), transparent 45%),
linear-gradient(135deg, #fff1f5 0%, #fff7ee 40%, #f2fbff 100%);
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Microsoft YaHei', '微软雅黑', Arial, sans-serif;
}
#top_wrap{
background: var(--card-bg);
border: 1px solid var(--card-border);
border-radius: 22px;
box-shadow: var(--shadow);
padding: 18px 18px 12px 18px;
}
.badge9{
display:inline-flex;
align-items:center;
justify-content:center;
width:64px;
height:64px;
border-radius:999px;
background: linear-gradient(135deg,#ffd36a 0%, #ffef9a 100%);
color:#8a4b00;
font-weight:800;
font-size:28px;
box-shadow: 0 10px 22px rgba(255, 211, 106, 0.55);
}
.title{
margin: 8px 0 2px 0;
font-size: 30px;
font-weight: 800;
color: #ff4d7d;
text-shadow: 0 2px 10px rgba(255, 77, 125, 0.15);
}
.sub{
margin: 0;
color:#6b7280;
font-size: 14px;
}
.decor{
border-radius: 18px;
overflow:hidden;
border: 1px solid rgba(255,255,255,0.6);
box-shadow: 0 10px 24px rgba(0,0,0,0.08);
}
.card{
background: var(--card-bg);
border: 1px solid var(--card-border);
border-radius: 22px;
box-shadow: var(--shadow);
padding: 14px;
}
.status-pill input{
border-radius: 999px !important;
font-weight: 700 !important;
}
.love-btn button{
border-radius: 999px !important;
font-weight: 800 !important;
font-size: 18px !important;
padding: 14px 18px !important;
}
.smallhint{
color:#6b7280;
font-size: 12px;
line-height: 1.4;
}
"""
# ---------------- Gradio App ----------------
# Builds the whole UI: header with decorative images, text input, status and
# voice controls, the generate button with its outputs, and the event wiring.
with gr.Blocks() as demo:
    # Header area with the two decorative images and the title
    with gr.Column(elem_id="top_wrap"):
        p1, p2 = get_decor_images()
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                if p1:
                    gr.Image(value=p1, show_label=False, container=False, height=150, elem_classes=["decor"])
                else:
                    gr.HTML("<div class='smallhint'>(未找到 couple.png,将自动隐藏)</div>")
            with gr.Column(scale=2, min_width=320):
                gr.HTML(
                    """
<div style="display:flex; gap:14px; align-items:center;">
<div class="badge9">9</div>
<div>
<div class="title">💕 九周年纪念 · 爱的声音 💕</div>
<p class="sub">用我的声音,为你朗读每一句情话。愿我们把日子过成诗,把平凡过成浪漫。</p>
</div>
</div>
"""
                )
            with gr.Column(scale=1):
                if p2:
                    gr.Image(value=p2, show_label=False, container=False, height=150, elem_classes=["decor"])
                else:
                    gr.HTML("<div class='smallhint'>(未找到 couple (1).png,将自动隐藏)</div>")

    # Main area: text on the left, controls on the right
    with gr.Row():
        with gr.Column(scale=2):
            with gr.Column(elem_classes=["card"]):
                gr.Markdown("### ✍️ 要我讲的话")
                text_input = gr.Textbox(
                    placeholder="在这里输入你想要我朗读的文字…(最多 5000 字)",
                    lines=10,
                    max_lines=16,
                )
                char_count = gr.HTML(
                    value="<p style='text-align:right; color:#6b7280;'>字数: 0 / 5000</p>"
                )
        with gr.Column(scale=1, min_width=320):
            with gr.Column(elem_classes=["card"]):
                gr.Markdown("### 📡 服务器状态")
                status_text = gr.Textbox(
                    value="⏳ 正在初始化...",
                    interactive=False,
                    show_label=False,
                    elem_classes=["status-pill"],
                )
                detail_text = gr.Markdown(value="")
                gr.Markdown("### 😊 情感模式")
                emotion = gr.Dropdown(
                    choices=[
                        ("温柔", "neutral"),
                        ("开心", "happy"),
                        ("激动", "excited"),
                        ("平静", "calm"),
                        ("深情", "sad"),
                    ],
                    value="neutral",
                    show_label=False,
                )
                gr.Markdown("### ⚡ 语速")
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    show_label=False,
                )
                gr.HTML("<div class='smallhint'>小提示:首次启动会下载模型,耐心等一下~</div>")

    # Generate button and output area
    with gr.Column(elem_classes=["card"]):
        with gr.Row():
            generate_btn = gr.Button("💖 让我开说", variant="primary", elem_classes=["love-btn"])
        audio_output = gr.Audio(label="🎵 生成的语音", type="numpy")
        # Hidden while there is nothing to report; generate() reveals it.
        error_output = gr.Textbox(label="错误信息", visible=False)

    # Character counter
    def update_char_count(text):
        """Return the HTML for the character counter under the text box."""
        text = text or ""
        return f"<p style='text-align:right; color:#6b7280;'>字数: {len(text)} / 5000</p>"

    text_input.change(fn=update_char_count, inputs=text_input, outputs=char_count)

    # Speech generation
    def generate(text, emotion_val, speed_val):
        """Validate the input, synthesize speech and refresh the status widgets.

        Returns values for ``audio_output``, ``error_output``, ``status_text``
        and ``detail_text``, in that order. The error box is shown only when
        there is an error message, and hidden again on success.
        """
        if not text or not text.strip():
            audio, error = None, "请输入要朗读的文本"
        elif len(text) > 5000:
            audio, error = None, "文本长度不能超过5000字"
        else:
            audio, error = synthesize_speech(text, emotion_val, speed_val)
        if error:
            audio = None
        # Returning a component instance updates only the given properties.
        error_box = gr.Textbox(value=error or "", visible=bool(error))
        return audio, error_box, get_model_status(), get_model_detail_message()

    generate_btn.click(
        fn=generate,
        inputs=[text_input, emotion, speed],
        outputs=[audio_output, error_output, status_text, detail_text],
    )

    # Periodic status refresh (update_status must be defined before tick)
    def update_status():
        """Return the current status line and detail message for the UI."""
        return get_model_status(), get_model_detail_message()

    # Refresh once on page load as well, so the status is not left stale
    demo.load(fn=update_status, outputs=[status_text, detail_text])
    status_timer = gr.Timer(5.0)
    status_timer.tick(fn=update_status, outputs=[status_text, detail_text])
# Load the model in the background
def load_model_in_background():
    """Start TTS model initialisation on a daemon thread and return at once."""
    banner = "=" * 50

    def _announce(message):
        # Print ``message`` framed by two banner lines.
        print(banner)
        print(message)
        print(banner)

    def _worker():
        # Fixed 5-second delay before loading — presumably to let the UI
        # start first; confirm before changing.
        time.sleep(5)
        _announce("开始加载 TTS 模型...")
        init_tts_model()
        if not tts_model:
            return
        _announce("✓ TTS 模型加载完成!")

    threading.Thread(target=_worker, daemon=True).start()


load_model_in_background()
if __name__ == "__main__":
    # Report which reference voice recordings were found, or which are expected.
    found_voices = get_available_voice_files()
    if found_voices:
        print(f"✓ 找到 {len(found_voices)} 个声音文件:")
        listed_voices = found_voices
    else:
        print("警告: 找不到声音文件")
        print("请确保以下文件之一存在于当前目录:")
        listed_voices = voice_files
    for voice_name in listed_voices:
        print(f" - {voice_name}")

    # Check the decorative images; a missing one only produces a hint.
    if not all(get_decor_images()):
        print("提示:未找到两张装饰图 couple.png / couple (1).png(会自动隐藏,不影响运行)")

    # NOTE(review): share=True requests a public share link — confirm that is
    # intended for every environment this script runs in.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "css": custom_css,
        "theme": gr.themes.Soft(),
    }
    demo.launch(**launch_options)