Spaces:

invokerx
/

9yearvoice

Sleeping

App Files Files Community

9yearvoice / gradio_app.py

invokerx

Update gradio_app.py

0d296b1 verified 4 months ago

raw

history blame contribute delete

14.9 kB

	"""
	九周年纪念语音克隆应用 - Gradio 版本
	用于部署到 Hugging Face Spaces
	"""
	import gradio as gr
	import os
	import tempfile
	import io
	import time
	import threading
	from pathlib import Path

	# Environment defaults applied before any heavy import: the first four keep
	# matplotlib from performing blocking operations, the last two accept the
	# Coqui TTS licence automatically. setdefault() leaves any value that is
	# already set in the environment untouched.
	for _env_name, _env_default in (
	    ("MPLBACKEND", "Agg"),
	    ("MPLCONFIGDIR", "/tmp/matplotlib"),
	    ("DISPLAY", ""),
	    ("MPL_DISABLE_FONTCACHE", "1"),
	    ("COQUI_TOS_AGREED", "1"),
	    ("TTS_AGREE_TO_TERMS", "1"),
	):
	    os.environ.setdefault(_env_name, _env_default)
	del _env_name, _env_default

	# Global state shared between the loader thread and the UI callbacks.
	tts_model = None  # loaded model instance, None until loading succeeds
	TTS = None  # lazily imported TTS class
	model_loading_status = {
	    "status": "not_started",
	    "message": "",
	    "progress": 0,
	    "start_time": None,
	    "error": None,
	}
	# Guards every read/write of model_loading_status.
	model_status_lock = threading.Lock()

	# Voice sample file paths (supports the Hugging Face Spaces file system).
	voice_files = ["xiujia.wav", "xiujia_v2.mp3"]

	# Two PNGs (placed at the repository root).
	DECOR_IMAGES = ["couple1 (1).png", "couple (1).png"]


	def find_first_existing(paths):
	    """Return the first entry of *paths* that exists on disk, as a string.

	    Falsy entries (``None``, empty strings) are skipped. Returns ``None``
	    when none of the candidates exists.
	    """
	    existing = (
	        str(candidate)
	        for candidate in paths
	        if candidate and Path(candidate).exists()
	    )
	    # The generator is lazy, so probing stops at the first hit.
	    return next(existing, None)


	def get_available_voice_files():
	    """Return the reference voice recordings that exist on disk.

	    Candidates are probed in a fixed order (working directory, ``/tmp``,
	    then ``./``-prefixed names). Several candidates can name the same file
	    (``xiujia.wav`` and ``./xiujia.wav``), so results are de-duplicated by
	    resolved path; otherwise the same recording would be handed to the
	    model more than once as a speaker reference.

	    Returns:
	        list[str]: Existing candidate paths, spelled as in the candidate
	        list, in probe order, each underlying file at most once.
	    """
	    search_paths = (
	        "xiujia.wav",
	        "xiujia_v2.mp3",
	        "/tmp/xiujia.wav",
	        "/tmp/xiujia_v2.mp3",
	        "./xiujia.wav",
	        "./xiujia_v2.mp3",
	    )
	    available_files = []
	    seen = set()
	    for file_path in search_paths:
	        candidate = Path(file_path)
	        if not candidate.exists():
	            continue
	        # resolve() maps "x", "./x" and (when cwd is /tmp) "/tmp/x" onto
	        # the same key.
	        identity = candidate.resolve()
	        if identity in seen:
	            continue
	        seen.add(identity)
	        available_files.append(file_path)
	    return available_files


	def init_tts_model():
	    """Load the XTTS v2 model into the module-level ``tts_model``.

	    The call is a no-op when the model is already loaded. Progress is
	    published through ``model_loading_status`` (always under
	    ``model_status_lock``) so that UI callbacks can poll it while this
	    function runs.

	    Returns:
	        bool: ``True`` when the model is available, ``False`` when importing
	        or constructing it raised. On failure the exception text is stored
	        in ``model_loading_status["error"]`` and the traceback is printed.
	    """
	    global tts_model, TTS

	    if tts_model is not None:
	        return True

	    def _publish(**fields):
	        # Apply a group of status fields in one locked step so a poller
	        # never sees a half-updated status.
	        with model_status_lock:
	            model_loading_status.update(fields)

	    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

	    try:
	        started_at = time.time()
	        _publish(
	            status="loading",
	            message="正在导入 TTS 库...",
	            progress=10,
	            start_time=started_at,
	            error=None,
	        )

	        # Import lazily: the TTS package is heavy and must not delay startup.
	        if TTS is None:
	            from TTS.api import TTS as _TTS
	            TTS = _TTS
	        _publish(progress=20, message="TTS 库导入成功，检查模型文件...")

	        # Best-effort probe for an already-downloaded model; it only selects
	        # the status text shown to the user.
	        # NOTE(review): Coqui's ModelManager appears to store each model in a
	        # single directory named after the model id with "/" replaced by
	        # "--", under TTS_HOME / XDG_DATA_HOME / ~/.local/share + "tts".
	        # Confirm against the installed TTS version. The nested layout that
	        # was checked originally is still probed as well.
	        data_roots = [
	            Path(os.environ[env_name]).expanduser()
	            for env_name in ("TTS_HOME", "XDG_DATA_HOME")
	            if os.environ.get(env_name)
	        ]
	        data_roots.append(Path("~/.local/share").expanduser())
	        layouts = (model_name.replace("/", "--"), os.path.join(*model_name.split("/")))
	        already_cached = any(
	            (root / "tts" / layout).exists()
	            for root in data_roots
	            for layout in layouts
	        )

	        if already_cached:
	            _publish(message="模型文件已存在，正在加载模型...", progress=30)
	        else:
	            _publish(
	                status="downloading",
	                message="正在下载模型文件（首次下载，可能需要 10-20 分钟）...",
	                progress=25,
	            )

	        _publish(progress=40, message="正在初始化模型...")

	        # Constructing TTS downloads the model if needed and loads it (CPU).
	        tts_model = TTS(
	            model_name=model_name,
	            progress_bar=True,
	            gpu=False,
	        )

	        elapsed = time.time() - started_at
	        _publish(
	            status="loaded",
	            progress=100,
	            message=f"模型加载完成！（耗时 {elapsed:.1f} 秒）",
	        )
	        return True

	    except Exception as e:
	        # Top-level boundary of the loader: record the failure for the UI
	        # instead of letting it escape.
	        import traceback

	        traceback.print_exc()
	        _publish(status="failed", error=str(e), message=f"模型加载失败: {e}")
	        return False


	def synthesize_speech(text, emotion, speed):
	    """Synthesize *text* in the cloned voice.

	    ``emotion`` and ``speed`` are accepted to keep the interface stable but
	    are not yet forwarded to the model.

	    Args:
	        text: Text to read aloud; must be non-blank and at most 5000
	            characters.
	        emotion: Reserved, currently unused.
	        speed: Reserved, currently unused.

	    Returns:
	        tuple: ``((sample_rate, audio_data), None)`` on success, otherwise
	        ``(None, message)`` where *message* is a user-facing error string.
	        Exceptions are caught, printed and reported through *message*.
	    """
	    output_path = None
	    try:
	        if tts_model is None:
	            return None, "TTS 模型未加载，请等待模型加载完成"

	        if not text or not text.strip():
	            return None, "请输入要朗读的文本"

	        if len(text) > 5000:
	            return None, "文本长度不能超过5000字"

	        available_files = get_available_voice_files()
	        if not available_files:
	            return None, "没有找到可用的声音文件（xiujia.wav / xiujia_v2.mp3）"

	        # mkstemp creates the file atomically, unlike the deprecated and
	        # race-prone mktemp. Only the path is needed, so close the
	        # descriptor right away.
	        fd, output_path = tempfile.mkstemp(suffix=".wav")
	        os.close(fd)

	        # A single reference is passed as a plain path, several as a list.
	        speaker_files = available_files if len(available_files) > 1 else available_files[0]
	        tts_model.tts_to_file(
	            text=text,
	            file_path=output_path,
	            speaker_wav=speaker_files,
	            language="zh",
	        )

	        import soundfile as sf

	        audio_data, sample_rate = sf.read(output_path)
	        return (sample_rate, audio_data), None

	    except Exception as e:
	        import traceback

	        traceback.print_exc()
	        return None, f"合成失败: {str(e)}"

	    finally:
	        # Remove the temp file on success and on failure alike; removal is
	        # best-effort, so a filesystem error here must not mask the result.
	        if output_path is not None:
	            try:
	                os.remove(output_path)
	            except OSError:
	                pass


	def get_model_status():
	    """Return a one-line, user-facing summary of the model loading state."""
	    # Copy under the lock, then format from the copy without holding it.
	    with model_status_lock:
	        snapshot = dict(model_loading_status)

	    state = snapshot["status"]

	    if state == "loaded" and tts_model is not None:
	        return "✅ 小杨一号播音员已上线"

	    if state == "failed":
	        return f"❌ 模型加载失败: {snapshot.get('error', '未知错误')}"

	    if state == "downloading":
	        progress = snapshot.get("progress", 0)
	        return f"⏳ 正在下载模型... {progress}%"

	    if state == "loading":
	        progress = snapshot.get("progress", 0)
	        return f"⏳ 正在加载模型... {progress}%"

	    # Not started yet, or marked loaded while the model object is missing.
	    return "⏳ 小杨播音员正在上线..."


	def get_model_detail_message():
	    """Return the latest loading progress message, or "" when none is set."""
	    with model_status_lock:
	        detail = model_loading_status.get("message", "")
	    if not detail:
	        return ""
	    return detail


	def get_decor_images():
	    """Return the two decor image paths as a tuple.

	    For each slot the configured ``DECOR_IMAGES`` name is tried first (bare
	    and ``./``-prefixed), then a fallback name. A slot is ``None`` when none
	    of its candidates exists.
	    """
	    first_name = DECOR_IMAGES[0]
	    second_name = DECOR_IMAGES[1]
	    first_candidates = [first_name, f"./{first_name}", "couple.png", "./couple.png"]
	    second_candidates = [second_name, f"./{second_name}", "couple (1).png", "./couple (1).png"]
	    return find_first_existing(first_candidates), find_first_existing(second_candidates)


	# ---------------- UI styles ----------------
	# Custom stylesheet for the app, handed to demo.launch(css=...) in the
	# __main__ section. The selectors correspond to the elem_id / elem_classes
	# values and the inline HTML class names used when the Blocks layout is built
	# (#top_wrap, .badge9, .title, .sub, .decor, .card, .status-pill, .love-btn,
	# .smallhint).
	custom_css = """
	:root{
	--card-bg: rgba(255,255,255,0.78);
	--card-border: rgba(255,255,255,0.55);
	--shadow: 0 10px 30px rgba(0,0,0,0.10);
	}

	.gradio-container{
	background: radial-gradient(circle at 20% 10%, rgba(255,180,200,0.65), transparent 40%),
	radial-gradient(circle at 90% 20%, rgba(255,230,170,0.55), transparent 45%),
	radial-gradient(circle at 50% 90%, rgba(190,220,255,0.50), transparent 45%),
	linear-gradient(135deg, #fff1f5 0%, #fff7ee 40%, #f2fbff 100%);
	font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Microsoft YaHei', '微软雅黑', Arial, sans-serif;
	}

	#top_wrap{
	background: var(--card-bg);
	border: 1px solid var(--card-border);
	border-radius: 22px;
	box-shadow: var(--shadow);
	padding: 18px 18px 12px 18px;
	}

	.badge9{
	display:inline-flex;
	align-items:center;
	justify-content:center;
	width:64px;
	height:64px;
	border-radius:999px;
	background: linear-gradient(135deg,#ffd36a 0%, #ffef9a 100%);
	color:#8a4b00;
	font-weight:800;
	font-size:28px;
	box-shadow: 0 10px 22px rgba(255, 211, 106, 0.55);
	}

	.title{
	margin: 8px 0 2px 0;
	font-size: 30px;
	font-weight: 800;
	color: #ff4d7d;
	text-shadow: 0 2px 10px rgba(255, 77, 125, 0.15);
	}

	.sub{
	margin: 0;
	color:#6b7280;
	font-size: 14px;
	}

	.decor{
	border-radius: 18px;
	overflow:hidden;
	border: 1px solid rgba(255,255,255,0.6);
	box-shadow: 0 10px 24px rgba(0,0,0,0.08);
	}

	.card{
	background: var(--card-bg);
	border: 1px solid var(--card-border);
	border-radius: 22px;
	box-shadow: var(--shadow);
	padding: 14px;
	}

	.status-pill input{
	border-radius: 999px !important;
	font-weight: 700 !important;
	}

	.love-btn button{
	border-radius: 999px !important;
	font-weight: 800 !important;
	font-size: 18px !important;
	padding: 14px 18px !important;
	}

	.smallhint{
	color:#6b7280;
	font-size: 12px;
	line-height: 1.4;
	}
	"""


	# ---------------- Gradio App ----------------
	# Builds the whole UI and wires its event handlers. The result is bound to
	# the module-level name ``demo``.
	with gr.Blocks() as demo:
	    # Header banner: decor image | anniversary title | decor image.
	    with gr.Column(elem_id="top_wrap"):
	        p1, p2 = get_decor_images()

	        with gr.Row(equal_height=True):
	            with gr.Column(scale=1):
	                if p1:
	                    gr.Image(value=p1, show_label=False, container=False, height=150, elem_classes=["decor"])
	                else:
	                    gr.HTML("<div class='smallhint'>（未找到 couple.png，将自动隐藏）</div>")

	            with gr.Column(scale=2, min_width=320):
	                gr.HTML(
	                    """
	<div style="display:flex; gap:14px; align-items:center;">
	<div class="badge9">9</div>
	<div>
	<div class="title">💕 九周年纪念 · 爱的声音 💕</div>
	<p class="sub">用我的声音，为你朗读每一句情话。愿我们把日子过成诗，把平凡过成浪漫。</p>
	</div>
	</div>
	"""
	                )

	            with gr.Column(scale=1):
	                if p2:
	                    gr.Image(value=p2, show_label=False, container=False, height=150, elem_classes=["decor"])
	                else:
	                    gr.HTML("<div class='smallhint'>（未找到 couple (1).png，将自动隐藏）</div>")

	    # Main area: text input on the left, status and controls on the right.
	    with gr.Row():
	        with gr.Column(scale=2):
	            with gr.Column(elem_classes=["card"]):
	                gr.Markdown("### ✍️ 要我讲的话")
	                text_input = gr.Textbox(
	                    placeholder="在这里输入你想要我朗读的文字…（最多 5000 字）",
	                    lines=10,
	                    max_lines=16,
	                )
	                char_count = gr.HTML(
	                    value="<p style='text-align:right; color:#6b7280;'>字数: 0 / 5000</p>"
	                )

	        with gr.Column(scale=1, min_width=320):
	            with gr.Column(elem_classes=["card"]):
	                gr.Markdown("### 📡 服务器状态")
	                status_text = gr.Textbox(
	                    value="⏳ 正在初始化...",
	                    interactive=False,
	                    show_label=False,
	                    elem_classes=["status-pill"],
	                )
	                detail_text = gr.Markdown(value="")

	                gr.Markdown("### 😊 情感模式")
	                emotion = gr.Dropdown(
	                    choices=[
	                        ("温柔", "neutral"),
	                        ("开心", "happy"),
	                        ("激动", "excited"),
	                        ("平静", "calm"),
	                        ("深情", "sad"),
	                    ],
	                    value="neutral",
	                    show_label=False,
	                )

	                gr.Markdown("### ⚡ 语速")
	                speed = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    value=1.0,
	                    step=0.1,
	                    show_label=False,
	                )

	                gr.HTML("<div class='smallhint'>小提示：首次启动会下载模型，耐心等一下～</div>")

	    # Generate button and output area.
	    with gr.Column(elem_classes=["card"]):
	        with gr.Row():
	            generate_btn = gr.Button("💖 让我开说", variant="primary", elem_classes=["love-btn"])
	        audio_output = gr.Audio(label="🎵 生成的语音", type="numpy")
	        # Hidden until there is an error to show (see generate()).
	        error_output = gr.Textbox(label="错误信息", visible=False)

	    # Live character counter below the text box.
	    def update_char_count(text):
	        """Return the counter HTML for the current text (None counts as empty)."""
	        text = text or ""
	        return f"<p style='text-align:right; color:#6b7280;'>字数: {len(text)} / 5000</p>"

	    text_input.change(fn=update_char_count, inputs=text_input, outputs=char_count)

	    # Speech generation.
	    def generate(text, emotion_val, speed_val):
	        """Validate the input, synthesize speech and refresh the status widgets.

	        Returns values for (audio_output, error_output, status_text,
	        detail_text). The error box is created hidden, so on failure it is
	        returned as an update that fills it and makes it visible; on success
	        it is cleared and hidden again. Without this the message would be
	        written to a component the user can never see.
	        """
	        if not text or not text.strip():
	            audio, error = None, "请输入要朗读的文本"
	        elif len(text) > 5000:
	            audio, error = None, "文本长度不能超过5000字"
	        else:
	            audio, error = synthesize_speech(text, emotion_val, speed_val)

	        if error:
	            audio = None
	            error_update = gr.Textbox(value=error, visible=True)
	        else:
	            error_update = gr.Textbox(value="", visible=False)

	        return audio, error_update, get_model_status(), get_model_detail_message()

	    generate_btn.click(
	        fn=generate,
	        inputs=[text_input, emotion, speed],
	        outputs=[audio_output, error_output, status_text, detail_text],
	    )

	    # Gradio 6+: periodic status refresh (update_status must be defined
	    # before it is wired to the timer tick).
	    def update_status():
	        """Return the current (status line, detail message) pair."""
	        return get_model_status(), get_model_detail_message()

	    # Refresh once on page load as well, so the status is not blank.
	    demo.load(fn=update_status, outputs=[status_text, detail_text])

	    status_timer = gr.Timer(5.0)
	    status_timer.tick(fn=update_status, outputs=[status_text, detail_text])


	# Load the model in the background.
	def load_model_in_background():
	    """Start a daemon thread that loads the TTS model after a 5 second delay.

	    Returns immediately; progress banners are printed from the worker.
	    """
	    banner = "=" * 50

	    def _worker():
	        time.sleep(5)
	        print(banner)
	        print("开始加载 TTS 模型...")
	        print(banner)
	        init_tts_model()
	        if not tts_model:
	            return
	        print(banner)
	        print("✓ TTS 模型加载完成！")
	        print(banner)

	    threading.Thread(target=_worker, daemon=True).start()


	# Kick off model loading as soon as the module is executed or imported.
	load_model_in_background()

	if __name__ == "__main__":
	    # Report which reference voice recordings are present before launching.
	    available_files = get_available_voice_files()
	    if available_files:
	        print(f"✓ 找到 {len(available_files)} 个声音文件:")
	        listed_files = available_files
	    else:
	        print("警告: 找不到声音文件")
	        print("请确保以下文件之一存在于当前目录：")
	        listed_files = voice_files
	    for file_path in listed_files:
	        print(f" - {file_path}")

	    # Check the decor images; missing ones only produce a hint.
	    d1, d2 = get_decor_images()
	    if not (d1 and d2):
	        print("提示：未找到两张装饰图 couple.png / couple (1).png（会自动隐藏，不影响运行）")

	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	        "css": custom_css,
	        "theme": gr.themes.Soft(),
	    }
	    demo.launch(**launch_options)