VieNeuTTS

Runtime error

App Files Files Community

VieNeuTTS / app.py

pnnbao-ump

Add VieNeu-TTS app with LFS audio files

ab844ce about 1 month ago

raw

history blame

5.78 kB

	import gradio as gr
	import numpy as np
	import soundfile as sf
	from vieneutts import VieNeuTTS
	import tempfile
	import os

	# Load the model once at import time so every request reuses it.
	def _detect_device():
	    """Return "cuda" when torch reports a usable GPU, otherwise "cpu".

	    CUDA_VISIBLE_DEVICES alone is not a reliable signal: it is commonly
	    unset on GPU machines and may be set on machines without a working
	    CUDA runtime. It is only used as a fallback when torch cannot be
	    imported.
	    """
	    try:
	        import torch  # function-scope import: only needed for detection
	    except ImportError:
	        return "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
	    return "cuda" if torch.cuda.is_available() else "cpu"


	# Resolve the device once so backbone and codec always agree.
	_DEVICE = _detect_device()

	print("Đang tải model...")
	tts = VieNeuTTS(
	    backbone_repo="pnnbao-ump/VieNeu-TTS",
	    backbone_device=_DEVICE,
	    codec_repo="neuphonic/neucodec",
	    codec_device=_DEVICE,
	)

	# Built-in reference voices: display label -> paths of the sample
	# recording ("audio") and of its transcript ("text").
	VOICE_SAMPLES = {
	    label: {
	        "audio": f"./sample/{stem}.wav",
	        "text": f"./sample/{stem}.txt",
	    }
	    for label, stem in (
	        ("Nam miền Nam", "id_0001"),
	        ("Nữ miền Nam", "id_0002"),
	    )
	}

	def synthesize_speech(text, voice_choice, custom_audio=None, custom_text=None):
	    """
	    Synthesize speech for ``text`` using a reference voice.

	    The reference is the uploaded ``custom_audio`` when it is accompanied by
	    a non-blank ``custom_text``; otherwise it is the built-in sample stored
	    under ``voice_choice`` in ``VOICE_SAMPLES``.

	    Args:
	        text: Text to convert to speech.
	        voice_choice: Key of a built-in voice in ``VOICE_SAMPLES``.
	        custom_audio: Path to a custom reference audio file (optional).
	        custom_text: Transcript of the custom reference audio (optional).

	    Returns:
	        A ``(output_path, status_message)`` tuple. ``output_path`` is the
	        path of the generated WAV file, or ``None`` when the input is
	        rejected or an error occurs. ``status_message`` is the user-facing
	        status text.
	    """
	    # Reject empty input up front, before any file or model work is done.
	    if not text or not text.strip():
	        return None, "❌ Vui lòng nhập văn bản cần tổng hợp"

	    try:
	        # Choose the reference audio and its transcript.
	        if custom_audio is not None and custom_text and custom_text.strip():
	            ref_audio_path = custom_audio
	            ref_text = custom_text.strip()
	        elif voice_choice in VOICE_SAMPLES:
	            sample = VOICE_SAMPLES[voice_choice]
	            ref_audio_path = sample["audio"]
	            with open(sample["text"], "r", encoding="utf-8") as f:
	                # Strip so a trailing newline in the transcript file is not
	                # passed on as part of the reference text.
	                ref_text = f.read().strip()
	        else:
	            return None, "❌ Vui lòng chọn giọng hoặc tải lên audio tùy chỉnh"

	        # Encode the reference audio.
	        print(f"Đang encode audio tham chiếu: {ref_audio_path}")
	        ref_codes = tts.encode_reference(ref_audio_path)

	        # Run synthesis.
	        print(f"Đang tổng hợp giọng nói cho: {text[:50]}...")
	        wav = tts.infer(text, ref_codes, ref_text)

	        # Reserve a temp path and close the handle BEFORE writing: opening
	        # a still-open NamedTemporaryFile a second time by name fails on
	        # Windows. delete=False keeps the file so the caller can read it.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
	            output_path = tmp_file.name
	        # 24000 is the sample rate in Hz — presumably the codec's output
	        # rate; TODO confirm against the model.
	        sf.write(output_path, wav, 24000)

	        return output_path, "✅ Tổng hợp thành công!"

	    except Exception as e:
	        # UI boundary: report the failure in the status box instead of
	        # letting the exception propagate to Gradio.
	        print(f"Lỗi: {str(e)}")
	        return None, f"❌ Lỗi: {str(e)}"

	# Sample rows for the examples widget: [input text, built-in voice label].
	examples = [
	    [sentence, voice_label]
	    for sentence, voice_label in (
	        (
	            "Các khóa học trực tuyến đang giúp học sinh tiếp cận kiến thức mọi lúc mọi nơi.",
	            "Nam miền Nam",
	        ),
	        (
	            "Các nghiên cứu về bệnh Alzheimer cho thấy tác dụng tích cực của các bài tập trí não.",
	            "Nữ miền Nam",
	        ),
	        (
	            "Một tiểu thuyết trinh thám hiện đại dẫn dắt độc giả qua những tình tiết phức tạp.",
	            "Nam miền Nam",
	        ),
	    )
	]

	# Build the Gradio interface. The two Markdown texts are kept in constants
	# so the layout code below stays compact.
	_INTRO_MARKDOWN = """
	# 🎙️ VieNeu-TTS: Vietnamese Text-to-Speech

	Hệ thống tổng hợp tiếng nói tiếng Việt sử dụng mô hình ngôn ngữ lớn và neural codec.

	Hướng dẫn sử dụng:
	1. Nhập văn bản tiếng Việt cần chuyển thành giọng nói
	2. Chọn giọng có sẵn hoặc tải lên audio tùy chỉnh
	3. Nhấn "Tổng hợp giọng nói"
	"""

	_FOOTER_MARKDOWN = """
	---
	Lưu ý:
	- Model hỗ trợ tiếng Việt với các giọng miền Nam
	- Chất lượng giọng nói phụ thuộc vào audio tham chiếu
	- Để có kết quả tốt nhất, audio tham chiếu nên rõ ràng, không nhiễu

	Liên kết:
	- [GitHub Repository](https://github.com/pnnbao97/VieNeu-TTS)
	- [Model on Hugging Face](https://huggingface.co/pnnbao-ump/VieNeu-TTS)
	"""

	with gr.Blocks(title="VieNeu-TTS: Vietnamese Text-to-Speech") as demo:
	    gr.Markdown(_INTRO_MARKDOWN)

	    with gr.Row():
	        # First column: all inputs plus the trigger button.
	        with gr.Column():
	            # Text to synthesize.
	            text_input = gr.Textbox(
	                lines=5,
	                label="Văn bản đầu vào",
	                placeholder="Nhập văn bản tiếng Việt...",
	            )

	            # Built-in voice picker; options come from VOICE_SAMPLES.
	            voice_select = gr.Radio(
	                label="Chọn giọng có sẵn",
	                choices=list(VOICE_SAMPLES),
	                value="Nam miền Nam",
	            )

	            # Optional custom reference voice (created with open=False).
	            with gr.Accordion("Hoặc sử dụng giọng tùy chỉnh", open=False):
	                custom_audio = gr.Audio(
	                    type="filepath",
	                    label="Tải lên file audio mẫu (.wav)",
	                )
	                custom_text = gr.Textbox(
	                    lines=3,
	                    label="Nội dung của audio mẫu",
	                    placeholder="Nhập chính xác nội dung trong audio...",
	                )

	            # Button that triggers synthesis.
	            submit_btn = gr.Button("🎵 Tổng hợp giọng nói", variant="primary")

	        # Second column: synthesized audio and status message.
	        with gr.Column():
	            audio_output = gr.Audio(label="Kết quả")
	            status_output = gr.Textbox(label="Trạng thái")

	    # Examples supply only text and voice; synthesize_speech's custom_*
	    # parameters keep their defaults for them.
	    # NOTE(review): cache_examples=True presumably makes Gradio run fn on
	    # every example ahead of time — confirm the startup cost is acceptable.
	    example_inputs = [text_input, voice_select]
	    gr.Examples(
	        fn=synthesize_speech,
	        examples=examples,
	        inputs=example_inputs,
	        outputs=[audio_output, status_output],
	        cache_examples=True,
	    )

	    # The button passes the full input set, including the custom reference.
	    submit_btn.click(
	        fn=synthesize_speech,
	        inputs=example_inputs + [custom_audio, custom_text],
	        outputs=[audio_output, status_output],
	    )

	    gr.Markdown(_FOOTER_MARKDOWN)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()