Amadeus_Project

Runtime error

Amadeus_Project / app.py

johntito

```

2ef7c17 11 months ago

3.93 kB

	import os
	import subprocess
	import logging
	import torch
	import gradio as gr
	from scipy.io.wavfile import write

	# 自定义模块
	import commons
	import utils
	from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
	from models import SynthesizerTrn
	from text.symbols import symbols
	from text import text_to_sequence

	# 配置日志
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# 编译 monotonic_align 模块
	def compile_monotonic_align():
	try:

	os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')

	logger.info("Successfully compiled monotonic_align.")
	except subprocess.CalledProcessError as e:
	logger.error(f"Failed to compile monotonic_align: {e}")
	raise RuntimeError("Compilation of monotonic_align failed.")

	# 加载配置和模型
	def load_config_and_model(config_path, checkpoint_path):
	if not os.path.exists(config_path):
	raise FileNotFoundError(f"Config file not found: {config_path}")
	if not os.path.exists(checkpoint_path):
	raise FileNotFoundError(f"Checkpoint file not found: {checkpoint_path}")

	# 加载超参数
	hps = utils.get_hparams_from_file(config_path)
	logger.info("Loaded hyperparameters from config file.")

	# 初始化模型
	net_g = SynthesizerTrn(
	len(symbols),
	hps.data.filter_length // 2 + 1,
	hps.train.segment_size // hps.data.hop_length,
	**hps.model,
	)
	net_g.eval()
	logger.info("Initialized SynthesizerTrn model.")

	# 加载预训练权重
	utils.load_checkpoint(checkpoint_path, net_g, None)
	logger.info(f"Loaded model checkpoint from {checkpoint_path}.")

	return hps, net_g

	# 文本到语音合成
	def text_to_speech(content, hps, net_g):
	if not content or not isinstance(content, str):
	raise ValueError("Input text is empty or invalid.")

	try:
	# 将文本转换为序列
	stn_tst = text_to_sequence(content, hps.data.text_cleaners)
	if hps.data.add_blank:
	stn_tst = commons.intersperse(stn_tst, 0)
	stn_tst = torch.LongTensor(stn_tst)

	# 模型推理
	with torch.no_grad():
	x_tst = stn_tst.unsqueeze(0)
	x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
	audio = net_g.infer(
	x_tst, x_tst_lengths, noise_scale=0.667, noise_scale_w=0.8, length_scale=1
	)[0][0, 0].data.float().numpy()

	return hps.data.sampling_rate, audio
	except Exception as e:
	logger.error(f"Error during text-to-speech synthesis: {e}")
	raise RuntimeError("Failed to generate audio.")

	# Gradio 界面
	def create_gradio_interface(hps, net_g):
	def safe_syn(content):
	try:
	return text_to_speech(content, hps, net_g)
	except Exception as e:
	logger.error(f"Error in Gradio interface: {e}")
	return None

	app = gr.Blocks()
	with app:
	with gr.Tabs():
	with gr.TabItem("Basic"):
	input1 = gr.Textbox(label="Input Text", placeholder="Enter text here...")
	submit = gr.Button("Convert", variant="primary")
	output1 = gr.Audio(label="Output Audio")
	submit.click(safe_syn, input1, output1)

	return app

	# 主函数
	def main():
	try:
	# 编译 monotonic_align
	compile_monotonic_align()

	# 加载配置和模型
	config_path = "configs/steins_gate_base.json"
	checkpoint_path = "G_265000.pth"
	hps, net_g = load_config_and_model(config_path, checkpoint_path)

	# 创建 Gradio 界面
	app = create_gradio_interface(hps, net_g)
	logger.info("Starting Gradio interface...")
	app.launch()
	except Exception as e:
	logger.critical(f"Fatal error: {e}")
	exit(1)

	if __name__ == "__main__":
	main()