Spaces:

fengmiguoji
/

f5tts

Build error

App Files Files Community

f5tts / app.py

fengmiguoji

Upload app.py

e6170b9 verified over 1 year ago

raw

history blame contribute delete

5.13 kB

	import os
	import gradio as gr
	from gradio_client import Client, handle_file
	import shutil
	from groq import Groq

	# Module-level Groq API client, created once at import time and used by
	# transcribe_audio() for speech-to-text requests.
	# NOTE(review): constructed with no arguments, so credentials presumably
	# come from the environment (e.g. GROQ_API_KEY) — confirm against the SDK.
	groq_client = Groq()

	def _keep_local_wav(candidate):
	    """Copy a generated ``.wav`` file into the current working directory.

	    Returns the path of the local copy, or ``None`` when *candidate* is not
	    a string, does not end in ``.wav``, or does not exist on disk.
	    """
	    if not isinstance(candidate, str):
	        return None
	    if not candidate.lower().endswith(".wav"):
	        print(f"跳过非音频文件: {candidate}")
	        return None
	    if not os.path.exists(candidate):
	        print(f"文件路径不存在，跳过: {candidate}")
	        return None

	    filename = os.path.basename(candidate)
	    new_path = os.path.join(".", filename)
	    # shutil.copy2 raises SameFileError when source and destination are the
	    # same file, so only copy when the file is not already in place.
	    if os.path.abspath(candidate) != os.path.abspath(new_path):
	        shutil.copy2(candidate, new_path)
	    print(f"音频文件 '{filename}' 已保存到: {new_path}")
	    return new_path


	def run_tts(ref_audio_file, ref_text, gen_text):
	    """Call the remote TTS model and save the generated audio locally.

	    Sends the reference audio, its transcript and the text to synthesise to
	    the ``/infer`` endpoint of the ``abidlabs/E2-F5-TTS`` Space, then copies
	    the first existing ``.wav`` path found in the response into the current
	    working directory.

	    Args:
	        ref_audio_file: Uploaded reference audio; either an object exposing
	            its path as ``.name`` or a plain path string. ``None`` when
	            nothing has been uploaded.
	        ref_text: Transcript of the reference audio.
	        gen_text: Text to synthesise.

	    Returns:
	        A ``(audio_path, ref_text)`` tuple: the local path of the generated
	        ``.wav`` file and the reference text, passed through unchanged.

	    Raises:
	        gr.Error: If no reference audio was supplied, the remote call or the
	            file copy fails, or the response contains no usable ``.wav``
	            file. The first return slot feeds an audio component, so
	            failures are raised for the UI to display instead of being
	            returned as if they were an audio path.
	    """
	    if ref_audio_file is None:
	        raise gr.Error("请上传参考音频文件。")

	    # Accept both file-like objects (path in ``.name``) and plain str paths.
	    ref_audio_path = getattr(ref_audio_file, "name", ref_audio_file)

	    try:
	        # Initialise the client without an auth token.
	        client = Client("abidlabs/E2-F5-TTS")

	        print("调用 /infer 端点...")
	        result = client.predict(
	            ref_audio_orig=handle_file(ref_audio_path),
	            ref_text=ref_text,
	            gen_text=gen_text,
	            exp_name="F5-TTS",
	            remove_silence=False,
	            cross_fade_duration=0.15,
	            api_name="/infer",
	        )
	        print("infer端点返回结果:", result)

	        # Normalise the response to a sequence so a single code path handles
	        # both the multi-value and the single-string case.
	        if isinstance(result, (tuple, list)):
	            candidates = result
	        elif isinstance(result, str):
	            candidates = (result,)
	        else:
	            print(f"跳过非字符串/元组类型的返回值: {result}")
	            candidates = ()

	        # Keep only the first usable audio file.
	        output_file = None
	        for item in candidates:
	            output_file = _keep_local_wav(item)
	            if output_file:
	                break
	    except FileNotFoundError as e:
	        raise gr.Error(f"发生错误：{e}") from e
	    except Exception as e:
	        raise gr.Error(f"发生未知错误：{e}") from e

	    if not output_file:
	        raise gr.Error("未生成音频文件。")
	    return output_file, ref_text


	def transcribe_audio(audio_file):
	    """Transcribe an uploaded audio file using Groq speech recognition.

	    Args:
	        audio_file: Uploaded audio; either an object exposing its path as
	            ``.name`` or a plain path string. ``None`` when nothing has been
	            uploaded.

	    Returns:
	        A ``(text, audio_file)`` tuple. On success ``text`` is the
	        transcription and ``audio_file`` is the input, passed back
	        unchanged. When no file was given or transcription fails, ``text``
	        is an error message and the second element is ``None``.
	    """
	    if audio_file is None:
	        return "请上传参考音频文件。", None

	    # Accept both file-like objects (path in ``.name``) and plain str paths.
	    audio_path = getattr(audio_file, "name", audio_file)
	    try:
	        with open(audio_path, "rb") as file:
	            transcription = groq_client.audio.transcriptions.create(
	                # Send only the base name as the upload's filename so the
	                # server-side temp directory layout is not disclosed.
	                file=(os.path.basename(audio_path), file.read()),
	                model="whisper-large-v3-turbo",
	                language="zh",
	            )
	        return transcription.text, audio_file
	    except Exception as e:
	        # UI boundary: report any failure as text instead of crashing.
	        return f"语音识别失败: {e}", None


	def update_ref_text(audio_file, ref_text_box):
	    """Run speech recognition on the uploaded audio and return the new text.

	    Args:
	        audio_file: The uploaded audio, forwarded to ``transcribe_audio``.
	        ref_text_box: Accepted but not used by this function.

	    Returns:
	        The ``(text, audio_file)`` pair produced by ``transcribe_audio``.
	    """
	    recognized_text, returned_audio = transcribe_audio(audio_file)
	    return recognized_text, returned_audio


	# Build the UI. Components are created in the same order as before:
	# description, reference audio upload, the two text boxes, the audio
	# output, and finally the synthesis button.
	with gr.Blocks(title="快速语音合成") as iface:
	    gr.Markdown("上传参考语音和输入参考及生成文本，生成相应的语音。")

	    ref_audio_input = gr.File(
	        label="参考语音 (上传音频自动识别)",
	        file_types=["audio"],
	    )
	    ref_text_input = gr.Textbox(label="参考语言 (文本)")
	    gen_text_input = gr.Textbox(label="生成语言 (文本)")

	    audio_output = gr.Audio(label="生成的语音 (下载)")

	    # On upload, update_ref_text receives the file and the current
	    # reference text; its two results go to the reference text box and the
	    # file component.
	    ref_audio_input.upload(
	        fn=update_ref_text,
	        inputs=[ref_audio_input, ref_text_input],
	        outputs=[ref_text_input, ref_audio_input],
	    )

	    # The button calls run_tts; its two results go to the audio output and
	    # the reference text box.
	    btn = gr.Button("合成")
	    btn.click(
	        fn=run_tts,
	        inputs=[ref_audio_input, ref_text_input, gen_text_input],
	        outputs=[audio_output, ref_text_input],
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)