Spaces:

fengmiguoji
/

f5tts

Build error

App Files Files Community

fengmiguoji committed on Jan 16, 2025

Commit

e6170b9

verified ·

1 Parent(s): 438f46f

Upload app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+import gradio as gr
+from gradio_client import Client, handle_file
+import shutil
+from groq import Groq
+groq_client = Groq()
def _save_wav_to_cwd(candidate):
    """Copy *candidate* into the current directory if it is an existing .wav path.

    Returns the path of the local copy, or ``None`` when *candidate* is not a
    usable audio file (not a string, not a ``.wav`` name, or missing on disk).
    """
    if not isinstance(candidate, str):
        return None
    if not candidate.lower().endswith(".wav"):
        print(f"跳过非音频文件: {candidate}")
        return None
    if not os.path.exists(candidate):
        print(f"文件路径不存在，跳过: {candidate}")
        return None

    filename = os.path.basename(candidate)
    new_path = os.path.join(".", filename)
    # shutil.copy2 raises SameFileError when source and destination are the
    # same file, so skip the copy if the result already lives here.
    already_local = os.path.exists(new_path) and os.path.samefile(candidate, new_path)
    if not already_local:
        shutil.copy2(candidate, new_path)
    print(f"音频文件 '{filename}' 已保存到: {new_path}")
    return new_path


def run_tts(ref_audio_file, ref_text, gen_text):
    """Synthesize *gen_text* in the voice of the reference audio.

    Calls the ``/infer`` endpoint of the ``abidlabs/E2-F5-TTS`` Space and
    copies the first ``.wav`` file found in its result into the current
    directory.

    Args:
        ref_audio_file: Uploaded reference audio — either an object exposing
            its on-disk path as ``.name`` or the path itself as a string.
            ``None`` means nothing was uploaded.
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize.

    Returns:
        ``(audio_path, ref_text)``. ``audio_path`` is the local copy of the
        generated ``.wav``, or ``None`` on any failure. Failure messages are
        printed rather than returned, because the first slot feeds an audio
        output and a message string there would be treated as a file path.
        ``ref_text`` is always passed through unchanged.
    """
    # NOTE(review): failures are only logged; consider surfacing them in the
    # UI (e.g. a Gradio warning toast) if user-visible feedback is wanted.
    if ref_audio_file is None:
        print("请上传参考音频文件。")
        return None, ref_text

    try:
        # Accept both an upload object with ``.name`` and a bare path string.
        ref_audio_path = getattr(ref_audio_file, "name", ref_audio_file)

        # Anonymous client: no token is supplied.
        client = Client("abidlabs/E2-F5-TTS")
        print("调用 /infer 端点...")
        result = client.predict(
            ref_audio_orig=handle_file(ref_audio_path),
            ref_text=ref_text,
            gen_text=gen_text,
            exp_name="F5-TTS",
            remove_silence=False,
            cross_fade_duration=0.15,
            api_name="/infer",
        )
        print("infer端点返回结果:", result)

        # Normalize the result into a sequence of candidate values.
        if isinstance(result, str):
            candidates = (result,)
        elif isinstance(result, (tuple, list)):
            candidates = result
        else:
            print(f"跳过非字符串/元组类型的返回值: {result}")
            candidates = ()

        # Keep only the first usable audio file.
        for candidate in candidates:
            output_file = _save_wav_to_cwd(candidate)
            if output_file:
                return output_file, ref_text

        print("未生成音频文件。")
        return None, ref_text
    except FileNotFoundError as e:
        print(f"发生错误：{e}")
        return None, ref_text
    except Exception as e:
        # UI boundary: never let an exception escape into the event handler.
        print(f"发生未知错误：{e}")
        return None, ref_text
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text with Groq's Whisper model.

    The request uses the ``whisper-large-v3-turbo`` model with the language
    fixed to ``zh``.

    Args:
        audio_file: Uploaded audio — either an object exposing its on-disk
            path as ``.name`` or the path itself as a string. ``None`` means
            nothing was uploaded.

    Returns:
        ``(text, audio_file)`` on success, where ``audio_file`` is the input
        passed back unchanged. On failure, ``(message, None)`` where
        ``message`` describes what went wrong.
    """
    if audio_file is None:
        return "请上传参考音频文件。", None

    try:
        # Accept both an upload object with ``.name`` and a bare path string.
        audio_path = getattr(audio_file, "name", audio_file)
        with open(audio_path, "rb") as audio_stream:
            transcription = groq_client.audio.transcriptions.create(
                # Send only the base name; the full local path is not needed.
                file=(os.path.basename(audio_path), audio_stream.read()),
                model="whisper-large-v3-turbo",
                language="zh",
            )
        return transcription.text, audio_file
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"语音识别失败: {e}", None
def update_ref_text(audio_file, ref_text_box):
    """Run speech recognition on the upload and return the new field values.

    Args:
        audio_file: The uploaded reference audio.
        ref_text_box: Current reference text; accepted but not used.

    Returns:
        ``(recognized_text, audio_file)`` exactly as produced by
        ``transcribe_audio``.
    """
    outcome = transcribe_audio(audio_file)
    recognized_text, uploaded_file = outcome
    return recognized_text, uploaded_file
# Build the page: reference audio and two text boxes in, synthesized audio out.
with gr.Blocks(title="快速语音合成") as iface:
    gr.Markdown("上传参考语音和输入参考及生成文本，生成相应的语音。")

    ref_audio_input = gr.File(
        label="参考语音 (上传音频自动识别)",
        file_types=["audio"],
    )
    ref_text_input = gr.Textbox(label="参考语言 (文本)")
    gen_text_input = gr.Textbox(label="生成语言 (文本)")
    audio_output = gr.Audio(label="生成的语音 (下载)")

    # Uploading a reference clip fills the reference-text box with its
    # transcription.
    ref_audio_input.upload(
        fn=update_ref_text,
        inputs=[ref_audio_input, ref_text_input],
        outputs=[ref_text_input, ref_audio_input],
    )

    # The button runs synthesis and writes to the audio player and the
    # reference-text box.
    btn = gr.Button("合成")
    btn.click(
        fn=run_tts,
        inputs=[ref_audio_input, ref_text_input, gen_text_input],
        outputs=[audio_output, ref_text_input],
    )

if __name__ == "__main__":
    iface.launch(share=True)