Spaces:

swc2
/

Target-speaker-extraction

Sleeping

App Files Files Community

swc2 committed on Jan 13, 2025

Commit

67a5c80

1 Parent(s): fca772e

app revise test

Browse files

Files changed (1) hide show

app.py +35 -9

app.py CHANGED Viewed

@@ -2,8 +2,16 @@ import gradio as gr
 #from inference import InferencePipeline
 #i = InferencePipeline()
-def gradio_voice_conversion(audio_file_path):
     """
     Wrapper function to handle Gradio's audio input and pass the file path to the voice conversion function.
     Gradio passes audio data as a tuple: (temp file path, sample rate).
@@ -17,14 +25,32 @@ def gradio_voice_conversion(audio_file_path):
     return random_wav
 # Define your Gradio interface
-demo = gr.Interface(
-    fn=gradio_voice_conversion,  # Use the wrapper function for voice conversion
-    inputs=gr.Audio(label="Record or upload your voice", type="filepath"),  # Specify that you want the filepath
-    outputs=gr.Audio(label="Converted Voice"),
-    title="Voice Conversion Demo",
-    description="Voice Conversion: Transform the input voice to a target voice.",
-    allow_flagging="never"
-)
 if __name__ == "__main__":
     demo.launch()

 #from inference import InferencePipeline
 #i = InferencePipeline()
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# def convert_audio_to_wav(file_path):
+#     """Convert any supported format (mp3, etc.) to wav using librosa"""
+#     output_path = "temp_input.wav"
+#     audio, sr = librosa.load(file_path, sr=None)  # 加载音频文件
+#     librosa.output.write_wav(output_path, audio, sr)  # 转换并保存为 WAV 格式
+#     return output_path
+def gradio_TSE(audio_file_path):
     """
     Wrapper function to handle Gradio's audio input and pass the file path to the voice conversion function.
     Gradio passes audio data as a tuple: (temp file path, sample rate).
     return random_wav
 # Define your Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## Target Speaker Extraction Demo")
+    gr.Markdown(
+        "This demo isolates the speech signal of a target speaker from a mixture of multiple speakers, "
+        "with or without noises and reverberations."
+    )
+    # input
+    with gr.Row():
+        input_audio = gr.Audio(label="Upload or record your input (mixture) audio", type="filepath")
+        enroll_audio = gr.Audio(label="Upload your enroll (target speaker) audio", type="filepath")
+    # output
+    with gr.Row():
+        noisy_audio_output = gr.Audio(label="Noisy Audio (Input with Noise)", type="filepath")
+        extracted_audio_output = gr.Audio(label="Extracted Target Speaker Audio", type="filepath")
+    # deal
+    convert_button = gr.Button("Process Audio")
+    # event
+    convert_button.click(
+        fn=gradio_TSE,
+        inputs=[input_audio, enroll_audio],
+        outputs=[noisy_audio_output, extracted_audio_output]
+    )
 if __name__ == "__main__":
     demo.launch()