Spaces:

OpenSound
/

SoloSpeech

Running on Zero

App Files Community

OpenSound committed on Jun 4, 2025

Commit

30a186b

verified ·

1 Parent(s): 76620d0

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -245,8 +245,9 @@ def process_audio(test_wav, enroll_wav):
 # List of demo audio files
 demo_audio_files = [
-    ("Test Demo 1", "test1.wav", "test1_enroll.wav"),
-    ("Test Demo 2", "test2.wav", "test2_enroll.wav")
 ]
 def update_audio_input(choice):
@@ -264,24 +265,31 @@ css = """
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("""
-            # SoloSpeech: Enhancing Intelligibility and Quality in Target Speech Extraction through a Cascaded Generative Pipeline
-            👋 Introduction: Extract the target voice from mixture speech given an enrollment speech.
             💡 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
-            🔗 Learn more about 🎯**SoloSpeech** on the [SoloSpeech Repo](https://github.com/WangHelin1997/SoloSpeech/).
         """)
         with gr.Tab("Target Speech Extraction"):
             with gr.Row():
-                mixture_input = gr.Audio(label="Upload Mixture Audio", type="filepath", value="test2.wav")
-            with gr.Row():
-                enroll_input = gr.Audio(label="Upload Enrollment Audio (Speaker Audio)", type="filepath", value="test2_enroll.wav")
-            with gr.Row():
-                extract_button = gr.Button("Extract", variant="primary")
                 # extract_button = gr.Button("Extract", scale=1)
             with gr.Row():
@@ -291,7 +299,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
                 demo_selector = gr.Dropdown(
                     label="Select Test Demo",
                     choices=[name for name, _, _ in demo_audio_files],
-                    value="Test Demo 2"
                 )
             # Update audio inputs when selecting from dropdown

 # List of demo audio files
 demo_audio_files = [
+    ("Demo1: Extract male speaker from a mixture of multiple male speakers", "examples/test1.wav", "examples/test1_enroll.wav"),
+    ("Demo2: Extract female speaker from a mixture of multiple female speakers", "examples/test2.wav", "examples/test2_enroll.wav"),
+    ("Demo3: Extract male rapper from music with complex vocals", "examples/test_3_mixture.mp3", "examples/test_3_speaker.mp3"),
 ]
 def update_audio_input(choice):
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("""
+            # SoloSpeech: A High-Quality, State-of-the-Art Target Speech Extraction Model
+            👋 Introduction: Extract the target voice from mixture speech given an enrollment speech.
             💡 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
+            🔗 Learn more about this project on the [🎯SoloSpeech Repo](https://github.com/WangHelin1997/SoloSpeech/).
         """)
         with gr.Tab("Target Speech Extraction"):
             with gr.Row():
+                mixture_input = gr.Audio(label="Upload Mixture Audio",
+                                         type="filepath",
+                                         value="test1.wav")
+            with gr.Row(equal_height=True):
+                enroll_input = gr.Audio(label="Upload Enrollment/Speaker Audio",
+                                        info='A short audio clip containing only the target speaker.',
+                                        type="filepath",
+                                        value="test1_enroll.wav",
+                                        scale=4,
+                                       )
+            # with gr.Row():
+                extract_button = gr.Button("Extract", variant="primary", scale=1)
                 # extract_button = gr.Button("Extract", scale=1)
             with gr.Row():
                 demo_selector = gr.Dropdown(
                     label="Select Test Demo",
                     choices=[name for name, _, _ in demo_audio_files],
+                    value="Demo1: Extract male speaker from a mixture of multiple male speakers"
                 )
             # Update audio inputs when selecting from dropdown