Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -245,8 +245,9 @@ def process_audio(test_wav, enroll_wav):
|
|
| 245 |
|
| 246 |
# List of demo audio files
|
| 247 |
demo_audio_files = [
|
| 248 |
-
("
|
| 249 |
-
("
|
|
|
|
| 250 |
]
|
| 251 |
|
| 252 |
def update_audio_input(choice):
|
|
@@ -264,24 +265,31 @@ css = """
|
|
| 264 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
| 265 |
with gr.Column(elem_id="col-container"):
|
| 266 |
gr.Markdown("""
|
| 267 |
-
# SoloSpeech:
|
| 268 |
-
π Introduction: Extract the target voice from mixture speech given an enrollment speech.
|
| 269 |
|
| 270 |
💡 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
|
| 271 |
|
| 272 |
-
π Learn more about
|
| 273 |
|
| 274 |
""")
|
| 275 |
|
| 276 |
with gr.Tab("Target Speech Extraction"):
|
| 277 |
with gr.Row():
|
| 278 |
-
mixture_input = gr.Audio(label="Upload Mixture Audio",
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
with gr.Row():
|
| 281 |
-
enroll_input = gr.Audio(label="Upload Enrollment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
-
with gr.Row():
|
| 284 |
-
extract_button = gr.Button("Extract", variant="primary")
|
| 285 |
# extract_button = gr.Button("Extract", scale=1)
|
| 286 |
|
| 287 |
with gr.Row():
|
|
@@ -291,7 +299,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
|
| 291 |
demo_selector = gr.Dropdown(
|
| 292 |
label="Select Test Demo",
|
| 293 |
choices=[name for name, _, _ in demo_audio_files],
|
| 294 |
-
value="
|
| 295 |
)
|
| 296 |
|
| 297 |
# Update audio inputs when selecting from dropdown
|
|
|
|
| 245 |
|
| 246 |
# List of demo audio files
|
| 247 |
demo_audio_files = [
|
| 248 |
+
("Demo1: Extract male speaker from a mixture of multiple male speakers", "examples/test1.wav", "examples/test1_enroll.wav"),
|
| 249 |
+
("Demo2: Extract female speaker from a mixture of multiple female speakers", "examples/test2.wav", "examples/test2_enroll.wav"),
|
| 250 |
+
("Demo3: Extract male rapper from music with complex vocals", "examples/test_3_mixture.mp3", "examples/test_3_speaker.mp3"),
|
| 251 |
]
|
| 252 |
|
| 253 |
def update_audio_input(choice):
|
|
|
|
| 265 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
| 266 |
with gr.Column(elem_id="col-container"):
|
| 267 |
gr.Markdown("""
|
| 268 |
+
# SoloSpeech: A High-Quality, State-of-the-Art Target Speech Extraction Model
|
| 269 |
+
π Introduction: Extract the target voice from mixture speech given an enrollment speech.
|
| 270 |
|
| 271 |
💡 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
|
| 272 |
|
| 273 |
+
π Learn more about this project on the [🎯SoloSpeech Repo](https://github.com/WangHelin1997/SoloSpeech/).
|
| 274 |
|
| 275 |
""")
|
| 276 |
|
| 277 |
with gr.Tab("Target Speech Extraction"):
|
| 278 |
with gr.Row():
|
| 279 |
+
mixture_input = gr.Audio(label="Upload Mixture Audio",
|
| 280 |
+
type="filepath",
|
| 281 |
+
value="test1.wav")
|
| 282 |
|
| 283 |
+
with gr.Row(equal_height=True):
|
| 284 |
+
enroll_input = gr.Audio(label="Upload Enrollment/Speaker Audio",
|
| 285 |
+
info='A short audio clip containing only the target speaker.',
|
| 286 |
+
type="filepath",
|
| 287 |
+
value="test1_enroll.wav",
|
| 288 |
+
scale=4,
|
| 289 |
+
)
|
| 290 |
|
| 291 |
+
# with gr.Row():
|
| 292 |
+
extract_button = gr.Button("Extract", variant="primary", scale=1)
|
| 293 |
# extract_button = gr.Button("Extract", scale=1)
|
| 294 |
|
| 295 |
with gr.Row():
|
|
|
|
| 299 |
demo_selector = gr.Dropdown(
|
| 300 |
label="Select Test Demo",
|
| 301 |
choices=[name for name, _, _ in demo_audio_files],
|
| 302 |
+
value="Demo1: Extract male speaker from a mixture of multiple male speakers"
|
| 303 |
)
|
| 304 |
|
| 305 |
# Update audio inputs when selecting from dropdown
|