Spaces:

SaltProphet
/

Loop-Architect

Build error

App Files Files Community

SaltProphet committed on Oct 12, 2025

Commit

7ea6e04

verified ·

1 Parent(s): d071711

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -309

app.py CHANGED Viewed

@@ -1,3 +1,8 @@
 import gradio as gr
 import os
 import shutil
@@ -11,33 +16,54 @@ import zipfile
 import tempfile
 import matplotlib.pyplot as plt
 import matplotlib
-matplotlib.use('Agg')
-temp_files = []
-def cleanup_temp_files():
-    global temp_files
-    for file_path in temp_files:
-        if os.path.exists(file_path):
-            os.remove(file_path)
-    temp_files = []
 def update_output_visibility(choice):
     if "2 Stems" in choice:
-        return {
-            vocals_output: gr.update(visible=True),
-            drums_output: gr.update(visible=False),
-            bass_output: gr.update(visible=False),
-            other_output: gr.update(visible=True, label="Instrumental (No Vocals)")
         }
     elif "4 Stems" in choice:
-        return {
-            vocals_output: gr.update(visible=True),
-            drums_output: gr.update(visible=True),
-            bass_output: gr.update(visible=True),
-            other_output: gr.update(visible=True, label="Other")
         }
 async def separate_stems(audio_file_path, stem_choice, progress=gr.Progress(track_tqdm=True)):
     if audio_file_path is None: raise gr.Error("No audio file uploaded!")
     progress(0, desc="Starting...")
@@ -46,340 +72,100 @@ async def separate_stems(audio_file_path, stem_choice, progress=gr.Progress(trac
         original_filename_base = os.path.basename(audio_file_path).rsplit('.', 1)[0]
         stable_input_path = f"stable_input_{original_filename_base}.wav"
         shutil.copy(audio_file_path, stable_input_path)
         model_arg = "--two-stems=vocals" if "2 Stems" in stem_choice else ""
         output_dir = "separated"
         if os.path.exists(output_dir): shutil.rmtree(output_dir)
         command = f"python3 -m demucs {model_arg} -o \"{output_dir}\" \"{stable_input_path}\""
-        progress(0.2, desc="Running Demucs (this can take a minute)...")
         process = await asyncio.create_subprocess_shell(
             command,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE)
-        stdout, stderr = await process.communicate()
-        if process.returncode != 0:
-            raise gr.Error(f"Demucs failed to run. Error: {stderr.decode()[:500]}")
         progress(0.8, desc="Locating separated stem files...")
         stable_filename_base = os.path.basename(stable_input_path).rsplit('.', 1)[0]
         model_folder_name = next(os.walk(output_dir))[1][0]
         stems_path = os.path.join(output_dir, model_folder_name, stable_filename_base)
-        if not os.path.exists(stems_path):
             raise gr.Error(f"Demucs finished, but the output directory was not found!")
         vocals_path = os.path.join(stems_path, "vocals.wav") if os.path.exists(os.path.join(stems_path, "vocals.wav")) else None
         drums_path = os.path.join(stems_path, "drums.wav") if os.path.exists(os.path.join(stems_path, "drums.wav")) else None
         bass_path = os.path.join(stems_path, "bass.wav") if os.path.exists(os.path.join(stems_path, "bass.wav")) else None
         other_filename = "no_vocals.wav" if "2 Stems" in stem_choice else "other.wav"
         other_path = os.path.join(stems_path, other_filename) if os.path.exists(os.path.join(stems_path, other_filename)) else None
         os.remove(stable_input_path)
-        # Detect bars for each stem after separation
-        vocals_bar_times = None
-        drums_bar_times = None
-        bass_bar_times = None
-        other_bar_times = None
-        if vocals_path:
-            vocals_audio_data = sf.read(vocals_path)
-            _, _, vocals_bar_times = detect_bars(vocals_audio_data)
-        if drums_path:
-            drums_audio_data = sf.read(drums_path)
-            _, _, drums_bar_times = detect_bars(drums_audio_data)
-        if bass_path:
-            bass_audio_data = sf.read(bass_path)
-            _, _, bass_bar_times = detect_bars(bass_audio_data)
-        if other_path:
-            other_audio_data = sf.read(other_path)
-            _, _, other_bar_times = detect_bars(other_audio_data)
-        return vocals_path, drums_path, bass_path, other_path, vocals_bar_times, drums_bar_times, bass_bar_times, other_bar_times
     except Exception as e:
         print(f"An error occurred: {e}")
         raise gr.Error(str(e))
-def visualize_slices(stem_audio_data, sensitivity, progress=gr.Progress(track_tqdm=True)):
-    if stem_audio_data is None:
-        gr.Warning("This stem is empty. Cannot visualize.")
-        return None, None, None
-    sample_rate, y_int = stem_audio_data
-    y = librosa.util.buf_to_float(y_int)
-    progress(0.3, desc="Finding transients...")
-    onset_frames = librosa.onset.onset_detect(y=librosa.to_mono(y.T) if y.ndim > 1 else y, sr=sample_rate, wait=1, pre_avg=1, post_avg=1, post_max=1, delta=sensitivity)
-    onset_times = librosa.frames_to_time(onset_frames, sr=sample_rate)
-    progress(0.7, desc="Generating waveform plot...")
-    fig, ax = plt.subplots(figsize=(10, 3))
-    fig.patch.set_facecolor('#1f2937')
-    ax.set_facecolor('#111827')
-    librosa.display.waveshow(y, sr=sample_rate, ax=ax, color='#32f6ff', alpha=0.7)
-    for t in onset_times:
-        ax.axvline(x=t, color='#ff3b3b', linestyle='--', linewidth=1)
-    ax.tick_params(colors='gray'); ax.xaxis.label.set_color('gray'); ax.yaxis.label.set_color('gray')
-    ax.set_xlabel("Time (s)"); ax.set_ylabel("Amplitude"); ax.set_title("Detected Slices", color='white')
-    plt.tight_layout()
-    progress(1, desc="Done!")
-    return fig, onset_times, stem_audio_data
 def preview_slice(active_stem_audio, onset_times, evt: gr.SelectData):
     if active_stem_audio is None or onset_times is None: return None
-    sample_rate, y = active_stem_audio
-    # Convert click event coordinates to time
-    # evt.index[0] is the x-coordinate of the click in pixels
-    # evt.target[0] is the width of the plot in pixels
-    # len(y) / sample_rate is the total duration of the audio in seconds
-    clicked_time = evt.index[0] * (len(y) / sample_rate) / evt.target[0]
-    start_time = 0
-    end_time = len(y) / sample_rate
-    # Find the closest onset time before the clicked time
-    onsets_before = onset_times[onset_times <= clicked_time]
-    if len(onsets_before) > 0:
-        start_time = onsets_before[-1]
-    # Find the closest onset time after the clicked time
-    onsets_after = onset_times[onset_times > clicked_time]
-    if len(onsets_after) > 0:
-        end_time = onsets_after[0]
-    else:
-        # If no onset after the clicked time, slice to the end of the audio
-        end_time = len(y) / sample_rate
     start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
     end_sample = librosa.time_to_samples(end_time, sr=sample_rate)
-    # Ensure start_sample is less than end_sample
-    if start_sample >= end_sample:
-        # If the click is exactly on or after the last onset, preview a small segment at the end
-        if len(onset_times) > 0:
-             start_sample = librosa.time_to_samples(onset_times[-1], sr=sample_rate)
-             end_sample = len(y)
-        else:
-             # If no onsets detected, slice the whole audio
-             start_sample = 0
-             end_sample = len(y)
     sliced_audio = y[start_sample:end_sample]
     return (sample_rate, sliced_audio)
-def download_slice(sliced_audio_data):
-    if sliced_audio_data is None:
-        gr.Warning("No slice preview available to download.")
-        return None
-    sample_rate, y = sliced_audio_data
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, prefix="slice_") as tmp_file:
-        sf.write(tmp_file.name, y, sample_rate)
-        global temp_files
-        temp_files.append(tmp_file.name)
-        return tmp_file.name
-def detect_bars(stem_audio_data):
-    if stem_audio_data is None:
-        return None, None, None
-    sample_rate, y_int = stem_audio_data
-    y = librosa.util.buf_to_float(y_int)
-    y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
-    # Estimate tempo
-    tempo, beat_frames = librosa.beat.beat_track(y=y_mono, sr=sample_rate)
-    # Convert beat frames to beat times
-    beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)
-    # Calculate bar times (assuming 4 beats per bar)
-    bar_times = beat_times[::4]
-    return tempo, beat_times, bar_times
-def create_loop(stem_audio_data, bar_times, loop_length):
-    if stem_audio_data is None or bar_times is None or len(bar_times) < 2:
-        gr.Warning("Insufficient data to create a loop.")
-        return None
-    sample_rate, y_int = stem_audio_data
-    y = librosa.util.buf_to_float(y_int)
-    y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
-    # Parse loop length
-    num_bars = int(loop_length.split(" ")[0])
-    # Find the start of the first full bar (assuming bar_times[0] is the start of the first bar)
-    # If we want to start from the beginning of the audio, we can use 0 as the start time.
-    # For now, let's assume we start from the first detected bar.
-    start_time = bar_times[0]
-    # Calculate the duration of one bar
-    bar_duration = bar_times[1] - bar_times[0] if len(bar_times) > 1 else 0
-    # Calculate the end time for the loop
-    end_time = start_time + (num_bars * bar_duration)
-    # Ensure the end time does not exceed the audio duration
-    audio_duration = len(y) / sample_rate
-    end_time = min(end_time, audio_duration)
-    # Convert times to samples
-    start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
-    end_sample = librosa.time_to_samples(end_time, sr=sample_rate)
-    # Extract the loop segment
-    looped_audio = y_mono[start_sample:end_sample]
-    # Save the looped audio to a temporary file
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, prefix="loop_") as tmp_file:
-        sf.write(tmp_file.name, looped_audio, sample_rate)
-        global temp_files
-        temp_files.append(tmp_file.name)
-        return tmp_file.name
-def cut_all_oneshots(stem_audio_data, onset_times):
-    if stem_audio_data is None or onset_times is None or len(onset_times) < 1:
-        gr.Warning("Insufficient data or onsets detected to cut one-shots.")
-        return None
-    sample_rate, y_int = stem_audio_data
-    y = librosa.util.buf_to_float(y_int)
-    y_mono = librosa.to_mono(y.T) if y.ndim > 1 else y
-    oneshot_files = []
-    audio_duration = len(y_mono) / sample_rate
-    for i in range(len(onset_times)):
-        start_time = onset_times[i]
-        end_time = onset_times[i+1] if i < len(onset_times) - 1 else audio_duration
-        start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
-        end_sample = librosa.time_to_samples(end_time, sr=sample_rate)
-        # Ensure start_sample is less than end_sample, add a small buffer if necessary
-        if start_sample >= end_sample:
-            end_sample = start_sample + int(0.01 * sample_rate) # Add 10ms buffer if start is equal to or after end
-            if end_sample > len(y_mono):
-                 end_sample = len(y_mono)
-        segment = y_mono[start_sample:end_sample]
-        # Save each segment to a temporary file
-        with tempfile.NamedTemporaryFile(suffix=f"_{i}.wav", delete=False, prefix="oneshot_") as tmp_file:
-            sf.write(tmp_file.name, segment, sample_rate)
-            oneshot_files.append(tmp_file.name)
-    if not oneshot_files:
-        gr.Warning("No one-shots were successfully cut.")
-        return None
-    # Create a zip archive of the temporary one-shot files
-    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False, prefix="oneshots_archive_") as zip_file:
-        with zipfile.ZipFile(zip_file.name, 'w') as zipf:
-            for file_path in oneshot_files:
-                zipf.write(file_path, os.path.basename(file_path))
-    # Add the zip file and individual oneshot files to the temp_files list for cleanup
-    global temp_files
-    temp_files.extend(oneshot_files)
-    temp_files.append(zip_file.name)
-    return zip_file.name
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
     gr.Markdown("# 🎵 Loop Architect")
     onset_times_state = gr.State(value=None)
     active_stem_state = gr.State(value=None)
-    vocals_bar_times_state = gr.State(value=None)
-    drums_bar_times_state = gr.State(value=None)
-    bass_bar_times_state = gr.State(value=None)
-    other_bar_times_state = gr.State(value=None)
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Separate Stems")
             audio_input = gr.Audio(type="filepath", label="Upload a Track")
             stem_options = gr.Radio(["4 Stems (Vocals, Drums, Bass, Other)", "2 Stems (Vocals + Instrumental)"], label="Separation Type", value="4 Stems (Vocals, Drums, Bass, Other)")
             submit_button = gr.Button("Separate Stems")
         with gr.Column(scale=2):
             with gr.Accordion("Separated Stems", open=True):
-                with gr.Row():
-                    vocals_output = gr.Audio(label="Vocals", scale=2)
-                    with gr.Column(scale=1):
-                        slice_vocals_btn = gr.Button("Visualize Slices")
-                        vocals_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
-                        create_vocals_loop_btn = gr.Button("Create Loop")
-                    vocals_loop_output = gr.Audio(label="Vocals Loop", visible=False, scale=2)
-                    vocals_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
-                with gr.Row():
-                    drums_output = gr.Audio(label="Drums", scale=2)
-                    with gr.Column(scale=1):
-                        slice_drums_btn = gr.Button("Visualize Slices")
-                        drums_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
-                        create_drums_loop_btn = gr.Button("Create Loop")
-                    drums_loop_output = gr.Audio(label="Drums Loop", visible=False, scale=2)
-                    drums_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
-                with gr.Row():
-                    bass_output = gr.Audio(label="Bass", scale=2)
-                    with gr.Column(scale=1):
-                        slice_bass_btn = gr.Button("Visualize Slices")
-                        bass_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
-                        create_bass_loop_btn = gr.Button("Create Loop")
-                    bass_loop_output = gr.Audio(label="Bass Loop", visible=False, scale=2)
-                    bass_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
-                with gr.Row():
-                    other_output = gr.Audio(label="Other / Instrumental", scale=2)
-                    with gr.Column(scale=1):
-                        slice_other_btn = gr.Button("Visualize Slices")
-                        other_loop_length = gr.Dropdown(choices=["4 Bars", "8 Bars", "16 Bars"], label="Loop Length", value="4 Bars")
-                        create_other_loop_btn = gr.Button("Create Loop")
-                    other_loop_output = gr.Audio(label="Other Loop", visible=False, scale=2)
-                    other_loop_download_btn = gr.DownloadButton(value="Download Loop", visible=False)
-            gr.Markdown("### Slice Editor")
-            sensitivity_slider = gr.Slider(minimum=0, maximum=1, value=0.5, label="Onset Sensitivity")
-            slice_plot = gr.Image(label="Click a region on the waveform to preview a slice")
-            preview_player = gr.Audio(label="Slice Preview")
-            download_slice_btn = gr.DownloadButton(value="Download Slice", visible=False)
-            cut_all_oneshots_btn = gr.Button(value="Cut All Oneshots")
-            cut_oneshots_download_btn = gr.DownloadButton(value="Download All Oneshots", visible=False)
-    audio_input.change(fn=cleanup_temp_files)
-    submit_button.click(fn=separate_stems, inputs=[audio_input, stem_options], outputs=[vocals_output, drums_output, bass_output, other_output, vocals_bar_times_state, drums_bar_times_state, bass_bar_times_state, other_bar_times_state])
     stem_options.change(fn=update_output_visibility, inputs=stem_options, outputs=[vocals_output, drums_output, bass_output, other_output])
-    slice_vocals_btn.click(fn=visualize_slices, inputs=[vocals_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
-    slice_drums_btn.click(fn=visualize_slices, inputs=[drums_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
-    slice_bass_btn.click(fn=visualize_slices, inputs=[bass_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
-    slice_other_btn.click(fn=visualize_slices, inputs=[other_output, sensitivity_slider], outputs=[slice_plot, onset_times_state, active_stem_state])
-    slice_plot.select(fn=preview_slice, inputs=[active_stem_state, onset_times_state], outputs=preview_player).then(lambda: gr.update(visible=True), outputs=download_slice_btn)
-    create_vocals_loop_btn.click(fn=create_loop, inputs=[vocals_output, vocals_bar_times_state, vocals_loop_length], outputs=[vocals_loop_output, vocals_loop_download_btn])
-    create_drums_loop_btn.click(fn=create_loop, inputs=[drums_output, drums_bar_times_state, drums_loop_length], outputs=[drums_loop_output, drums_loop_download_btn])
-    create_bass_loop_btn.click(fn=create_loop, inputs=[bass_output, bass_bar_times_state, bass_loop_length], outputs=[bass_loop_output, bass_loop_download_btn])
-    create_other_loop_btn.click(fn=create_loop, inputs=[other_output, other_bar_times_state, other_loop_length], outputs=[other_loop_output, other_loop_download_btn])
-    download_slice_btn.click(fn=download_slice, inputs=preview_player, outputs=download_slice_btn)
-    cut_all_oneshots_btn.click(fn=cut_all_oneshots, inputs=[active_stem_state, onset_times_state], outputs=cut_oneshots_download_btn)
-demo.launch()

+# 1. Install all necessary libraries for the full application
+# This line is for Colab. On Hugging Face, these should be in your requirements.txt
+# !pip install gradio "demucs>=4.0.0" librosa soundfile matplotlib
+# 2. Import libraries
 import gradio as gr
 import os
 import shutil
 import tempfile
 import matplotlib.pyplot as plt
 import matplotlib
+matplotlib.use('Agg') # Use a non-interactive backend for plotting
+# --- Helper/Processing Functions ---
 def update_output_visibility(choice):
     if "2 Stems" in choice:
+        return {
+            vocals_output: gr.update(visible=True),
+            drums_output: gr.update(visible=False),
+            bass_output: gr.update(visible=False),
+            other_output: gr.update(visible=True, label="Instrumental (No Vocals)")
         }
     elif "4 Stems" in choice:
+        return {
+            vocals_output: gr.update(visible=True),
+            drums_output: gr.update(visible=True),
+            bass_output: gr.update(visible=True),
+            other_output: gr.update(visible=True, label="Other")
         }
+# --- NEW, CORRECTED BAR DETECTION FUNCTION ---
+def detect_bars(audio_file_path):
+    if audio_file_path is None or not os.path.exists(audio_file_path):
+        return None, None, None
+    try:
+        # 1. Load the audio file inside the function
+        y, sr = librosa.load(audio_file_path, sr=None)
+        # 2. Convert to mono for analysis
+        y_mono = librosa.to_mono(y) if y.ndim > 1 else y
+        # 3. Perform beat and tempo analysis
+        tempo, beats = librosa.beat.beat_track(y=y_mono, sr=sr)
+        bpm = 120 if tempo is None else int(np.round(tempo).item())
+        beat_times = librosa.frames_to_time(beats, sr=sr)
+        # This is a simple way to estimate bar start times (assuming 4/4 time)
+        bar_times = beat_times[::4]
+        return bpm, beat_times, bar_times
+    except Exception as e:
+        print(f"Error in detect_bars: {e}")
+        return None, None, None
 async def separate_stems(audio_file_path, stem_choice, progress=gr.Progress(track_tqdm=True)):
     if audio_file_path is None: raise gr.Error("No audio file uploaded!")
     progress(0, desc="Starting...")
         original_filename_base = os.path.basename(audio_file_path).rsplit('.', 1)[0]
         stable_input_path = f"stable_input_{original_filename_base}.wav"
         shutil.copy(audio_file_path, stable_input_path)
         model_arg = "--two-stems=vocals" if "2 Stems" in stem_choice else ""
         output_dir = "separated"
         if os.path.exists(output_dir): shutil.rmtree(output_dir)
         command = f"python3 -m demucs {model_arg} -o \"{output_dir}\" \"{stable_input_path}\""
+        progress(0.2, desc="Running Demucs (this may take a minute)...")
         process = await asyncio.create_subprocess_shell(
             command,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE)
+        await process.communicate()
+        if process.returncode != 0:
+            raise gr.Error(f"Demucs failed to run. Error")
         progress(0.8, desc="Locating separated stem files...")
         stable_filename_base = os.path.basename(stable_input_path).rsplit('.', 1)[0]
         model_folder_name = next(os.walk(output_dir))[1][0]
         stems_path = os.path.join(output_dir, model_folder_name, stable_filename_base)
+        if not os.path.exists(stems_path):
             raise gr.Error(f"Demucs finished, but the output directory was not found!")
         vocals_path = os.path.join(stems_path, "vocals.wav") if os.path.exists(os.path.join(stems_path, "vocals.wav")) else None
         drums_path = os.path.join(stems_path, "drums.wav") if os.path.exists(os.path.join(stems_path, "drums.wav")) else None
         bass_path = os.path.join(stems_path, "bass.wav") if os.path.exists(os.path.join(stems_path, "bass.wav")) else None
         other_filename = "no_vocals.wav" if "2 Stems" in stem_choice else "other.wav"
         other_path = os.path.join(stems_path, other_filename) if os.path.exists(os.path.join(stems_path, other_filename)) else None
         os.remove(stable_input_path)
+        # --- CALLING THE NEW BAR DETECTION FUNCTION ---
+        progress(0.9, desc="Analyzing stem structure...")
+        all_paths = {"vocals": vocals_path, "drums": drums_path, "bass": bass_path, "other": other_path}
+        for name, path in all_paths.items():
+            if path:
+                bpm, _, bar_times = detect_bars(path)
+                if bpm and bar_times is not None:
+                    print(f"--- Analysis for {name} ---")
+                    print(f"Detected BPM: {bpm}")
+                    print(f"Found {len(bar_times)} bars.")
+        return vocals_path, drums_path, bass_path, other_path
     except Exception as e:
         print(f"An error occurred: {e}")
         raise gr.Error(str(e))
+# This is the placeholder for the interactive editor we were building
 def preview_slice(active_stem_audio, onset_times, evt: gr.SelectData):
     if active_stem_audio is None or onset_times is None: return None
+    sample_rate, y = active_stem_audio; clicked_time = evt.index[0]
+    start_time = 0; end_time = len(y) / sample_rate
+    for i, t in enumerate(onset_times):
+        if t > clicked_time:
+            end_time = t; break
+        start_time = t
     start_sample = librosa.time_to_samples(start_time, sr=sample_rate)
     end_sample = librosa.time_to_samples(end_time, sr=sample_rate)
     sliced_audio = y[start_sample:end_sample]
     return (sample_rate, sliced_audio)
+# --- Create the full Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="red")) as demo:
     gr.Markdown("# 🎵 Loop Architect")
+    # State components
     onset_times_state = gr.State(value=None)
     active_stem_state = gr.State(value=None)
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Separate Stems")
             audio_input = gr.Audio(type="filepath", label="Upload a Track")
             stem_options = gr.Radio(["4 Stems (Vocals, Drums, Bass, Other)", "2 Stems (Vocals + Instrumental)"], label="Separation Type", value="4 Stems (Vocals, Drums, Bass, Other)")
             submit_button = gr.Button("Separate Stems")
         with gr.Column(scale=2):
             with gr.Accordion("Separated Stems", open=True):
+                with gr.Row():
+                    vocals_output = gr.Audio(label="Vocals", scale=4)
+                with gr.Row():
+                    drums_output = gr.Audio(label="Drums", scale=4)
+                with gr.Row():
+                    bass_output = gr.Audio(label="Bass", scale=4)
+                with gr.Row():
+                    other_output = gr.Audio(label="Other / Instrumental", scale=4)
+    # --- Define Event Listeners ---
+    submit_button.click(fn=separate_stems, inputs=[audio_input, stem_options], outputs=[vocals_output, drums_output, bass_output, other_output])
     stem_options.change(fn=update_output_visibility, inputs=stem_options, outputs=[vocals_output, drums_output, bass_output, other_output])
+# --- Launch the UI ---
+demo.launch()