SESA_Fast_Separation

Running

App Files Community

ASesYusuf1 committed on May 16, 2025

Commit

a5643d8

verified ·

1 Parent(s): b00852e

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -74

app.py CHANGED Viewed

@@ -11,16 +11,17 @@ from audio_separator.separator import Separator
 import numpy as np
 import librosa
 import soundfile as sf
-from ensemble import ensemble_files  # Correct import
 import shutil
 import gradio_client.utils as client_utils
 import matchering as mg
 import spaces
 import gdown
-import scipy.io.wavfile
 from pydub import AudioSegment
 import gc
 import time
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -230,8 +231,7 @@ button:hover {
     box-shadow: 0 2px 8px rgba(255, 107, 107, 0.4) !important;
 }
 .compact-dropdown select, .compact-dropdown .gr-dropdown {
-    background: transparent ! thận
     color: #e0e0e0 !important;
     border: none !important;
     width: 100% !important;
@@ -340,15 +340,14 @@ def download_audio(url, cookie_file=None):
             gdown.download(download_url, temp_output_path, quiet=False)
             if not os.path.exists(temp_output_path):
                 return None, "Downloaded file not found", None
-            from mimetypes import guess_type
-            mime_type, _ = guess_type(temp_output_path)
-            if not mime_type or not mime_type.startswith('audio'):
-                return None, "Downloaded file is not an audio file", None
             output_path = 'ytdl/gdrive_audio.wav'
-            audio = AudioSegment.from_file(temp_output_path)
-            audio.export(output_path, format="wav")
             sample_rate, data = scipy.io.wavfile.read(output_path)
-            return output_path, "Download successful", (sample_rate, data)
         else:
             os.makedirs('ytdl', exist_ok=True)
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -433,23 +432,36 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
     temp_audio_path = None
     chunk_paths = []
     max_retries = 2
     try:
         if not audio:
             raise ValueError("No audio file provided.")
         if not model_keys:
             raise ValueError("No models selected.")
-        if len(model_keys) > 2:
-            logger.warning("Limited to 2 models to avoid ZeroGPU timeouts. Using first two: %s", model_keys[:2])
-            model_keys = model_keys[:2]
         if isinstance(audio, tuple):
             sample_rate, data = audio
             temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
         audio_data, sr = librosa.load(audio, sr=None, mono=False)
         duration = librosa.get_duration(y=audio_data, sr=sr)
         logger.info(f"Audio duration: {duration:.2f} seconds")
-        chunk_duration = 300
         chunks = []
         if duration > 900:
             logger.info(f"Audio exceeds 15 minutes, splitting into {chunk_duration}-second chunks")
@@ -465,70 +477,116 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
                 logger.info(f"Created chunk {i}: {chunk_path}")
         else:
             chunks = [audio]
         use_tta = use_tta == "True"
         if os.path.exists(output_dir):
             shutil.rmtree(output_dir)
         os.makedirs(output_dir, exist_ok=True)
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
         all_stems = []
-        model_stems = {}
-        total_models = len(model_keys)
-        for model_idx, model_key in enumerate(model_keys):
-            model_stems[model_key] = {"vocals": [], "other": []}
-            for category, models in ROFORMER_MODELS.items():
-                if model_key in models:
-                    model = models[model_key]
-                    break
-            else:
-                logger.warning(f"Model {model_key} not found, skipping")
-                continue
-            for chunk_idx, chunk_path in enumerate(chunks):
-                retry_count = 0
-                while retry_count <= max_retries:
                     try:
-                        progress((model_idx + 0.1) / total_models, desc=f"Loading {model_key} for chunk {chunk_idx}")
                         model_path = os.path.join(model_dir, model)
-                        if not os.path.exists(model_path):
-                            logger.info(f"Model {model} not cached, will download")
-                        separator = Separator(
-                            log_level=logging.INFO,
-                            model_file_dir=model_dir,
-                            output_dir=output_dir,
-                            output_format=out_format,
-                            normalization_threshold=norm_thresh,
-                            amplification_threshold=amp_thresh,
-                            use_autocast=use_autocast,
-                            mdxc_params={"segment_size": seg_size, "overlap": overlap, "use_tta": use_tta, "batch_size": batch_size}
-                        )
-                        logger.info(f"Loading {model_key} for chunk {chunk_idx}")
-                        separator.load_model(model_filename=model)
-                        progress((model_idx + 0.5) / total_models, desc=f"Separating chunk {chunk_idx} with {model_key}")
-                        logger.info(f"Separating chunk {chunk_idx} with {model_key}")
-                        separation = separator.separate(chunk_path)
-                        stems = [os.path.join(output_dir, file_name) for file_name in separation]
-                        for stem in stems:
-                            if "vocals" in os.path.basename(stem).lower():
-                                model_stems[model_key]["vocals"].append(stem)
-                            elif "other" in os.path.basename(stem).lower() or "instrumental" in os.path.basename(stem).lower():
-                                model_stems[model_key]["other"].append(stem)
-                        break
                     except Exception as e:
-                        retry_count += 1
-                        logger.error(f"Error processing {model_key} chunk {chunk_idx}, attempt {retry_count}/{max_retries}: {e}")
-                        if "ZeroGPU" in str(e) or "aborted" in str(e).lower():
-                            logger.error("ZeroGPU task aborted, attempting recovery")
-                        if retry_count > max_retries:
                             logger.error(f"Max retries reached for {model_key} chunk {chunk_idx}, skipping")
-                            break
                         time.sleep(1)
                     finally:
-                        separator = None
-                        gc.collect()
                         if torch.cuda.is_available():
                             torch.cuda.empty_cache()
                             logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
-                time.sleep(0.1)
         progress(0.8, desc="Combining stems...")
         for model_key, stems_dict in model_stems.items():
             for stem_type in ["vocals", "other"]:
@@ -546,13 +604,16 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
                         all_stems.append(combined_path)
                     except Exception as e:
                         logger.error(f"Error combining {stem_type} for {model_key}: {e}")
         all_stems = [stem for stem in all_stems if os.path.exists(stem)]
         if not all_stems:
             raise ValueError("No valid stems found for ensemble. Try uploading a local WAV file.")
         weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
         if len(weights) != len(all_stems):
             weights = [1.0] * len(all_stems)
-            logger.info("Weights mismatched, safest option is to default to 1.0")
         output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
         ensemble_args = [
             "--files", *all_stems,
@@ -563,12 +624,14 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
         progress(0.9, desc="Running ensemble...")
         logger.info(f"Running ensemble with args: {ensemble_args}")
         try:
-            result = ensemble_files(ensemble_args)  # Correct function call
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
             logger.info(f"Ensemble completed, output: {output_file}")
             progress(1.0, desc="Ensemble completed")
-            return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}"
         except Exception as e:
             logger.error(f"Ensemble processing error: {e}")
             if "numpy" in str(e).lower() or "copy" in str(e).lower():
@@ -578,8 +641,8 @@ def auto_ensemble_process(audio, model_keys, seg_size=128, overlap=0.1, out_form
             raise RuntimeError(error_msg)
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
-        if "ZeroGPU" in str(e) or "aborted" in str(e).lower():
-            error_msg = "ZeroGPU task aborted. Try using fewer models (max 2), lowering segment size, or uploading a local WAV file."
         else:
             error_msg = f"Ensemble error: {e}"
         raise RuntimeError(error_msg)
@@ -615,8 +678,8 @@ def create_interface():
     with gr.Blocks(title="🎵 SESA Fast Separation 🎵", css=CSS, elem_id="app-container") as app:
         gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
         gr.Markdown("**Note**: If YouTube downloads fail, upload a valid cookies file or a local WAV file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
-        gr.Markdown("**Warning**: Audio files longer than 15 minutes are split into 5-minute chunks, which may increase processing time.")
-        gr.Markdown("**ZeroGPU Notice**: Use up to 2 models for ensemble to avoid timeouts. For large tasks, upload a local WAV file.")
         with gr.Tabs():
             with gr.Tab("⚙️ Settings"):
                 with gr.Group(elem_classes="dubbing-theme"):
@@ -653,7 +716,7 @@ def create_interface():
             with gr.Tab("🎚️ Auto Ensemble"):
                 with gr.Group(elem_classes="dubbing-theme"):
                     gr.Markdown("### Ensemble Processing")
-                    gr.Markdown("Note: If weights are not specified, equal weights (1.0) are applied. Max 2 models recommended to avoid ZeroGPU timeouts.")
                     with gr.Row():
                         ensemble_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
                         url_ensemble = gr.Textbox(label="🔗 Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
@@ -663,13 +726,13 @@ def create_interface():
                     ensemble_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
                     with gr.Row():
                         ensemble_category = gr.Dropdown(label="📚 Category", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
-                        ensemble_models = gr.Dropdown(label="🛠️ Models (Max 2)", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
                     with gr.Row():
                         ensemble_seg_size = gr.Slider(32, 512, value=128, step=32, label="📏 Segment Size", interactive=True)
                         ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Overlap", interactive=True)
                         ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="🔍 Use TTA", interactive=True)
                     ensemble_method = gr.Dropdown(label="⚙️ Ensemble Method", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
-                    ensemble_weights = gr.Textbox(label="⚖️ Weights", placeholder="e.g., 1.0, 1.0 (comma-separated)", interactive=True)
                     ensemble_button = gr.Button("🎛️ Run Ensemble!", variant="primary")
                     ensemble_output = gr.Audio(label="🎶 Ensemble Result", type="filepath", interactive=False)
                     ensemble_status = gr.Textbox(label="📢 Status", interactive=False)
@@ -699,7 +762,7 @@ def create_interface():
             fn=auto_ensemble_process,
             inputs=[
                 ensemble_audio, ensemble_models, ensemble_seg_size, ensemble_overlap,
-                output_format, ensemble_use_tta, model_file_dir, output_dir,
                 norm_threshold, amp_threshold, batch_size, ensemble_method,
                 ensemble_exclude_stems, ensemble_weights
             ],

 import numpy as np
 import librosa
 import soundfile as sf
+from ensemble import ensemble_files
 import shutil
 import gradio_client.utils as client_utils
 import matchering as mg
 import spaces
 import gdown
 from pydub import AudioSegment
 import gc
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from threading import Lock
 # Logging setup
 logging.basicConfig(level=logging.INFO)
     box-shadow: 0 2px 8px rgba(255, 107, 107, 0.4) !important;
 }
 .compact-dropdown select, .compact-dropdown .gr-dropdown {
+    background: transparent !important;
     color: #e0e0e0 !important;
     border: none !important;
     width: 100% !important;
             gdown.download(download_url, temp_output_path, quiet=False)
             if not os.path.exists(temp_output_path):
                 return None, "Downloaded file not found", None
             output_path = 'ytdl/gdrive_audio.wav'
+            try:
+                audio = AudioSegment.from_file(temp_output_path)
+                audio.export(output_path, format="wav")
+            except Exception as e:
+                return None, f"Failed to process Google Drive file as audio: {str(e)}. Ensure the file contains audio (e.g., MP3, WAV, or video with audio track).", None
             sample_rate, data = scipy.io.wavfile.read(output_path)
+            return output_path, "Download and audio conversion successful", (sample_rate, data)
         else:
             os.makedirs('ytdl', exist_ok=True)
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
     temp_audio_path = None
     chunk_paths = []
     max_retries = 2
+    start_time = time.time()
+    time_budget = 100  # seconds, to stay within ZeroGPU limit
+    max_models = 6  # Reasonable limit to prevent timeouts
+    gpu_lock = Lock()  # Ensure only one model uses GPU at a time
     try:
         if not audio:
             raise ValueError("No audio file provided.")
         if not model_keys:
             raise ValueError("No models selected.")
+        if len(model_keys) > max_models:
+            logger.warning(f"Selected {len(model_keys)} models, limiting to {max_models} to avoid ZeroGPU timeouts.")
+            model_keys = model_keys[:max_models]
+        # Dynamic batch size adjustment
+        dynamic_batch_size = max(1, min(4, 1 + (6 - len(model_keys)) // 2))
+        logger.info(f"Using batch size: {dynamic_batch_size} for {len(model_keys)} models")
         if isinstance(audio, tuple):
             sample_rate, data = audio
             temp_audio_path = os.path.join("/tmp", "temp_audio.wav")
             scipy.io.wavfile.write(temp_audio_path, sample_rate, data)
             audio = temp_audio_path
         audio_data, sr = librosa.load(audio, sr=None, mono=False)
         duration = librosa.get_duration(y=audio_data, sr=sr)
         logger.info(f"Audio duration: {duration:.2f} seconds")
+        # Optimize chunking
+        chunk_duration = 300 if duration > 900 else duration
         chunks = []
         if duration > 900:
             logger.info(f"Audio exceeds 15 minutes, splitting into {chunk_duration}-second chunks")
                 logger.info(f"Created chunk {i}: {chunk_path}")
         else:
             chunks = [audio]
         use_tta = use_tta == "True"
         if os.path.exists(output_dir):
             shutil.rmtree(output_dir)
         os.makedirs(output_dir, exist_ok=True)
         base_name = os.path.splitext(os.path.basename(audio))[0]
         logger.info(f"Ensemble for {base_name} with {model_keys} on {device}")
+        # Model cache
+        model_cache = {}
         all_stems = []
+        model_stems = {model_key: {"vocals": [], "other": []} for model_key in model_keys}
+        total_tasks = len(model_keys) * len(chunks)
+        def process_model_chunk(model_key, chunk_path, chunk_idx, model_idx):
+            with torch.no_grad():
+                for attempt in range(max_retries + 1):
                     try:
+                        # Find model
+                        for category, models in ROFORMER_MODELS.items():
+                            if model_key in models:
+                                model = models[model_key]
+                                break
+                        else:
+                            logger.warning(f"Model {model_key} not found, skipping")
+                            return []
+                        # Check time budget
+                        elapsed = time.time() - start_time
+                        if elapsed > time_budget:
+                            logger.error(f"Time budget ({time_budget}s) exceeded, aborting")
+                            raise TimeoutError("Processing exceeded time budget")
+                        # Initialize separator
                         model_path = os.path.join(model_dir, model)
+                        if model_key not in model_cache:
+                            logger.info(f"Loading {model_key} into cache")
+                            separator = Separator(
+                                log_level=logging.INFO,
+                                model_file_dir=model_dir,
+                                output_dir=output_dir,
+                                output_format=out_format,
+                                normalization_threshold=norm_thresh,
+                                amplification_threshold=amp_thresh,
+                                use_autocast=use_autocast,
+                                mdxc_params={
+                                    "segment_size": seg_size,
+                                    "overlap": overlap,
+                                    "use_tta": use_tta,
+                                    "batch_size": dynamic_batch_size
+                                }
+                            )
+                            separator.load_model(model_filename=model)
+                            model_cache[model_key] = separator
+                        else:
+                            separator = model_cache[model_key]
+                        # Process with GPU lock
+                        with gpu_lock:
+                            progress((model_idx + chunk_idx / len(chunks)) / len(model_keys), desc=f"Separating chunk {chunk_idx} with {model_key}")
+                            logger.info(f"Separating chunk {chunk_idx} with {model_key}")
+                            separation = separator.separate(chunk_path)
+                            stems = [os.path.join(output_dir, file_name) for file_name in separation]
+                            result = []
+                            for stem in stems:
+                                if "vocals" in os.path.basename(stem).lower():
+                                    model_stems[model_key]["vocals"].append(stem)
+                                elif "other" in os.path.basename(stem).lower() or "instrumental" in os.path.basename(stem).lower():
+                                    model_stems[model_key]["other"].append(stem)
+                                    result.append(stem)
+                            return result
                     except Exception as e:
+                        logger.error(f"Error processing {model_key} chunk {chunk_idx}, attempt {attempt + 1}/{max_retries + 1}: {e}")
+                        if attempt == max_retries:
                             logger.error(f"Max retries reached for {model_key} chunk {chunk_idx}, skipping")
+                            return []
                         time.sleep(1)
                     finally:
                         if torch.cuda.is_available():
                             torch.cuda.empty_cache()
                             logger.info(f"Cleared CUDA cache after {model_key} chunk {chunk_idx}")
+        # Parallel processing
+        progress(0.1, desc="Starting model separations...")
+        with ThreadPoolExecutor(max_workers=min(4, len(model_keys))) as executor:
+            future_to_task = {}
+            for model_idx, model_key in enumerate(model_keys):
+                for chunk_idx, chunk_path in enumerate(chunks):
+                    future = executor.submit(process_model_chunk, model_key, chunk_path, chunk_idx, model_idx)
+                    future_to_task[future] = (model_key, chunk_idx)
+            for future in as_completed(future_to_task):
+                model_key, chunk_idx = future_to_task[future]
+                try:
+                    stems = future.result()
+                    if stems:
+                        logger.info(f"Completed {model_key} chunk {chunk_idx}")
+                    else:
+                        logger.warning(f"No stems produced for {model_key} chunk {chunk_idx}")
+                except Exception as e:
+                    logger.error(f"Task {model_key} chunk {chunk_idx} failed: {e}")
+        # Clear model cache
+        model_cache.clear()
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            logger.info("Cleared model cache and GPU memory")
+        # Combine stems
         progress(0.8, desc="Combining stems...")
         for model_key, stems_dict in model_stems.items():
             for stem_type in ["vocals", "other"]:
                         all_stems.append(combined_path)
                     except Exception as e:
                         logger.error(f"Error combining {stem_type} for {model_key}: {e}")
         all_stems = [stem for stem in all_stems if os.path.exists(stem)]
         if not all_stems:
             raise ValueError("No valid stems found for ensemble. Try uploading a local WAV file.")
+        # Ensemble
         weights = [float(w.strip()) for w in weights_str.split(',')] if weights_str.strip() else [1.0] * len(all_stems)
         if len(weights) != len(all_stems):
             weights = [1.0] * len(all_stems)
+            logger.info("Weights mismatched, defaulting to 1.0")
         output_file = os.path.join(output_dir, f"{base_name}_ensemble_{ensemble_method}.{out_format}")
         ensemble_args = [
             "--files", *all_stems,
         progress(0.9, desc="Running ensemble...")
         logger.info(f"Running ensemble with args: {ensemble_args}")
         try:
+            result = ensemble_files(ensemble_args)
             if result is None or not os.path.exists(output_file):
                 raise RuntimeError(f"Ensemble failed, output file not created: {output_file}")
             logger.info(f"Ensemble completed, output: {output_file}")
             progress(1.0, desc="Ensemble completed")
+            elapsed = time.time() - start_time
+            logger.info(f"Total processing time: {elapsed:.2f}s")
+            return output_file, f"Ensemble completed with {ensemble_method}, excluded: {exclude_stems if exclude_stems else 'None'}, {len(model_keys)} models in {elapsed:.2f}s"
         except Exception as e:
             logger.error(f"Ensemble processing error: {e}")
             if "numpy" in str(e).lower() or "copy" in str(e).lower():
             raise RuntimeError(error_msg)
     except Exception as e:
         logger.error(f"Ensemble error: {e}")
+        if "ZeroGPU" in str(e) or "aborted" in str(e).lower() or isinstance(e, TimeoutError):
+            error_msg = f"ZeroGPU task aborted or timed out. Try fewer models (max {max_models}), shorter audio, or uploading a local WAV file."
         else:
             error_msg = f"Ensemble error: {e}"
         raise RuntimeError(error_msg)
     with gr.Blocks(title="🎵 SESA Fast Separation 🎵", css=CSS, elem_id="app-container") as app:
         gr.Markdown("<h1 class='header-text'>🎵 SESA Fast Separation 🎵</h1>")
         gr.Markdown("**Note**: If YouTube downloads fail, upload a valid cookies file or a local WAV file. [Cookie Instructions](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies)")
+        gr.Markdown("**Warning**: Audio files longer than 15 minutes are split into 5-minute chunks, increasing processing time.")
+        gr.Markdown("**ZeroGPU Notice**: Up to 6 models supported for ensemble. For long audio, use fewer models or a local WAV file to avoid timeouts.")
         with gr.Tabs():
             with gr.Tab("⚙️ Settings"):
                 with gr.Group(elem_classes="dubbing-theme"):
             with gr.Tab("🎚️ Auto Ensemble"):
                 with gr.Group(elem_classes="dubbing-theme"):
                     gr.Markdown("### Ensemble Processing")
+                    gr.Markdown("Note: If weights are not specified, equal weights (1.0) are applied. Max 6 models recommended to avoid ZeroGPU timeouts.")
                     with gr.Row():
                         ensemble_audio = gr.Audio(label="🎧 Upload Audio", type="filepath", interactive=True)
                         url_ensemble = gr.Textbox(label="🔗 Or Paste URL", placeholder="YouTube or audio URL", interactive=True)
                     ensemble_exclude_stems = gr.Textbox(label="🚫 Exclude Stems", placeholder="e.g., vocals, drums (comma-separated)", interactive=True)
                     with gr.Row():
                         ensemble_category = gr.Dropdown(label="📚 Category", choices=list(ROFORMER_MODELS.keys()), value="Instrumentals", interactive=True)
+                        ensemble_models = gr.Dropdown(label="🛠️ Models (Max 6)", choices=list(ROFORMER_MODELS["Instrumentals"].keys()), multiselect=True, interactive=True, allow_custom_value=True)
                     with gr.Row():
                         ensemble_seg_size = gr.Slider(32, 512, value=128, step=32, label="📏 Segment Size", interactive=True)
                         ensemble_overlap = gr.Slider(2, 10, value=8, step=1, label="🔄 Overlap", interactive=True)
                         ensemble_use_tta = gr.Dropdown(choices=["True", "False"], value="False", label="🔍 Use TTA", interactive=True)
                     ensemble_method = gr.Dropdown(label="⚙️ Ensemble Method", choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], value='avg_wave', interactive=True)
+                    ensemble_weights = gr.Textbox(label="⚖️ Weights", placeholder="e.g., 1.0, 1.0, 1.0 (comma-separated)", interactive=True)
                     ensemble_button = gr.Button("🎛️ Run Ensemble!", variant="primary")
                     ensemble_output = gr.Audio(label="🎶 Ensemble Result", type="filepath", interactive=False)
                     ensemble_status = gr.Textbox(label="📢 Status", interactive=False)
             fn=auto_ensemble_process,
             inputs=[
                 ensemble_audio, ensemble_models, ensemble_seg_size, ensemble_overlap,
+                output_format, ensemble_use_tta, model_dir, output_dir,
                 norm_threshold, amp_threshold, batch_size, ensemble_method,
                 ensemble_exclude_stems, ensemble_weights
             ],