NeoPy committed on
Commit
a5b1635
·
verified ·
1 Parent(s): f901b88

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +505 -0
app.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced Audio Separator Demo
3
+ Advanced audio source separation with latest models and features
4
+ """
5
+ import os
6
+ import json
7
+ import torch
8
+ import logging
9
+ import traceback
10
+ from typing import Dict, List, Optional
11
+ import time
12
+ from datetime import datetime
13
+
14
+ import gradio as gr
15
+ import numpy as np
16
+ import librosa
17
+ import soundfile as sf
18
+ from pydub import AudioSegment
19
+ from audio_separator.separator import Separator
20
+ from audio_separator.separator import architectures
21
+
22
+
23
class AudioSeparatorDemo:
    """Stateful wrapper around python-audio-separator's ``Separator``.

    Tracks the currently loaded model, keeps a processing history, and
    provides the helpers used by the Gradio UI: single-file separation,
    model listing, and multi-model comparison.
    """

    def __init__(self):
        self.separator = None          # lazily-created Separator instance
        self.available_models = {}
        self.current_model = None      # name of the model currently loaded
        self.processing_history = []   # list of dict entries, newest last
        self.setup_logging()

    def setup_logging(self):
        """Configure logging and attach a module-scoped logger."""
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _resolve_path(audio_file):
        """Return a filesystem path for *audio_file*.

        Gradio components are inconsistent: ``gr.Audio(type="filepath")``
        yields a plain ``str`` while ``gr.File`` yields tempfile wrappers
        exposing ``.name``.  Accept both (the original ``.name`` access
        crashed on plain string paths).
        """
        return audio_file if isinstance(audio_file, str) else audio_file.name

    def get_system_info(self):
        """Get system information for hardware acceleration."""
        info = {
            "pytorch_version": torch.__version__,
            "cuda_available": torch.cuda.is_available(),
            "cuda_version": torch.version.cuda if torch.cuda.is_available() else "N/A",
            "mps_available": hasattr(torch.backends, "mps") and torch.backends.mps.is_available(),
            "device": "cuda" if torch.cuda.is_available() else ("mps" if hasattr(torch.backends, "mps") and torch.backends.mps.is_available() else "cpu")
        }
        return info

    def initialize_separator(self, model_name: str = None, **kwargs):
        """Create a fresh ``Separator`` and load *model_name*.

        Returns ``(success, message)``.  On failure the separator state is
        cleared so ``current_model`` never points at a model that failed
        to load (the original left stale state behind).
        """
        try:
            # Release the previous separator (and any GPU memory it held).
            if self.separator is not None:
                del self.separator
                self.separator = None
                if torch.cuda.is_available():
                    # empty_cache() is a CUDA-only API; guard for CPU/MPS hosts.
                    torch.cuda.empty_cache()

            # Default to the bundled BS-Roformer vocal model.
            if model_name is None:
                model_name = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"

            self.separator = Separator(
                output_format="WAV",
                use_autocast=True,
                use_soundfile=True,
                **kwargs
            )

            self.separator.load_model(model_name)
            self.current_model = model_name

            return True, f"Successfully initialized with model: {model_name}"

        except Exception as e:
            # Clear partial state so a later call can retry cleanly.
            self.separator = None
            self.current_model = None
            self.logger.error(f"Error initializing separator: {str(e)}")
            return False, f"Error initializing separator: {str(e)}"

    def get_available_models(self):
        """Return the simplified model list ({name: info}) or {} on error."""
        try:
            if self.separator is None:
                # info_only avoids downloading/initialising any model.
                self.separator = Separator(info_only=True)

            return self.separator.get_simplified_model_list()
        except Exception as e:
            self.logger.error(f"Error getting available models: {str(e)}")
            return {}

    def process_audio(self, audio_file, model_name, output_format="WAV", quality_preset="Standard",
                      custom_params=None):
        """Separate *audio_file* with *model_name*.

        Returns ``(stems, message)`` where ``stems`` maps stem name to a
        ``(sample_rate, ndarray)`` tuple, or ``(None, error_message)``.
        """
        if audio_file is None:
            return None, "No audio file provided"

        if model_name is None:
            return None, "No model selected"

        # (Re)initialize only when the requested model changed.
        if self.separator is None or self.current_model != model_name:
            success, message = self.initialize_separator(model_name)
            if not success:
                return None, message

        try:
            start_time = time.time()

            if custom_params is None:
                custom_params = {}

            # Quality presets override whatever custom_params carried in.
            if quality_preset == "Fast":
                custom_params.update({
                    "mdx_params": {"batch_size": 4, "overlap": 0.1, "segment_size": 128},
                    "vr_params": {"batch_size": 8, "aggression": 3},
                    "demucs_params": {"shifts": 1, "overlap": 0.1},
                    "mdxc_params": {"batch_size": 4, "overlap": 4}
                })
            elif quality_preset == "High Quality":
                custom_params.update({
                    "mdx_params": {"batch_size": 1, "overlap": 0.5, "segment_size": 512, "enable_denoise": True},
                    "vr_params": {"batch_size": 1, "aggression": 8, "enable_tta": True, "enable_post_process": True},
                    "demucs_params": {"shifts": 4, "overlap": 0.5, "segments_enabled": False},
                    "mdxc_params": {"batch_size": 1, "overlap": 16, "pitch_shift": 0}
                })

            # Push the per-architecture parameter dicts onto the separator.
            for key, value in custom_params.items():
                if hasattr(self.separator, key):
                    setattr(self.separator, key, value)

            # Accept both str paths and file-like objects (see _resolve_path).
            output_files = self.separator.separate(self._resolve_path(audio_file))

            processing_time = time.time() - start_time

            # Load each produced stem into memory, then delete the file.
            output_audio = {}
            for file_path in output_files:
                if os.path.exists(file_path):
                    stem_name = os.path.splitext(os.path.basename(file_path))[0]
                    audio_data, sample_rate = sf.read(file_path)
                    output_audio[stem_name] = (sample_rate, audio_data)

                    os.remove(file_path)

            if not output_audio:
                return None, "No output files generated"

            history_entry = {
                "timestamp": datetime.now().isoformat(),
                "model": model_name,
                "processing_time": processing_time,
                "output_files": list(output_audio.keys())
            }
            self.processing_history.append(history_entry)

            return output_audio, f"Processing completed in {processing_time:.2f} seconds with model: {model_name}"

        except Exception as e:
            error_msg = f"Error processing audio: {str(e)}"
            self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
            return None, error_msg

    def get_processing_history(self):
        """Format the last 10 history entries for display."""
        if not self.processing_history:
            return "No processing history available"

        history_text = "Processing History:\n\n"
        for i, entry in enumerate(self.processing_history[-10:], 1):  # Show last 10 entries
            history_text += f"{i}. {entry['timestamp']}\n"
            history_text += f"   Model: {entry['model']}\n"
            history_text += f"   Time: {entry['processing_time']:.2f}s\n"
            history_text += f"   Stems: {', '.join(entry['output_files'])}\n\n"

        return history_text

    def reset_history(self):
        """Reset processing history"""
        self.processing_history = []
        return "Processing history cleared"

    def compare_models(self, audio_file, model_list):
        """Run each model in *model_list* on the same input and collect
        timing/output metrics per model.  Per-model failures are recorded
        in the result dict rather than aborting the comparison."""
        if audio_file is None or not model_list:
            return None, "No audio file or models selected for comparison"

        results = {}
        input_path = self._resolve_path(audio_file)  # hoisted out of the loop

        for model_name in model_list:
            try:
                start_time = time.time()
                success, message = self.initialize_separator(model_name)

                if not success:
                    results[model_name] = {"error": message}
                    continue

                output_files = self.separator.separate(input_path)
                processing_time = time.time() - start_time

                # Use the first output file for basic quality metrics.
                if output_files and os.path.exists(output_files[0]):
                    audio_data, sample_rate = sf.read(output_files[0])

                    results[model_name] = {
                        "processing_time": processing_time,
                        "output_files": len(output_files),
                        "sample_rate": sample_rate,
                        "duration": len(audio_data) / sample_rate,
                        "status": "Success"
                    }

                    # Clean up the stems this model produced.
                    for file_path in output_files:
                        if os.path.exists(file_path):
                            os.remove(file_path)
                else:
                    results[model_name] = {"status": "Failed", "error": "No output files generated"}

            except Exception as e:
                results[model_name] = {"status": "Error", "error": str(e)}

        return results, f"Model comparison completed for {len(model_list)} models"
230
+
231
+
232
# Module-level singleton; all Gradio callbacks below delegate to it.
demo = AudioSeparatorDemo()
234
+
235
def create_interface():
    """Build and return the Gradio Blocks UI.

    This function only wires components to the module-level ``demo``
    singleton; all separation work happens in ``AudioSeparatorDemo``.
    """

    with gr.Blocks(
        title="Enhanced Audio Separator",
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="slate",
            font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"]
        ),
        css="""
        .container {max-width: 1200px;}
        .model-card {border: 1px solid #e5e7eb; border-radius: 8px; padding: 12px; margin: 8px 0;}
        .quality-advanced {display: none;}
        .processing-status {padding: 8px; border-radius: 4px; margin: 8px 0;}
        """
    ) as interface:

        # Header
        gr.Markdown(
            """
            # 🎡 Enhanced Audio Separator

            Advanced audio source separation powered by the latest python-audio-separator library.
            Support for MDX-Net, VR Arch, Demucs, MDXC, and Roformer models with hardware acceleration.
            """,
            elem_classes=["container"]
        )

        # System Information
        with gr.Accordion("πŸ–₯️ System Information", open=False):
            system_info = demo.get_system_info()
            info_text = f"""
            **PyTorch Version:** {system_info['pytorch_version']}

            **Hardware Acceleration:** {system_info['device'].upper()}

            **CUDA Available:** {system_info['cuda_available']} (Version: {system_info['cuda_version']})

            **Apple Silicon (MPS):** {system_info['mps_available']}
            """
            gr.Markdown(info_text)

        with gr.Row():
            with gr.Column(scale=2):
                # Main audio input; type="filepath" hands callbacks a str path.
                audio_input = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    format="wav"
                )

                # Model selection (queried once at UI build time).
                model_list = demo.get_available_models()

                model_dropdown = gr.Dropdown(
                    choices=list(model_list.keys()) if model_list else [],
                    value="model_bs_roformer_ep_317_sdr_12.9755.ckpt" if model_list else None,
                    label="Select Model",
                    info="Choose an AI model for audio separation"
                )

                # Quality preset
                quality_preset = gr.Radio(
                    choices=["Fast", "Standard", "High Quality"],
                    value="Standard",
                    label="Quality Preset",
                    info="Choose processing quality vs speed trade-off"
                )

                # Advanced parameters (collapsible)
                with gr.Accordion("πŸ”§ Advanced Parameters", open=False):
                    with gr.Row():
                        batch_size = gr.Slider(1, 8, value=1, step=1, label="Batch Size")
                        segment_size = gr.Slider(64, 1024, value=256, step=64, label="Segment Size")
                        overlap = gr.Slider(0.1, 0.5, value=0.25, step=0.05, label="Overlap")

                    denoise = gr.Checkbox(label="Enable Denoise", value=False)
                    tta = gr.Checkbox(label="Enable TTA (Test-Time Augmentation)", value=False)
                    post_process = gr.Checkbox(label="Enable Post-Processing", value=False)

                # Process button
                process_btn = gr.Button("🎡 Separate Audio", variant="primary", size="lg")

            with gr.Column(scale=2):
                # Results
                status_output = gr.Textbox(label="Status", lines=3)

                # One tab per commonly-produced stem.
                with gr.Tabs():
                    with gr.TabItem("Vocals"):
                        vocals_output = gr.Audio(label="Vocals")

                    with gr.TabItem("Instrumental"):
                        instrumental_output = gr.Audio(label="Instrumental")

                    with gr.TabItem("Drums"):
                        drums_output = gr.Audio(label="Drums")

                    with gr.TabItem("Bass"):
                        bass_output = gr.Audio(label="Bass")

                    with gr.TabItem("Other Stems"):
                        other_output = gr.Audio(label="Other")

                # Download section
                with gr.Accordion("πŸ“₯ Batch Processing", open=False):
                    gr.Markdown("Upload multiple files for batch processing:")
                    batch_files = gr.File(file_count="multiple", file_types=["audio"], label="Batch Audio Files")
                    batch_btn = gr.Button("Process Batch")
                    batch_output = gr.File(label="Download Separated Files")

        # Model Information and Comparison
        with gr.Tabs():
            with gr.TabItem("πŸ“Š Model Information"):
                gr.Markdown("## Available Models and Performance Metrics")

                # Model information display
                model_info = gr.JSON(value=demo.get_available_models(), label="Model Details")
                refresh_models_btn = gr.Button("πŸ”„ Refresh Models")

                with gr.Row():
                    compare_models_dropdown = gr.Dropdown(
                        choices=list(model_list.keys())[:5] if model_list else [],
                        multiselect=True,
                        label="Select Models to Compare",
                        info="Choose up to 5 models for comparison"
                    )
                    compare_btn = gr.Button("πŸ” Compare Models")

                comparison_results = gr.JSON(label="Model Comparison Results")

            with gr.TabItem("πŸ“ˆ Processing History"):
                history_output = gr.Textbox(label="History", lines=10)
                refresh_history_btn = gr.Button("πŸ”„ Refresh History")
                reset_history_btn = gr.Button("πŸ—‘οΈ Clear History")

        # Event handlers
        def process_audio(audio_file, model_name, quality_preset, batch_size, segment_size,
                          overlap, denoise, tta, post_process):
            """Run one separation and route stems to the per-stem players."""
            if not audio_file or not model_name:
                return None, None, None, None, None, "Please upload an audio file and select a model"

            # Translate the advanced sliders into per-architecture params.
            custom_params = {
                "mdx_params": {
                    "batch_size": int(batch_size),
                    "segment_size": int(segment_size),
                    "overlap": float(overlap),
                    "enable_denoise": denoise
                },
                "vr_params": {
                    "batch_size": int(batch_size),
                    "enable_tta": tta,
                    "enable_post_process": post_process
                },
                "demucs_params": {
                    "overlap": float(overlap)
                },
                "mdxc_params": {
                    "batch_size": int(batch_size),
                    "overlap": int(overlap * 10)
                }
            }

            output_audio, status = demo.process_audio(
                audio_file, model_name, quality_preset=quality_preset, custom_params=custom_params
            )

            if output_audio is None:
                return None, None, None, None, None, status

            # Route stems to the matching player by name substring.
            vocals = None
            instrumental = None
            drums = None
            bass = None
            other = None

            for stem_name, (sample_rate, audio_data) in output_audio.items():
                if "vocal" in stem_name.lower():
                    vocals = (sample_rate, audio_data)
                elif "instrumental" in stem_name.lower():
                    instrumental = (sample_rate, audio_data)
                elif "drum" in stem_name.lower():
                    drums = (sample_rate, audio_data)
                elif "bass" in stem_name.lower():
                    bass = (sample_rate, audio_data)
                else:
                    other = (sample_rate, audio_data)

            return vocals, instrumental, drums, bass, other, status

        def refresh_models():
            """Re-query the model list and refresh both dropdown and JSON view."""
            updated_models = demo.get_available_models()
            return (
                gr.update(choices=list(updated_models.keys())),
                updated_models
            )

        def refresh_history():
            return demo.get_processing_history()

        def clear_history():
            return demo.reset_history()

        def compare_selected_models(audio_file, model_list):
            if not audio_file or not model_list:
                return "Please upload an audio file and select models to compare"

            results, status = demo.compare_models(audio_file, model_list)
            return results

        # Wire up event handlers
        process_btn.click(
            process_audio,
            inputs=[
                audio_input, model_dropdown, quality_preset,
                batch_size, segment_size, overlap, denoise, tta, post_process
            ],
            outputs=[
                vocals_output, instrumental_output, drums_output,
                bass_output, other_output, status_output
            ]
        )

        refresh_models_btn.click(refresh_models, outputs=[model_dropdown, model_info])
        refresh_history_btn.click(refresh_history, outputs=[history_output])
        reset_history_btn.click(clear_history, outputs=[history_output])
        compare_btn.click(compare_selected_models, inputs=[audio_input, compare_models_dropdown], outputs=[comparison_results])

        # Batch processing
        def process_batch(batch_files, model_name):
            """Separate every uploaded file and return the path to a zip
            of the stems.

            Fixes vs. the original: gr.File output requires a filesystem
            path (a BytesIO object is not downloadable), and each stem is
            encoded to real WAV bytes via soundfile instead of dumping raw
            ndarray bytes into a ".wav" entry.
            """
            if not batch_files or not model_name:
                return None, "Please upload batch files and select a model"

            import io
            import zipfile
            import tempfile

            zip_buffer = io.BytesIO()
            with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                for file_info in batch_files:
                    output_audio, _status = demo.process_audio(file_info, model_name)
                    if output_audio is None:
                        # Skip files that failed; keep processing the rest.
                        continue

                    # gr.File entries may be str paths or tempfile wrappers.
                    src_path = file_info if isinstance(file_info, str) else file_info.name
                    base = os.path.splitext(os.path.basename(src_path))[0]

                    for stem_name, (sample_rate, audio_data) in output_audio.items():
                        wav_buf = io.BytesIO()
                        sf.write(wav_buf, audio_data, sample_rate, format="WAV")
                        zip_file.writestr(f"{base}_{stem_name}.wav", wav_buf.getvalue())

            # Persist the archive so Gradio can serve it by path.
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
            tmp.write(zip_buffer.getvalue())
            tmp.close()
            return tmp.name, f"Batch processing completed for {len(batch_files)} files"

        batch_btn.click(
            process_batch,
            inputs=[batch_files, model_dropdown],
            outputs=[batch_output, status_output]
        )

    return interface
495
+
496
+
497
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces (container-friendly).
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
        # NOTE(review): show_tips was removed from launch() in Gradio 4.x
        # and raised TypeError; dropped for compatibility.
    )