"""
Stem Separation using Demucs
Separates audio into individual stems (vocals, drums, bass, other)
using Facebook/Meta's Demucs model.
Requires: demucs package (pip install demucs)
"""
import os
import sys
import subprocess
import json
from pathlib import Path
from typing import Optional, List
import tempfile
# Available Demucs models
# Registry of supported Demucs checkpoints, keyed by the name passed to
# `demucs --name`. Each entry lists the stems the model emits (used when
# collecting output files) and a human-readable description for the CLI help.
DEMUCS_MODELS = {
    "htdemucs": {
        "stems": ["vocals", "drums", "bass", "other"],
        "description": "Hybrid Transformer Demucs (recommended)"
    },
    "htdemucs_ft": {
        "stems": ["vocals", "drums", "bass", "other"],
        "description": "Fine-tuned Hybrid Transformer Demucs"
    },
    # Only model with dedicated guitar/piano stems.
    "htdemucs_6s": {
        "stems": ["vocals", "drums", "bass", "guitar", "piano", "other"],
        "description": "6-stem Hybrid Transformer Demucs"
    },
    "mdx_extra": {
        "stems": ["vocals", "drums", "bass", "other"],
        "description": "MDX-Net architecture"
    }
}
def get_best_device() -> str:
    """Auto-detect the best available device for ML processing.

    Returns:
        "mps" on Apple Silicon, "cuda" when an NVIDIA GPU is visible,
        otherwise "cpu". Also falls back to "cpu" when torch is not
        installed or is too old to expose the MPS backend.
    """
    try:
        import torch
        # Older torch builds have no torch.backends.mps attribute at all,
        # which raises AttributeError (not ImportError) -- treat both as
        # "no accelerator available" rather than crashing.
        if torch.backends.mps.is_available():
            return "mps"  # Apple Silicon GPU
        elif torch.cuda.is_available():
            return "cuda"  # NVIDIA GPU
    except (ImportError, AttributeError):
        pass
    return "cpu"
def separate_stems(
    input_path: str,
    output_dir: str,
    model: str = "htdemucs",
    device: Optional[str] = None,
    shifts: int = 1,
    overlap: float = 0.25,
    timeout: int = 600
) -> dict:
    """
    Separate audio into stems using Demucs.

    Args:
        input_path: Path to input audio file
        output_dir: Directory to save separated stems
        model: Demucs model to use (default: htdemucs)
        device: Processing device (cuda, cpu, mps). Auto-detected if None.
        shifts: Number of random shifts for better quality (more = slower)
        overlap: Overlap between prediction windows
        timeout: Maximum seconds to wait for the demucs subprocess
            (default: 600, i.e. 10 minutes)

    Returns:
        dict with:
            - success: bool
            - stems: list of {type, path, duration}
            - model: str (model used)
            - output_dir: str (directory holding the stems, on success)
            - error: str (if failed)
    """
    src = Path(input_path)
    out_root = Path(output_dir)

    if not src.exists():
        return {
            "success": False,
            "error": f"Input file not found: {src}"
        }

    if model not in DEMUCS_MODELS:
        return {
            "success": False,
            "error": f"Unknown model: {model}. Available: {list(DEMUCS_MODELS.keys())}"
        }

    # Create output directory
    out_root.mkdir(parents=True, exist_ok=True)

    # Auto-detect device if not specified
    if device is None:
        device = get_best_device()

    try:
        # Run demucs under the current interpreter so it sees the same
        # environment (and installed packages) as this module.
        cmd = [
            sys.executable, "-m", "demucs",
            "--name", model,
            "--out", str(out_root),
            "--shifts", str(shifts),
            "--overlap", str(overlap),
            "--mp3",  # Use mp3 output to avoid torchcodec dependency issues
            "--device", device,
            str(src),
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        if result.returncode != 0:
            return {
                "success": False,
                "error": f"Demucs failed: {result.stderr}"
            }

        # Demucs outputs to: output_dir/model_name/track_name/stem.<ext>
        stems_dir = out_root / model / src.stem
        if not stems_dir.exists():
            return {
                "success": False,
                "error": f"Stems directory not found: {stems_dir}"
            }

        # Collect stem info for every stem this model is expected to emit.
        stems = []
        for stem_type in DEMUCS_MODELS[model]["stems"]:
            # --mp3 makes mp3 the output format; check wav as a fallback
            # in case a different demucs version ignored the flag.
            stem_path = stems_dir / f"{stem_type}.mp3"
            if not stem_path.exists():
                stem_path = stems_dir / f"{stem_type}.wav"
            if stem_path.exists():
                stems.append({
                    "type": stem_type,
                    "path": str(stem_path),
                    "duration": get_audio_duration(str(stem_path))
                })

        if not stems:
            return {
                "success": False,
                "error": f"No stems found in {stems_dir}"
            }

        return {
            "success": True,
            "stems": stems,
            "model": model,
            "output_dir": str(stems_dir)
        }

    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "error": f"Stem separation timed out (>{timeout} seconds)"
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Stem separation failed: {str(e)}"
        }
def get_audio_duration(audio_path: str) -> Optional[float]:
    """Get audio duration in seconds.

    Tries soundfile, then librosa, then ffprobe, falling through to the
    next backend when one is missing OR fails to read the file (the
    previous version gave up entirely if soundfile imported but could
    not parse the file). Returns None when no backend succeeds.
    """
    # Preferred: soundfile reads only the header, no full decode needed.
    try:
        import soundfile as sf
        return sf.info(audio_path).duration
    except ImportError:
        pass
    except Exception:
        # soundfile installed but could not read this file; try the next backend
        pass

    try:
        import librosa
        return librosa.get_duration(path=audio_path)
    except ImportError:
        pass
    except Exception:
        pass

    # Last resort: shell out to ffprobe if it is on PATH.
    try:
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-show_entries",
             "format=duration", "-of", "json", audio_path],
            capture_output=True,
            text=True
        )
        if result.returncode == 0:
            data = json.loads(result.stdout)
            return float(data["format"]["duration"])
    except (OSError, ValueError, KeyError, json.JSONDecodeError):
        # OSError: ffprobe missing; the rest: malformed/missing duration field
        pass

    return None
def list_available_models() -> dict:
    """Report every Demucs model this module can run.

    Returns:
        dict with a ``success`` flag and the model registry under ``models``.
    """
    response = {"success": True}
    response["models"] = DEMUCS_MODELS
    return response
if __name__ == "__main__":
    # Test stem separation from the command line.
    # NOTE: `import sys` removed here -- sys is already imported at module top.
    if len(sys.argv) > 2:
        result = separate_stems(sys.argv[1], sys.argv[2])
        print(json.dumps(result, indent=2))
    else:
        # No arguments: print usage plus the model catalogue.
        print("Usage: python stem_separation.py <input_audio> <output_dir>")
        print("\nAvailable models:")
        for name, info in DEMUCS_MODELS.items():
            print(f"  {name}: {info['description']}")
            print(f"    Stems: {', '.join(info['stems'])}")