Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import argparse | |
| import os | |
| import subprocess | |
| from pathlib import Path | |
| from typing import Tuple, List, Dict, Optional | |
class Error(Exception):
    """Failure raised by the audio separation helpers in this module."""
def run_command_with_streaming(cmd, description="Processing"):
    """Run a command and echo its combined stdout/stderr line by line.

    Args:
        cmd: Sequence of program arguments handed to ``subprocess.Popen``
            (no shell is involved). Items are stringified for the banner.
        description: Human-readable label used in the banner and in the
            error message.

    Returns:
        The exit code of the process. This is always 0, because any other
        exit code raises.

    Raises:
        RuntimeError: If the command exits with a non-zero code. The message
            contains the exit code and the last lines the command printed.
    """
    from collections import deque

    # NOTE(review): the banner glyphs below look mis-encoded (probably an
    # emoji / box-drawing character originally) - confirm against upstream.
    print(f"π΅ {description}...")
    print(f"Command: {' '.join(str(c) for c in cmd)}")
    print("β" * 60)

    # stderr is merged into stdout, so the tail of the stream is the only
    # diagnostic text available when the command fails.
    output_tail = deque(maxlen=50)

    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",  # odd bytes in tool output must not abort the run
    ) as process:
        # Iterating until EOF guarantees nothing is lost when the process
        # exits while output is still buffered in the pipe.
        for raw_line in process.stdout:
            line = raw_line.strip()
            print(line)
            output_tail.append(line)
        return_code = process.wait()

    if return_code != 0:
        error_output = "\n".join(output_tail)
        raise RuntimeError(
            f"{description} failed (code {return_code}):\n{error_output}"
        )

    print("β" * 60)
    print(f"β {description} completed successfully!")
    return return_code
def separate_audio(
    audio_path: str,
    output_path: Optional[str] = None,
    output_format: str = "wav",
    model: str = "hdemucs_mmi",
    device: Optional[str] = None,
    segment: Optional[int] = None,
    jobs: int = 1,
) -> Tuple[str, str, str, str]:
    """
    Separate audio into vocals, drums, bass, and other stems using Demucs.

    Runs ``demucs.separate`` as a subprocess and returns the paths of the
    four stem files it is expected to write under
    ``<output_path>/separated/<model>/<track name>/``.

    Args:
        audio_path: Path to the input audio file; passed unchanged to the
            Demucs command line. Its file stem is used as the track name.
        output_path: Base directory for results (default: 'output'). Stems
            are written below its 'separated' sub-directory.
        output_format: 'mp3' (case-insensitive) requests MP3 output at
            192 kbit/s; any other value yields WAV files.
        model: Demucs model name (default: 'hdemucs_mmi').
        device: Value for Demucs' ``--device`` option; omitted when falsy so
            Demucs picks its own default.
        segment: Value for Demucs' ``--segment`` option; omitted when falsy.
        jobs: Value for Demucs' ``--jobs`` option (default: 1).

    Returns:
        tuple[str, str, str, str]: Paths to the separated audio files in
        order: vocals, drums, bass, other.

    Raises:
        Error: If the Demucs command fails or an expected stem file is
            missing. The original exception is chained as ``__cause__``.
    """
    import sys

    try:
        # Prepare the output directory
        if not output_path:
            output_path = "output"
        output_dir = os.path.join(output_path, "separated")
        os.makedirs(output_dir, exist_ok=True)

        # Use the running interpreter so Demucs is resolved from the same
        # environment; sys.executable may be empty/None in embedded setups.
        cmd = [
            sys.executable or "python",
            "-m",
            "demucs.separate",
            "--out",
            output_dir,
            "--name",
            model,
            "--jobs",
            str(jobs),
        ]

        # Add optional parameters if provided
        if device:
            cmd.extend(["--device", device])
        if segment:
            cmd.extend(["--segment", str(segment)])

        wants_mp3 = output_format.lower() == "mp3"
        if wants_mp3:
            cmd.extend(["--mp3", "--mp3-bitrate", "192"])

        cmd.append(audio_path)

        # Run Demucs separation with real-time output
        run_command_with_streaming(cmd, "Demucs stem separation")

        # Locate the stems. The extension is chosen up front instead of
        # rewriting ".wav" inside the full path, which would also mangle a
        # directory or track name that happens to contain ".wav".
        track_name = Path(audio_path).stem
        model_dir = os.path.join(output_dir, model, track_name)
        extension = "mp3" if wants_mp3 else "wav"

        vocals_path = os.path.join(model_dir, f"vocals.{extension}")
        drums_path = os.path.join(model_dir, f"drums.{extension}")
        bass_path = os.path.join(model_dir, f"bass.{extension}")
        other_path = os.path.join(model_dir, f"other.{extension}")

        # Verify all files exist
        for file_path in (vocals_path, drums_path, bass_path, other_path):
            if not os.path.exists(file_path):
                raise Error(f"Separated file not found: {file_path}")

        return vocals_path, drums_path, bass_path, other_path
    except Exception as e:
        # Keep the original exception chained for debugging.
        raise Error(f"Error processing audio: {str(e)}") from e
def extract_selected_stems(
    audio_path: str,
    stems_to_extract: List[str],
    output_path: Optional[str] = None,
    output_format: str = "wav",
) -> Dict[str, str]:
    """
    Return the file paths of only the requested stems of an audio file.

    All four stems are still produced by ``separate_audio``; this function
    merely narrows the result to the names the caller asked for.

    Args:
        audio_path: Input audio file, forwarded to ``separate_audio``.
        stems_to_extract: Stem names to keep. Recognised names are
            'vocals', 'drums', 'bass' and 'other'.
        output_path: Output directory, forwarded to ``separate_audio``.
        output_format: Output format, forwarded to ``separate_audio``.

    Returns:
        dict[str, str]: Mapping of each requested valid stem name to its
        file path, in the order the names were requested.

    Raises:
        ValueError: If none of the requested names is a recognised stem.

    Note:
        Unrecognised names are dropped after printing a warning.
    """
    known_stems = ("vocals", "drums", "bass", "other")

    # Report unknown names first, then continue with the recognised ones.
    rejected = [name for name in stems_to_extract if name not in known_stems]
    if rejected:
        print(f"Warning: Invalid stem names will be ignored: {rejected}")

    requested = [name for name in stems_to_extract if name in known_stems]
    if not requested:
        raise ValueError("No valid stems specified for extraction")

    # Full separation; the returned tuple is ordered vocals, drums, bass, other.
    vocals, drums, bass, other = separate_audio(
        audio_path, output_path, output_format
    )
    path_by_stem = {
        "vocals": vocals,
        "drums": drums,
        "bass": bass,
        "other": other,
    }

    return {name: path_by_stem[name] for name in requested}
def extract_vocal_non_vocal(
    audio_path: str,
    output_path: str = "output",
    model: str = "hdemucs_mmi",
    output_format: str = "wav",
    device: Optional[str] = None,
    segment: Optional[int] = None,
    jobs: int = 1,
) -> Tuple[str, str]:
    """
    Extract vocals and non-vocals (instrumental) stems from an audio file.

    Runs ``demucs.separate`` with ``--two-stems vocals`` as a subprocess and
    returns the paths of the two files it is expected to write under
    ``<output_path>/separated/<model>/<track name>/``.

    Args:
        audio_path: Path to the input audio file; passed unchanged to the
            Demucs command line. Its file stem is used as the track name.
        output_path: Base directory for results (default: 'output'). A falsy
            value such as None also falls back to 'output'.
        model: Demucs model name (default: 'hdemucs_mmi').
        output_format: 'mp3' (case-insensitive) requests MP3 output at
            192 kbit/s; any other value yields WAV files.
        device: Value for Demucs' ``--device`` option; omitted when falsy.
        segment: Value for Demucs' ``--segment`` option; omitted when falsy.
        jobs: Value for Demucs' ``--jobs`` option (default: 1).

    Returns:
        tuple[str, str]: Paths to (vocals_file, non_vocals_file), where the
        second entry is the ``no_vocals`` file written by Demucs.

    Raises:
        RuntimeError: If the Demucs command fails or an expected output file
            is missing. The original exception is chained as ``__cause__``.
    """
    import sys

    try:
        # Callers in this module pass None when no directory was given;
        # fall back to the documented default instead of failing in join().
        if not output_path:
            output_path = "output"
        output_dir = os.path.join(output_path, "separated")
        os.makedirs(output_dir, exist_ok=True)

        # Use the running interpreter so Demucs is resolved from the same
        # environment; sys.executable may be empty/None in embedded setups.
        cmd = [
            sys.executable or "python",
            "-m",
            "demucs.separate",
            "--out",
            output_dir,
            "--name",
            model,
            "--jobs",
            str(jobs),
            "--two-stems",
            "vocals",
        ]

        # Add optional parameters if provided
        if device:
            cmd.extend(["--device", device])
        if segment:
            cmd.extend(["--segment", str(segment)])

        wants_mp3 = output_format.lower() == "mp3"
        if wants_mp3:
            cmd.extend(["--mp3", "--mp3-bitrate", "192"])

        cmd.append(audio_path)

        # Run Demucs separation with real-time output
        run_command_with_streaming(cmd, "Demucs stem separation")

        # Locate the outputs. The extension is chosen up front instead of
        # rewriting ".wav" inside the full path, which would also mangle a
        # directory or track name that happens to contain ".wav".
        track_name = Path(audio_path).stem
        model_dir = os.path.join(output_dir, model, track_name)
        extension = "mp3" if wants_mp3 else "wav"

        vocals_path = os.path.join(model_dir, f"vocals.{extension}")
        non_vocals_path = os.path.join(model_dir, f"no_vocals.{extension}")

        # Verify all files exist
        for file_path in (vocals_path, non_vocals_path):
            if not os.path.exists(file_path):
                raise Error(f"Separated file not found: {file_path}")

        return vocals_path, non_vocals_path
    except Exception as e:
        # Keep the original exception chained for debugging.
        raise RuntimeError(f"Error creating non-vocals track: {str(e)}") from e
def create_karaoke_track(
    audio_path: str, output_path: Optional[str] = None, output_format: str = "wav"
) -> str:
    """
    Create a karaoke (instrumental) track by removing vocals from an audio file.

    Thin wrapper around ``extract_vocal_non_vocal`` that discards the vocal
    path and returns only the non-vocal one.

    Args:
        audio_path: Path to the input audio file.
        output_path: Directory to save the karaoke track; 'output' is used
            when this is None or empty.
        output_format: Output format for the karaoke track ('wav' or 'mp3',
            default: 'wav').

    Returns:
        Path to the karaoke (instrumental) audio file.

    Raises:
        RuntimeError: Propagated from ``extract_vocal_non_vocal`` when the
            separation fails.
    """
    # output_format must go by keyword: the third positional parameter of
    # extract_vocal_non_vocal is the model name, not the format.
    _, instrumental_path = extract_vocal_non_vocal(
        audio_path,
        output_path or "output",
        output_format=output_format,
    )
    return instrumental_path
# Command-line entry point: one sub-command per public helper in this module.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Separate audio into stems using Demucs"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # "separate": all four stems, with the Demucs tuning options exposed
    separate_parser = subparsers.add_parser(
        "separate", help="Separate into all four stems"
    )
    separate_parser.add_argument("audio_path", help="Path to the input audio file")
    separate_parser.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )
    separate_parser.add_argument(
        "--format",
        default="wav",
        choices=["wav", "mp3"],
        help="Output format (default: wav)",
    )
    separate_parser.add_argument(
        "--model",
        default="htdemucs",
        help="Demucs model to use (default: htdemucs)",
    )
    separate_parser.add_argument(
        "--device",
        help="Device to use for processing (default: cuda if available else cpu)",
    )
    separate_parser.add_argument(
        "--segment",
        # Parsed as int to match separate_audio's `segment: Optional[int]`;
        # the value is forwarded verbatim to Demucs' --segment option.
        type=int,
        help="Set split size of each chunk to save memory",
    )
    separate_parser.add_argument(
        "--jobs",
        type=int,
        default=1,
        help="Number of parallel jobs (default: 1)",
    )

    # "select": return only the named stems
    select_parser = subparsers.add_parser("select", help="Extract specific stems only")
    select_parser.add_argument("audio_path", help="Path to the input audio file")
    select_parser.add_argument(
        "stems",
        nargs="+",
        choices=["vocals", "drums", "bass", "other"],
        help="Stems to extract (choose from: vocals, drums, bass, other)",
    )
    select_parser.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )
    select_parser.add_argument(
        "--format",
        default="wav",
        choices=["wav", "mp3"],
        help="Output format (default: wav)",
    )

    # "vocal-nonvocal": two-stem split
    vocal_parser = subparsers.add_parser(
        "vocal-nonvocal", help="Extract vocals and instrumental only"
    )
    vocal_parser.add_argument("audio_path", help="Path to the input audio file")
    vocal_parser.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )
    vocal_parser.add_argument(
        "--format",
        default="wav",
        choices=["wav", "mp3"],
        help="Output format (default: wav)",
    )

    # "karaoke": instrumental track only
    karaoke_parser = subparsers.add_parser(
        "karaoke", help="Create karaoke (instrumental) track"
    )
    karaoke_parser.add_argument("audio_path", help="Path to the input audio file")
    karaoke_parser.add_argument(
        "--output-dir", help="Directory to save karaoke track (default: output)"
    )
    karaoke_parser.add_argument(
        "--format",
        default="wav",
        choices=["wav", "mp3"],
        help="Output format (default: wav)",
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        # SystemExit works even when the site-provided exit() helper is absent.
        raise SystemExit(1)

    try:
        if args.command == "separate":
            vocals, drums, bass, other = separate_audio(
                args.audio_path,
                args.output_dir,
                args.format,
                args.model,
                args.device,
                args.segment,
                args.jobs,
            )
            print(f"Vocals: {vocals}")
            print(f"Drums: {drums}")
            print(f"Bass: {bass}")
            print(f"Other: {other}")
        elif args.command == "select":
            selected_stems = extract_selected_stems(
                args.audio_path, args.stems, args.output_dir, args.format
            )
            for stem, path in selected_stems.items():
                print(f"{stem.capitalize()}: {path}")
        elif args.command == "vocal-nonvocal":
            # The third positional parameter of extract_vocal_non_vocal is the
            # model name, so the format has to be passed by keyword. The
            # directory falls back to "output" because --output-dir is optional.
            vocals_path, non_vocals_path = extract_vocal_non_vocal(
                args.audio_path,
                args.output_dir or "output",
                output_format=args.format,
            )
            print(f"Vocals: {vocals_path}")
            print(f"Non-vocals (Instrumental): {non_vocals_path}")
        elif args.command == "karaoke":
            karaoke_path = create_karaoke_track(
                args.audio_path, args.output_dir, args.format
            )
            print(f"Karaoke track: {karaoke_path}")
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {e}")
        raise SystemExit(1)