Spaces:
Sleeping
Sleeping
| import torch | |
| import torchaudio | |
| from demucs.pretrained import get_model | |
| from demucs.apply import apply_model | |
| import tempfile | |
| import os | |
| import numpy as np | |
| import librosa | |
class DemucsProcessor:
    """Isolate the vocal stem of an audio file with a pretrained Demucs model.

    The model is fetched with ``demucs.pretrained.get_model`` and moved to
    CUDA when ``torch.cuda.is_available()`` reports a GPU, otherwise it stays
    on the CPU.
    """

    def __init__(self, model_name="htdemucs"):
        """Load the pretrained model called *model_name* onto the chosen device.

        Args:
            model_name: Name passed to ``demucs.pretrained.get_model``.
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        self.model = get_model(model_name)
        self.model.to(self.device)
        # Stem names reported by the model; used to locate the 'vocals' output.
        self.sources = self.model.sources
        print(f"Model loaded successfully on {self.device}")
        print(f"Available sources: {self.sources}")

    @staticmethod
    def _to_stereo(waveform):
        """Return *waveform* as a ``(2, samples)`` tensor.

        A 1-D tensor is treated as a single channel, a single channel is
        duplicated, and any channels beyond the first two are dropped.
        """
        if waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)
        channel_count = waveform.shape[0]
        if channel_count == 1:
            return waveform.repeat(2, 1)
        if channel_count > 2:
            # Keep the first two channels so the result is always stereo.
            return waveform[:2]
        return waveform

    def load_audio(self, file_path):
        """Read *file_path* and return ``(stereo_waveform, sample_rate)``.

        ``torchaudio.load`` is tried first; if it raises, ``librosa.load`` is
        used as a fallback (which resamples to 44100 Hz).

        Args:
            file_path: Path of the audio file to read.

        Returns:
            A ``(waveform, sample_rate)`` tuple where ``waveform`` is a tensor
            with two channels.

        Raises:
            RuntimeError: If both loaders fail; the librosa error is attached
                as ``__cause__``.
        """
        try:
            waveform, sample_rate = torchaudio.load(file_path)
        except Exception as primary_error:
            # Broad on purpose: the audio backends raise many unrelated
            # exception types and any of them should trigger the fallback.
            print(f"Error loading with torchaudio: {primary_error}")
            try:
                audio, sample_rate = librosa.load(file_path, sr=44100, mono=False)
                waveform = torch.from_numpy(audio)
            except Exception as fallback_error:
                raise RuntimeError(
                    f"Failed to load audio: {str(fallback_error)}"
                ) from fallback_error

        print(f"Audio loaded - Shape: {waveform.shape}, Sample rate: {sample_rate}")
        return self._to_stereo(waveform), sample_rate

    def separate_vocals(self, audio_path):
        """Separate the vocals of *audio_path* and write them to a WAV file.

        Args:
            audio_path: Path of the audio file to process.

        Returns:
            Path of a temporary ``.wav`` file holding the vocal stem. The file
            is not deleted automatically; the caller owns it.

        Raises:
            RuntimeError: If loading, separation, or saving fails; the
                underlying exception is attached as ``__cause__``.
        """
        try:
            waveform, sample_rate = self.load_audio(audio_path)

            # Feed the model audio at the rate it declares; fall back to the
            # file's own rate if the model object exposes no `samplerate`.
            model_rate = getattr(self.model, "samplerate", sample_rate)
            if sample_rate != model_rate:
                print(f"Resampling from {sample_rate} Hz to {model_rate} Hz")
                waveform = torchaudio.functional.resample(
                    waveform, sample_rate, model_rate
                )
                sample_rate = model_rate

            # apply_model takes a batched input, so add a leading batch axis.
            batch = waveform.to(self.device).unsqueeze(0)

            # apply_model runs with its library defaults; no chunking or
            # overlap options are overridden here.
            with torch.no_grad():
                sources = apply_model(self.model, batch)

            vocals_idx = self.sources.index('vocals')
            vocals = sources[0, vocals_idx].cpu()

            # Reserve a path, then close the handle before writing so the
            # save-by-name also works where open files cannot be reopened.
            tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
            tmp.close()
            try:
                torchaudio.save(tmp.name, vocals, sample_rate, format='wav')
            except Exception:
                # Do not leave an empty or partial file behind on failure.
                if os.path.exists(tmp.name):
                    os.remove(tmp.name)
                raise
            return tmp.name
        except Exception as e:
            raise RuntimeError(f"Separation failed: {str(e)}") from e
def configure_model():
    """Return the default processing settings as a dictionary.

    ``segment_size`` is 16 when CUDA is available and 4 otherwise (the GPU
    value was raised from an earlier 8); the other entries are fixed.
    """
    has_gpu = torch.cuda.is_available()
    if has_gpu:
        segment_size = 16
    else:
        segment_size = 4

    settings = dict(
        segment_size=segment_size,
        overlap=0.1,
        sample_rate=44100,
        channels=2,
    )
    return settings
def check_dependencies():
    """Report whether the packages this module relies on can be imported.

    Imports torch, torchaudio, librosa and demucs in that order, then builds a
    small random NumPy array and converts it to a tensor.

    Returns:
        True if every import succeeds, False if any raises ImportError (the
        message is printed).
    """
    try:
        import torch
        import torchaudio
        import librosa
        import demucs
        from demucs.pretrained import get_model

        # Smoke-test the NumPy -> torch hand-off with one second of noise
        # at 44100 samples; the result itself is not needed.
        torch.from_numpy(np.random.random(44100))
    except ImportError as missing:
        print(f"Missing dependency: {str(missing)}")
        return False
    else:
        print("All required packages are installed correctly")
        return True