| import gradio as gr |
| import torch |
| import tempfile |
| import soundfile as sf |
| import librosa |
| import numpy as np |
|
|
| from demucs.pretrained import get_model |
| from demucs.apply import apply_model |
|
|
| |
# Load the pretrained Hybrid Transformer Demucs separation model once at
# import time; keep it on CPU and switch it to inference mode.
model = get_model("htdemucs")
model.cpu().eval()
|
|
|
|
def remove_vocals(audio_file):
    """Separate an audio file and return the vocal-free accompaniment.

    Runs the htdemucs source-separation model and recombines the drums,
    bass and "other" stems while dropping the vocal stem.

    Parameters
    ----------
    audio_file : str
        Path to the input audio file (any format librosa can read).

    Returns
    -------
    str
        Path to a temporary WAV file containing the background mix.
    """
    # Load at the model's native 44.1 kHz rate; mono=False preserves
    # stereo as shape (2, samples), mono comes back as a 1-D array.
    audio, sr = librosa.load(audio_file, sr=44100, mono=False)

    # htdemucs is a stereo (2-channel) model: a mono signal must be
    # duplicated into two identical channels, not fed as one channel
    # (a (1, samples) input fails the model's channel check).
    if audio.ndim == 1:
        audio = np.stack([audio, audio], axis=0)

    # Shape (batch=1, channels, samples) as expected by apply_model.
    audio_tensor = torch.tensor(audio).unsqueeze(0)

    with torch.no_grad():
        sources = apply_model(model, audio_tensor)

    # htdemucs stem order: [drums, bass, other, vocals].
    drums = sources[0][0].cpu().numpy()
    bass = sources[0][1].cpu().numpy()
    other = sources[0][2].cpu().numpy()

    # Everything except the vocal stem.
    background = drums + bass + other

    # Close the handle immediately so the descriptor is not leaked;
    # delete=False keeps the file on disk for Gradio to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        output_file = tmp.name

    # soundfile expects (samples, channels), hence the transpose.
    sf.write(output_file, background.T, sr)

    return output_file
|
|
|
|
# Wire the separation function into a minimal Gradio UI: one audio
# upload in, one processed audio file out.
audio_input = gr.Audio(type="filepath", label="Upload Audio")
audio_output = gr.Audio(label="Background Music + Expressions")

interface = gr.Interface(
    fn=remove_vocals,
    inputs=audio_input,
    outputs=audio_output,
    title="Vocal Remover (Keep Music + Expressions)",
)

interface.launch()