import os
import shutil
import subprocess
import sys
import tempfile
import traceback
from pathlib import Path

import gradio as gr
import soundfile as sf
import torch
import torchaudio

# ================================
# CONFIG
# ================================
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL = "htdemucs"  # best quality vocal separation


def resave(src, dst):
    """Re-encode the audio file at *src* and write it to *dst*.

    The file is decoded with torchaudio and written back with soundfile,
    keeping the original sample rate.

    Args:
        src: Path (str or Path) of the audio file to read.
        dst: Destination path handed to ``soundfile.write``.
    """
    wav, sr = torchaudio.load(str(src))
    # Transpose so the array layout matches what soundfile.write expects
    # (torchaudio loads channels-first by default).
    data = wav.numpy().T
    sf.write(dst, data, sr)


# ================================
# SPLIT FUNCTION
# ================================
def split_vocals(audio_file):
    """Separate a song into vocals and background using the Demucs CLI.

    Args:
        audio_file: Filesystem path of the uploaded song, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(vocals_path, background_path, status_message)`` tuple. On any
        failure both paths are ``None`` and the message describes the error.
    """
    if audio_file is None:
        return None, None, "❌ Please upload an audio file."

    work_dir = tempfile.mkdtemp(prefix="demucs_")
    succeeded = False
    try:
        print(f"[Demucs] Input: {audio_file}")
        print(f"[Demucs] Device: {DEVICE}")

        # Run Demucs with the interpreter that is running this app so the
        # subprocess sees the same installed packages; a bare "python" may
        # resolve to a different environment.
        result = subprocess.run(
            [
                sys.executable or "python", "-m", "demucs",
                "--two-stems", "vocals",  # only split vocals vs everything else
                "-n", MODEL,
                "-d", DEVICE,  # keep the subprocess on the device we log
                "-o", work_dir,
                audio_file,
            ],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            error = result.stderr[-1000:]  # last 1000 chars to avoid huge dumps
            print(f"[Demucs] Error:\n{error}")
            return None, None, f"❌ Demucs failed:\n{error}"

        # Find output files
        track_name = Path(audio_file).stem
        demucs_out = Path(work_dir) / MODEL / track_name
        vocals_src = demucs_out / "vocals.wav"
        background_src = demucs_out / "no_vocals.wav"

        if not vocals_src.exists() or not background_src.exists():
            return None, None, "❌ Demucs ran but output files not found."

        # Copy to stable paths
        vocals_dst = str(Path(work_dir) / "vocals.wav")
        background_dst = str(Path(work_dir) / "background.wav")
        resave(vocals_src, vocals_dst)
        resave(background_src, background_dst)

        # The raw Demucs output tree is no longer needed once re-saved.
        shutil.rmtree(Path(work_dir) / MODEL, ignore_errors=True)

        print(f"[Demucs] ✅ Done. Vocals: {vocals_dst}")
        succeeded = True
        return vocals_dst, background_dst, "✅ Split complete!"

    except Exception as e:
        # Top-level UI boundary: report the error to the user, but keep the
        # traceback in the server log for debugging.
        traceback.print_exc()
        return None, None, f"❌ Exception: {str(e)}"

    finally:
        # On success the returned files live in work_dir and must be kept;
        # on every failure path nothing references it, so remove it.
        if not succeeded:
            shutil.rmtree(work_dir, ignore_errors=True)


# ================================
# GRADIO UI
# ================================
with gr.Blocks(title="CleanSong AI — Demucs Splitter") as demo:
    gr.Markdown("# 🎸 CleanSong AI — Vocal Splitter")
    gr.Markdown(
        "Upload a song → get the isolated **vocals** and **background** tracks back separately. "
        "Powered by Demucs `htdemucs` model."
    )

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Upload Song",
                type="filepath",
                sources=["upload"],
            )
            split_btn = gr.Button("🎸 Split Vocals", variant="primary", size="lg")

        with gr.Column():
            status = gr.Textbox(label="Status", interactive=False)
            vocals_out = gr.Audio(label="Vocals Only", type="filepath")
            background_out = gr.Audio(label="Background / Instrumental", type="filepath")

    # Output order must match the tuple returned by split_vocals.
    split_btn.click(
        fn=split_vocals,
        inputs=[audio_input],
        outputs=[vocals_out, background_out, status],
    )

    gr.Markdown(
        """
        ---
        **Part of the CleanSong AI pipeline.**
        - Upload your song here first to get isolated vocals
        - Feed the vocals into the Whisper Transcriber Space
        - The background track is kept intact for the final remix
        """
    )

if __name__ == "__main__":
    demo.launch()