File size: 3,631 Bytes
d1a97be
 
 
 
 
 
 
 
 
 
a0afde1
d1a97be
 
 
 
 
 
 
 
a0afde1
 
 
 
 
 
d1a97be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0afde1
 
d1a97be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import shutil
import subprocess
import tempfile
from pathlib import Path

import gradio as gr
import torchaudio
import torch

import soundfile as sf

# ================================
# CONFIG
# ================================
# Compute device label: "cuda" when PyTorch reports a usable GPU, else "cpu".
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Demucs model name handed to the CLI through its `-n` option
# (chosen by the author for best-quality vocal separation).
MODEL = "htdemucs"


def resave(src, dst):
    """Load the audio at *src* with torchaudio and rewrite it to *dst*.

    Args:
        src: Path (or path-like) of the audio file to read.
        dst: Destination path handed to ``soundfile.write``.
    """
    waveform, sample_rate = torchaudio.load(str(src))
    # The tensor is transposed before writing so its axes are swapped
    # relative to what torchaudio returned (soundfile expects frames first).
    sf.write(dst, waveform.numpy().T, sample_rate)


# ================================
# SPLIT FUNCTION
# ================================
def split_vocals(audio_file):
    """Split a song into a vocals stem and a background stem using Demucs.

    The Demucs command-line tool is run in a fresh temporary directory with
    ``--two-stems vocals``; the two resulting WAV files are then re-saved at
    the top of that directory and their paths are returned.

    Args:
        audio_file: Path to the input audio file, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(vocals_path, background_path, status_message)`` tuple. On any
        failure both paths are ``None`` and the message describes what went
        wrong; exceptions are caught and reported through the message.
    """
    # Local import: only needed to locate the interpreter running this app.
    import sys

    if audio_file is None:
        return None, None, "❌ Please upload an audio file."

    work_dir = tempfile.mkdtemp(prefix="demucs_")
    succeeded = False

    try:
        print(f"[Demucs] Input: {audio_file}")
        print(f"[Demucs] Device: {DEVICE}")

        # Run Demucs with the same interpreter as this process so the module
        # is resolved from the environment it was actually installed into,
        # rather than whatever "python" happens to be first on PATH.
        result = subprocess.run(
            [
                sys.executable, "-m", "demucs",
                "--two-stems", "vocals",  # only split vocals vs everything else
                "-n", MODEL,
                "-d", DEVICE,  # make the logged device the one really used
                "-o", work_dir,
                audio_file,
            ],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            error = result.stderr[-1000:]  # last 1000 chars to avoid huge dumps
            print(f"[Demucs] Error:\n{error}")
            return None, None, f"❌ Demucs failed:\n{error}"

        # Demucs writes its stems to <out>/<model>/<track name>/.
        model_dir = Path(work_dir) / MODEL
        demucs_out = model_dir / Path(audio_file).stem

        vocals_src = demucs_out / "vocals.wav"
        background_src = demucs_out / "no_vocals.wav"

        if not vocals_src.exists() or not background_src.exists():
            return None, None, "❌ Demucs ran but output files not found."

        # Re-save to stable paths at the top of the working directory.
        vocals_dst = str(Path(work_dir) / "vocals.wav")
        background_dst = str(Path(work_dir) / "background.wav")

        resave(vocals_src, vocals_dst)
        resave(background_src, background_dst)

        # The raw Demucs output is no longer needed once both stems have
        # been re-saved; drop it so only the two returned files remain.
        shutil.rmtree(model_dir, ignore_errors=True)

        succeeded = True
        print(f"[Demucs] ✅ Done. Vocals: {vocals_dst}")
        return vocals_dst, background_dst, "✅ Split complete!"

    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        print(f"[Demucs] Exception: {e!r}")
        return None, None, f"❌ Exception: {str(e)}"

    finally:
        # On success the returned files live inside work_dir, so it must be
        # kept; on every failure path nothing references it any more.
        if not succeeded:
            shutil.rmtree(work_dir, ignore_errors=True)


# ================================
# GRADIO UI
# ================================
# Build the single-page UI: an upload column on the left, and the status box
# plus the two resulting audio players on the right.
with gr.Blocks(title="CleanSong AI — Demucs Splitter") as demo:

    gr.Markdown("# 🎸 CleanSong AI — Vocal Splitter")
    gr.Markdown(
        "Upload a song → get the isolated **vocals** and **background** tracks back separately. "
        # Show the configured model so the text cannot drift from MODEL.
        f"Powered by Demucs `{MODEL}` model."
    )

    with gr.Row():
        with gr.Column():
            # type="filepath" makes the callback receive a path string.
            audio_input = gr.Audio(
                label="Upload Song",
                type="filepath",
                sources=["upload"],
            )
            split_btn = gr.Button("🎸 Split Vocals", variant="primary", size="lg")

        with gr.Column():
            status = gr.Textbox(label="Status", interactive=False)
            vocals_out = gr.Audio(label="Vocals Only", type="filepath")
            background_out = gr.Audio(label="Background / Instrumental", type="filepath")

    # Output order must match the (vocals, background, status) tuple that
    # split_vocals returns.
    split_btn.click(
        fn=split_vocals,
        inputs=[audio_input],
        outputs=[vocals_out, background_out, status],
    )

    gr.Markdown(
        """
        ---
        **Part of the CleanSong AI pipeline.**
        - Upload your song here first to get isolated vocals
        - Feed the vocals into the Whisper Transcriber Space
        - The background track is kept intact for the final remix
        """
    )

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()