File size: 12,611 Bytes
87f44dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
import os
import uuid
import shutil
import zipfile
import subprocess
import tempfile
import logging
from pathlib import Path

import gradio as gr
import numpy as np

# ─────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────

# Root logging: timestamped, level-tagged records at INFO and above.
# ``log`` is the named logger the rest of this file writes to.
logging.basicConfig(format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO)
log = logging.getLogger("vocalclean-gradio")

# ─────────────────────────────────────────────
# Directories
# ─────────────────────────────────────────────

# All working directories live next to this file and are created on import
# if they do not exist yet.
BASE_DIR = Path(__file__).parent
OUTPUTS_DIR = BASE_DIR.joinpath("outputs")
ASSETS_DIR = BASE_DIR.joinpath("assets")
for _required_dir in (OUTPUTS_DIR, ASSETS_DIR):
    _required_dir.mkdir(exist_ok=True)

# ─────────────────────────────────────────────
# Constants
# ─────────────────────────────────────────────

# Upload limit: megabytes for user-facing messages, bytes for the size check.
MAX_FILE_SIZE_MB = 100
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
# Lower-case file suffixes (including the dot) accepted from the uploader.
ALLOWED_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg"}
# Model name handed to the Demucs command line via ``-n``.
DEMUCS_MODEL = "htdemucs"

# Display metadata per stem name: label, hex colour and emoji icon.
# The vocals and drums icons were mis-encoded (mojibake) and are restored here.
STEM_META = {
    "vocals": {"label": "Vocals", "color": "#4F46E5", "icon": "🎤"},
    "drums": {"label": "Drums", "color": "#EF4444", "icon": "🥁"},
    "bass": {"label": "Bass", "color": "#8B5CF6", "icon": "🎸"},
    "other": {"label": "Other / Melody", "color": "#F59E0B", "icon": "🎹"},
}

# ─────────────────────────────────────────────
# GPU Detection
# ─────────────────────────────────────────────

def detect_device() -> str:
    """Choose the compute device string passed to Demucs.

    Returns:
        ``"cuda"`` when PyTorch imports and reports an available CUDA device,
        otherwise ``"cpu"``.
    """
    try:
        # Imported lazily so a missing or broken PyTorch install degrades to
        # CPU instead of aborting start-up.
        import torch

        if torch.cuda.is_available():
            log.info("GPU detected: %s", torch.cuda.get_device_name(0))
            return "cuda"
    except Exception as exc:
        # Best-effort probe: never raise, but record why the probe failed so
        # an unexpected CPU fallback can be diagnosed from the logs.
        log.warning("GPU probe failed, falling back to CPU: %s", exc)
    log.info("No GPU — running on CPU")
    return "cpu"

# Resolved once at import time; run_demucs passes this value to ``--device``.
DEVICE = detect_device()

# ─────────────────────────────────────────────
# FFmpeg Preprocessing
# ─────────────────────────────────────────────

def preprocess_audio(input_path: Path, output_path: Path) -> Path:
    """Transcode *input_path* to a stereo, 44.1 kHz, 16-bit WAV via FFmpeg.

    Args:
        input_path: Audio file to convert.
        output_path: Destination WAV file; overwritten if it exists (``-y``).

    Returns:
        *output_path*, once FFmpeg has exited successfully.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status; the message
            carries the last 400 characters of its stderr.
        subprocess.TimeoutExpired: If FFmpeg runs longer than 120 seconds.
    """
    ffmpeg_args = ["ffmpeg", "-y", "-i", str(input_path)]
    # Output format: 2 channels, 44100 Hz, signed 16-bit samples, WAV container
    ffmpeg_args += ["-ac", "2", "-ar", "44100", "-sample_fmt", "s16", "-f", "wav"]
    ffmpeg_args.append(str(output_path))

    completed = subprocess.run(ffmpeg_args, capture_output=True, text=True, timeout=120)
    if completed.returncode == 0:
        return output_path
    raise RuntimeError(f"FFmpeg failed: {completed.stderr[-400:]}")

# ─────────────────────────────────────────────
# Demucs Separation
# ─────────────────────────────────────────────

def run_demucs(input_path: Path, output_dir: Path, progress_cb=None) -> dict[str, Path]:
    """Separate *input_path* into stems with Demucs and locate the results.

    The input is first normalised with ``preprocess_audio``; if that step
    fails for any reason the original file is handed to Demucs unchanged.

    Args:
        input_path: Audio file to separate.
        output_dir: Directory given to Demucs via ``-o``; it is searched
            recursively for the resulting ``.wav`` files.
        progress_cb: Optional callable ``(fraction, message)`` invoked at the
            start of each stage.

    Returns:
        Mapping of WAV file stem (for example ``"vocals"``) to its path.

    Raises:
        RuntimeError: If Demucs exits non-zero or produces no WAV files.
        subprocess.TimeoutExpired: If Demucs runs longer than 600 seconds.
    """
    import sys  # local import: only needed to locate the running interpreter

    def report(fraction: float, message: str) -> None:
        if progress_cb:
            progress_cb(fraction, message)

    report(0.1, "Preprocessing audio...")

    preprocessed = input_path.parent / f"pre_{input_path.stem}.wav"
    try:
        try:
            preprocess_audio(input_path, preprocessed)
            demucs_input = preprocessed
        except Exception as e:
            # Preprocessing is best-effort; Demucs can still try the raw file.
            log.warning(f"FFmpeg preprocessing skipped: {e}")
            demucs_input = input_path

        report(0.2, f"Running Hybrid Demucs on {DEVICE.upper()}...")

        cmd = [
            # Use the interpreter running this app so Demucs is resolved from
            # the same environment; fall back to PATH lookup if unknown.
            sys.executable or "python3", "-m", "demucs",
            "--device", DEVICE,
            "-n", DEMUCS_MODEL,
            "-o", str(output_dir),
            str(demucs_input),
        ]

        log.info(f"Demucs command: {' '.join(cmd)}")
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)

        if proc.returncode != 0:
            # Keep only the tail of the output: that is where the error is.
            error_msg = (proc.stderr or proc.stdout or "Unknown error")[-600:]
            log.error(f"Demucs failed: {error_msg}")
            raise RuntimeError(f"Demucs separation failed:\n{error_msg}")

        report(0.85, "Collecting output stems...")

        stems: dict[str, Path] = {wav.stem: wav for wav in output_dir.rglob("*.wav")}
        if not stems:
            raise RuntimeError("No output files were generated by Demucs.")
    finally:
        # Remove the intermediate WAV on every exit path (success, Demucs
        # failure, timeout), not just after a successful run.
        try:
            preprocessed.unlink(missing_ok=True)
        except OSError as cleanup_error:
            log.debug(f"Could not remove {preprocessed}: {cleanup_error}")

    log.info(f"Stems found: {list(stems.keys())}")
    return stems

# ─────────────────────────────────────────────
# ZIP Builder
# ─────────────────────────────────────────────

def build_zip(stems: dict[str, Path], job_dir: Path) -> Path:
    """Bundle every stem into a deflate-compressed ``stems.zip`` in *job_dir*.

    Args:
        stems: Mapping of stem name to the WAV file holding that stem.
        job_dir: Directory in which the archive is created.

    Returns:
        Path of the written archive; each member is named ``<stem name>.wav``.
    """
    archive_path = job_dir.joinpath("stems.zip")
    archive = zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        for stem_name in stems:
            archive.write(stems[stem_name], arcname=f"{stem_name}.wav")
    return archive_path

# ─────────────────────────────────────────────
# Main Processing Function
# ─────────────────────────────────────────────

def separate_audio(audio_file, progress=gr.Progress(track_tqdm=True)):
    """Validate an uploaded file, split it into stems and package the results.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when nothing
            was uploaded.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        A 6-tuple ``(status, vocals, drums, bass, other, zip)``. ``status`` is
        a human-readable message; the other items are file paths as strings,
        or ``None`` for a stem that was not produced. On any validation or
        processing failure all five paths are ``None``.
    """

    def failure(message: str):
        # A status message plus an empty value for each of the five outputs.
        return (message, None, None, None, None, None)

    if audio_file is None:
        return failure("❌ No file uploaded.")

    input_path = Path(audio_file)
    ext = input_path.suffix.lower()

    if ext not in ALLOWED_EXTENSIONS:
        return failure(
            f"❌ Unsupported format '{ext}'. Please upload MP3, WAV, M4A, FLAC, or OGG."
        )

    try:
        file_size = input_path.stat().st_size
    except OSError as exc:
        # The upload may have vanished or be unreadable; report it in the
        # status box instead of letting the handler raise.
        return failure(f"❌ Could not read uploaded file: {exc}")

    if file_size > MAX_FILE_SIZE_BYTES:
        size_mb = file_size / (1024 * 1024)
        return failure(
            f"❌ File too large ({size_mb:.1f} MB). Maximum allowed size is {MAX_FILE_SIZE_MB} MB."
        )

    # Short random id that names this job's private output directory.
    job_id = uuid.uuid4().hex[:8]
    job_dir = OUTPUTS_DIR / job_id
    job_dir.mkdir(parents=True, exist_ok=True)

    log.info(f"Job {job_id}: processing '{input_path.name}' ({file_size / 1024:.0f} KB)")

    try:
        def update_progress(frac: float, msg: str):
            progress(frac, desc=msg)
            log.info(f"Job {job_id}: [{int(frac * 100)}%] {msg}")

        update_progress(0.05, "Starting AI separation — this may take 1–3 minutes on free servers...")

        stems = run_demucs(input_path, job_dir, progress_cb=update_progress)

        update_progress(0.92, "Building download archive...")
        zip_path = build_zip(stems, job_dir)

        update_progress(1.0, "✅ Done!")
        log.info(f"Job {job_id}: complete — {list(stems.keys())}")

        def stem_path(name: str):
            return str(stems[name]) if name in stems else None

        status = f"✅ Separation complete! Stems: {', '.join(stems.keys())}"
        return (
            status,
            stem_path("vocals"),
            stem_path("drums"),
            stem_path("bass"),
            stem_path("other"),
            str(zip_path),
        )

    except Exception as exc:
        log.exception(f"Job {job_id}: error")
        # ignore_errors=True already makes this cleanup best-effort.
        shutil.rmtree(job_dir, ignore_errors=True)
        return failure(f"❌ Processing failed: {exc}")

# ─────────────────────────────────────────────
# Gradio Interface
# ─────────────────────────────────────────────

# Custom stylesheet handed to gr.Blocks(css=...): centres the title and
# subtitle, rounds the status box, spaces the stem rows and hides the footer.
css = (
    "\n"
    "#title { text-align: center; margin-bottom: 8px; }\n"
    "#subtitle { text-align: center; color: #6B7280; margin-bottom: 24px; }\n"
    "#status-box { border-radius: 10px; }\n"
    ".stem-row { gap: 16px; }\n"
    "footer { display: none !important; }\n"
)

# Page layout and event wiring. Components render in creation order, so the
# order of statements below defines the page structure.
# User-facing strings that were mis-encoded (mojibake) are restored here.
with gr.Blocks(
    title="VocalClean AI — Music Stem Separator",
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="sky",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=css,
) as demo:

    # Header: the ids match the #title / #subtitle rules in ``css``.
    gr.HTML("""
        <h1 id="title" style="font-size:2rem;font-weight:700;">
            🎵 VocalClean AI
        </h1>
        <p id="subtitle">
            Separate music into individual stems using Hybrid Demucs AI
            &nbsp;|&nbsp; Vocals · Drums · Bass · Other
        </p>
    """)

    with gr.Row():
        # Left column: upload widget and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Audio")
            audio_input = gr.Audio(
                label="Drop your audio file here",
                type="filepath",  # the handler receives a path, not raw samples
                sources=["upload"],
            )
            gr.Markdown(
                # Derived from the constant so the hint cannot drift from the
                # limit enforced in separate_audio.
                f"_Supported: MP3, WAV, M4A, FLAC, OGG — up to {MAX_FILE_SIZE_MB} MB_",
                elem_classes=["upload-hint"],
            )
            run_btn = gr.Button(
                "🚀 Separate Stems",
                variant="primary",
                size="lg",
            )

        # Right column: read-only status text.
        with gr.Column(scale=1):
            gr.Markdown("### 📊 Processing Status")
            status_out = gr.Textbox(
                label="Status",
                interactive=False,
                placeholder="Upload a file and click 'Separate Stems' to begin...",
                lines=3,
                elem_id="status-box",
            )
            gr.Markdown(
                "⏱️ _Processing may take **1–3 minutes** on free CPU servers. "
                "GPU environments run significantly faster._"
            )

    gr.Markdown("---")
    gr.Markdown("### 🎧 Stem Results")

    # One read-only audio player per stem, laid out as a 2 x 2 grid.
    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎤 Vocals")
            vocals_out = gr.Audio(label="Vocals", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🥁 Drums")
            drums_out = gr.Audio(label="Drums", type="filepath", interactive=False)

    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎸 Bass")
            bass_out = gr.Audio(label="Bass", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🎹 Other / Melody")
            other_out = gr.Audio(label="Other", type="filepath", interactive=False)

    gr.Markdown("---")
    gr.Markdown("### 📦 Download")

    with gr.Row():
        with gr.Column(scale=1):
            zip_out = gr.File(
                label="Download All Stems (ZIP)",
                interactive=False,
            )
        with gr.Column(scale=1):
            gr.Markdown(
                "Each stem is exported as a high-quality **WAV** file. "
                "The ZIP archive contains all separated tracks."
            )

    gr.Markdown("---")
    gr.Markdown(
        "<center><small>Powered by "
        "[Hybrid Demucs](https://github.com/facebookresearch/demucs) "
        "by Meta Research &nbsp;·&nbsp; "
        "Built with [Gradio](https://gradio.app)</small></center>"
    )

    # The output order must match the 6-tuple returned by separate_audio:
    # (status, vocals, drums, bass, other, zip).
    run_btn.click(
        fn=separate_audio,
        inputs=[audio_input],
        outputs=[status_out, vocals_out, drums_out, bass_out, other_out, zip_out],
        show_progress="full",
    )

# ─────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(
        share=False,
        show_error=True,
        server_port=listen_port,
        server_name="0.0.0.0",
    )