# Hugging Face Space: Ram6666/VocalCleanAI (status: Sleeping)
# File size: 12,611 bytes · revision 87f44dc

import logging
import os
import shutil
import subprocess
import sys
import tempfile
import uuid
import zipfile
from pathlib import Path

import gradio as gr
import numpy as np

# ─────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────

# Root logging: INFO and above, formatted as "<time> [LEVEL] message".
logging.basicConfig(format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO)

# Named application logger used throughout this module.
log = logging.getLogger("vocalclean-gradio")

# ─────────────────────────────────────────────
# Directories
# ─────────────────────────────────────────────

# All working directories are resolved relative to this source file.
BASE_DIR = Path(__file__).parent
OUTPUTS_DIR, ASSETS_DIR = BASE_DIR / "outputs", BASE_DIR / "assets"

# Create both directories at import time; a no-op when they already exist.
for _required_dir in (OUTPUTS_DIR, ASSETS_DIR):
    _required_dir.mkdir(exist_ok=True)

# ─────────────────────────────────────────────
# Constants
# ─────────────────────────────────────────────

# Upload limit: the MB value appears in user messages, the byte value is
# what the uploaded file's size is compared against.
MAX_FILE_SIZE_MB = 100
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 ** 2

# Accepted upload suffixes (compared against the lower-cased file suffix).
ALLOWED_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg"}

# Model name handed to Demucs through its -n option.
DEMUCS_MODEL = "htdemucs"

# Display metadata (label, colour, icon) keyed by stem name.
STEM_META = {
    stem: {"label": label, "color": color, "icon": icon}
    for stem, label, color, icon in (
        ("vocals", "Vocals", "#4F46E5", "🎤"),
        ("drums", "Drums", "#EF4444", "🥁"),
        ("bass", "Bass", "#8B5CF6", "🎸"),
        ("other", "Other / Melody", "#F59E0B", "🎹"),
    )
}

# ─────────────────────────────────────────────
# GPU Detection
# ─────────────────────────────────────────────

def detect_device() -> str:
    """Pick the compute device to hand to Demucs.

    Returns:
        ``"cuda"`` when PyTorch imports and reports an available CUDA device,
        otherwise ``"cpu"``.
    """
    try:
        # Imported lazily so a missing or broken PyTorch install does not
        # prevent the module from loading.
        import torch

        if torch.cuda.is_available():
            name = torch.cuda.get_device_name(0)
            log.info(f"GPU detected: {name}")
            return "cuda"
    except Exception as exc:
        # Detection is best-effort: always fall back to CPU, but record the
        # reason instead of discarding it silently.
        log.warning(f"GPU detection failed, falling back to CPU: {exc}")
    log.info("No GPU — running on CPU")
    return "cpu"

# Resolved once at import time; passed to Demucs via --device and shown in
# the progress message while separation runs.
DEVICE = detect_device()

# ─────────────────────────────────────────────
# FFmpeg Preprocessing
# ─────────────────────────────────────────────

def preprocess_audio(input_path: Path, output_path: Path) -> Path:
    """Convert the input to a stereo, 44.1 kHz, 16-bit WAV file using FFmpeg.

    Args:
        input_path: Audio file to convert.
        output_path: Destination WAV path (overwritten if present, via ``-y``).

    Returns:
        ``output_path`` once FFmpeg has exited successfully.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status; the message
            carries the last 400 characters of its stderr.
    """
    ffmpeg_args = ["ffmpeg", "-y", "-i", str(input_path)]
    # Target format: 2 channels, 44100 Hz, signed 16-bit samples, WAV container.
    ffmpeg_args += ["-ac", "2", "-ar", "44100", "-sample_fmt", "s16", "-f", "wav"]
    ffmpeg_args.append(str(output_path))

    completed = subprocess.run(ffmpeg_args, capture_output=True, text=True, timeout=120)
    if completed.returncode == 0:
        return output_path
    raise RuntimeError(f"FFmpeg failed: {completed.stderr[-400:]}")

# ─────────────────────────────────────────────
# Demucs Separation
# ─────────────────────────────────────────────

def run_demucs(input_path: Path, output_dir: Path, progress_cb=None) -> dict[str, Path]:
    """Run Demucs on an audio file and return a dict of stem_name → wav path.

    The input is first normalised with ``preprocess_audio``; if that step
    fails for any reason, the original file is handed to Demucs unchanged.

    Args:
        input_path: Audio file to separate.
        output_dir: Directory passed to Demucs via ``-o``; it is searched
            recursively for the resulting ``.wav`` files.
        progress_cb: Optional callable ``(fraction, message)`` invoked at
            each stage of the job.

    Returns:
        Mapping of each output WAV's file stem to its path.

    Raises:
        RuntimeError: If Demucs exits non-zero or no WAV files are produced.
        subprocess.TimeoutExpired: If Demucs runs for more than 600 seconds.
    """

    if progress_cb:
        progress_cb(0.1, "Preprocessing audio...")

    preprocessed = input_path.parent / f"pre_{input_path.stem}.wav"
    try:
        try:
            preprocess_audio(input_path, preprocessed)
            demucs_input = preprocessed
        except Exception as e:
            # Preprocessing is best-effort: fall back to the raw upload.
            log.warning(f"FFmpeg preprocessing skipped: {e}")
            demucs_input = input_path

        if progress_cb:
            progress_cb(0.2, f"Running Hybrid Demucs on {DEVICE.upper()}...")

        cmd = [
            # Use the interpreter running this app so Demucs is resolved from
            # the same environment; "python3" is only a fallback for the rare
            # case where the interpreter path is unknown.
            sys.executable or "python3", "-m", "demucs",
            "--device", DEVICE,
            "-n", DEMUCS_MODEL,
            "-o", str(output_dir),
            str(demucs_input),
        ]

        log.info(f"Demucs command: {' '.join(cmd)}")
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)

        if proc.returncode != 0:
            # Keep only the tail of the output so the message stays readable.
            error_msg = (proc.stderr or proc.stdout or "Unknown error")[-600:]
            log.error(f"Demucs failed: {error_msg}")
            raise RuntimeError(f"Demucs separation failed:\n{error_msg}")

        if progress_cb:
            progress_cb(0.85, "Collecting output stems...")

        # Key each WAV found anywhere under output_dir by its file stem.
        stems: dict[str, Path] = {wav.stem: wav for wav in output_dir.rglob("*.wav")}

        if not stems:
            raise RuntimeError("No output files were generated by Demucs.")

        log.info(f"Stems found: {list(stems.keys())}")
        return stems
    finally:
        # Remove the temporary preprocessed file on every exit path (success,
        # Demucs failure, timeout) so failed jobs do not leak disk space.
        try:
            preprocessed.unlink(missing_ok=True)
        except OSError as e:
            log.warning(f"Could not remove temporary file {preprocessed}: {e}")

# ─────────────────────────────────────────────
# ZIP Builder
# ─────────────────────────────────────────────

def build_zip(stems: dict[str, Path], job_dir: Path) -> Path:
    """Bundle every stem into ``stems.zip`` inside ``job_dir``.

    Args:
        stems: Mapping of stem name to the WAV file on disk.
        job_dir: Directory in which the archive is created.

    Returns:
        Path of the written archive. Each entry is stored at the archive
        root as ``<stem name>.wav`` using DEFLATE compression.
    """
    archive_path = job_dir / "stems.zip"
    archive = zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        for stem_name in stems:
            archive.write(stems[stem_name], arcname=f"{stem_name}.wav")
    return archive_path

# ─────────────────────────────────────────────
# Main Processing Function
# ─────────────────────────────────────────────

def separate_audio(audio_file, progress=gr.Progress(track_tqdm=True)):
    """Validate an uploaded file, split it into stems, and package the results.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when nothing
            was uploaded.
        progress: Gradio progress tracker, called as
            ``progress(fraction, desc=message)``.

    Returns:
        A 6-tuple ``(status, vocals, drums, bass, other, zip)``. ``status`` is
        a human-readable message; the remaining items are file paths as
        strings, or ``None`` for a stem that was not produced. On any
        validation or processing failure every item after ``status`` is
        ``None``.
    """

    def failure(message: str):
        # Uniform error result: status text plus five empty outputs.
        return (message, None, None, None, None, None)

    if audio_file is None:
        return failure("❌ No file uploaded.")

    input_path = Path(audio_file)
    ext = input_path.suffix.lower()

    if ext not in ALLOWED_EXTENSIONS:
        return failure(
            f"❌ Unsupported format '{ext}'. Please upload MP3, WAV, M4A, FLAC, or OGG."
        )

    try:
        file_size = input_path.stat().st_size
    except OSError as exc:
        # The upload may have vanished or be unreadable; report it through
        # the status output instead of letting the exception escape.
        log.error(f"Cannot read uploaded file '{input_path}': {exc}")
        return failure(f"❌ Could not read uploaded file: {exc}")

    if file_size > MAX_FILE_SIZE_BYTES:
        size_mb = file_size / (1024 * 1024)
        return failure(
            f"❌ File too large ({size_mb:.1f} MB). Maximum allowed size is {MAX_FILE_SIZE_MB} MB."
        )

    job_id = str(uuid.uuid4())[:8]
    job_dir = OUTPUTS_DIR / job_id

    log.info(f"Job {job_id}: processing '{input_path.name}' ({file_size / 1024:.0f} KB)")

    try:
        # Created inside the try so a filesystem error is reported like any
        # other processing failure.
        job_dir.mkdir(parents=True, exist_ok=True)

        def update_progress(frac: float, msg: str):
            progress(frac, desc=msg)
            log.info(f"Job {job_id}: [{int(frac * 100)}%] {msg}")

        update_progress(0.05, "Starting AI separation — this may take 1–3 minutes on free servers...")

        stems = run_demucs(input_path, job_dir, progress_cb=update_progress)

        update_progress(0.92, "Building download archive...")
        zip_path = build_zip(stems, job_dir)

        update_progress(1.0, "✅ Done!")
        log.info(f"Job {job_id}: complete — {list(stems.keys())}")

        def stem_path(name: str):
            return str(stems[name]) if name in stems else None

        status = f"✅ Separation complete! Stems: {', '.join(stems.keys())}"
        return (
            status,
            stem_path("vocals"),
            stem_path("drums"),
            stem_path("bass"),
            stem_path("other"),
            str(zip_path),
        )

    except Exception as exc:
        log.exception(f"Job {job_id}: error")
        # ignore_errors=True already suppresses removal failures, so no
        # extra try/except is needed around the cleanup.
        shutil.rmtree(job_dir, ignore_errors=True)
        return failure(f"❌ Processing failed: {exc}")

# ─────────────────────────────────────────────
# Gradio Interface
# ─────────────────────────────────────────────

# Custom stylesheet passed to gr.Blocks(css=...). The selectors target the
# ids/classes assigned in the layout below: #title and #subtitle (header
# HTML), #status-box (status textbox elem_id) and .stem-row (result rows).
# The last rule hides any <footer> element.
css = """
#title { text-align: center; margin-bottom: 8px; }
#subtitle { text-align: center; color: #6B7280; margin-bottom: 24px; }
#status-box { border-radius: 10px; }
.stem-row { gap: 16px; }
footer { display: none !important; }
"""

# Build the Gradio Blocks app. Components are declared inside nested
# Row/Column contexts; the button is wired to separate_audio at the end.
with gr.Blocks(
    title="VocalClean AI — Music Stem Separator",
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="sky",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=css,
) as demo:

    # Page header; the element ids match the #title / #subtitle CSS rules.
    gr.HTML("""
        <h1 id="title" style="font-size:2rem;font-weight:700;">
            🎵 VocalClean AI
        </h1>
        <p id="subtitle">
            Separate music into individual stems using Hybrid Demucs AI
            &nbsp;|&nbsp; Vocals · Drums · Bass · Other
        </p>
    """)

    # Top row: upload controls in one column, status display in the other.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Audio")
            # type="filepath" means the handler receives a path string.
            audio_input = gr.Audio(
                label="Drop your audio file here",
                type="filepath",
                sources=["upload"],
            )
            # NOTE: the formats and size quoted here are hard-coded text; keep
            # them in sync with ALLOWED_EXTENSIONS and MAX_FILE_SIZE_MB.
            gr.Markdown(
                "_Supported: MP3, WAV, M4A, FLAC, OGG — up to 100 MB_",
                elem_classes=["upload-hint"],
            )
            run_btn = gr.Button(
                "🚀 Separate Stems",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📊 Processing Status")
            # Read-only textbox that shows the status string from the handler.
            status_out = gr.Textbox(
                label="Status",
                interactive=False,
                placeholder="Upload a file and click 'Separate Stems' to begin...",
                lines=3,
                elem_id="status-box",
            )
            gr.Markdown(
                "⏱️ _Processing may take **1–3 minutes** on free CPU servers. "
                "GPU environments run significantly faster._"
            )

    gr.Markdown("---")
    gr.Markdown("### 🎧 Stem Results")

    # Result players, two per row: vocals + drums, then bass + other.
    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎤 Vocals")
            vocals_out = gr.Audio(label="Vocals", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🥁 Drums")
            drums_out = gr.Audio(label="Drums", type="filepath", interactive=False)

    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎸 Bass")
            bass_out = gr.Audio(label="Bass", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🎹 Other / Melody")
            other_out = gr.Audio(label="Other", type="filepath", interactive=False)

    gr.Markdown("---")
    gr.Markdown("### 📦 Download")

    # Download row: the ZIP archive of all stems plus an explanatory note.
    with gr.Row():
        with gr.Column(scale=1):
            zip_out = gr.File(
                label="Download All Stems (ZIP)",
                interactive=False,
            )
        with gr.Column(scale=1):
            gr.Markdown(
                "Each stem is exported as a high-quality **WAV** file. "
                "The ZIP archive contains all separated tracks."
            )

    # Attribution footer.
    gr.Markdown("---")
    gr.Markdown(
        "<center><small>Powered by "
        "[Hybrid Demucs](https://github.com/facebookresearch/demucs) "
        "by Meta Research &nbsp;·&nbsp; "
        "Built with [Gradio](https://gradio.app)</small></center>"
    )

    # The outputs list must stay in the same order as the 6-tuple returned by
    # separate_audio: status, vocals, drums, bass, other, zip.
    run_btn.click(
        fn=separate_audio,
        inputs=[audio_input],
        outputs=[status_out, vocals_out, drums_out, bass_out, other_out, zip_out],
        show_progress="full",
    )

# ─────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────

if __name__ == "__main__":
    # Honour a PORT override from the environment; otherwise use 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(
        share=False,
        show_error=True,
        server_name="0.0.0.0",
        server_port=listen_port,
    )