Spaces:

Ram6666
/

VocalCleanAI

Sleeping

App Files Files Community

Ram6666 committed 29 days ago

Commit

87f44dc

verified ·

1 Parent(s): 66603d5

Upload 3 files

Browse files

Files changed (3) hide show

app.py +352 -0
packages.txt.txt +1 -0
requirements.txtrequirements.txt.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,352 @@

+import os
+import uuid
+import shutil
+import zipfile
+import subprocess
+import tempfile
+import logging
+from pathlib import Path
+import gradio as gr
+import numpy as np
+# ─────────────────────────────────────────────
+# Logging
+# ─────────────────────────────────────────────
+# Configure the root logger once at import time: INFO level, with each line
+# formatted as "<timestamp> [<LEVEL>] <message>".
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+# Named logger for this app's own messages.
+log = logging.getLogger("vocalclean-gradio")
+# ─────────────────────────────────────────────
+# Directories
+# ─────────────────────────────────────────────
+# All paths are resolved relative to the directory that contains this file.
+BASE_DIR = Path(__file__).parent
+# Per-job working directories (separated stems + ZIP) are created under outputs/.
+OUTPUTS_DIR = BASE_DIR / "outputs"
+# assets/ is created below; nothing in the visible code reads from it.
+ASSETS_DIR = BASE_DIR / "assets"
+# Create both directories at import time; exist_ok keeps restarts harmless.
+OUTPUTS_DIR.mkdir(exist_ok=True)
+ASSETS_DIR.mkdir(exist_ok=True)
+# ─────────────────────────────────────────────
+# Constants
+# ─────────────────────────────────────────────
+# Upload size limit; separate_audio compares the file's size against the byte value.
+MAX_FILE_SIZE_MB = 100
+MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
+# Accepted upload suffixes; compared against the lower-cased file extension.
+ALLOWED_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg"}
+# Model name passed to the Demucs CLI via "-n".
+DEMUCS_MODEL = "htdemucs"
+# Display metadata (label, colour, icon) keyed by stem name.
+# NOTE(review): not referenced anywhere else in the visible code.
+STEM_META = {
+    "vocals": {"label": "Vocals", "color": "#4F46E5", "icon": "🎤"},
+    "drums": {"label": "Drums", "color": "#EF4444", "icon": "🥁"},
+    "bass": {"label": "Bass", "color": "#8B5CF6", "icon": "🎸"},
+    "other": {"label": "Other / Melody", "color": "#F59E0B", "icon": "🎹"},
+}
+# ─────────────────────────────────────────────
+# GPU Detection
+# ─────────────────────────────────────────────
+def detect_device() -> str:
+    try:
+        import torch
+        if torch.cuda.is_available():
+            name = torch.cuda.get_device_name(0)
+            log.info(f"GPU detected: {name}")
+            return "cuda"
+    except Exception:
+        pass
+    log.info("No GPU — running on CPU")
+    return "cpu"
+DEVICE = detect_device()
+# ─────────────────────────────────────────────
+# FFmpeg Preprocessing
+# ─────────────────────────────────────────────
+def preprocess_audio(input_path: Path, output_path: Path) -> Path:
+    """Normalise to WAV, stereo, 44.1 kHz before Demucs."""
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", str(input_path),
+        "-ac", "2",
+        "-ar", "44100",
+        "-sample_fmt", "s16",
+        "-f", "wav",
+        str(output_path),
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    if result.returncode != 0:
+        raise RuntimeError(f"FFmpeg failed: {result.stderr[-400:]}")
+    return output_path
+# ─────────────────────────────────────────────
+# Demucs Separation
+# ─────────────────────────────────────────────
+def run_demucs(input_path: Path, output_dir: Path, progress_cb=None) -> dict[str, Path]:
+    """Run Demucs htdemucs and return a dict of stem_name → wav path."""
+    if progress_cb:
+        progress_cb(0.1, "Preprocessing audio...")
+    preprocessed = input_path.parent / f"pre_{input_path.stem}.wav"
+    try:
+        preprocess_audio(input_path, preprocessed)
+        demucs_input = preprocessed
+    except Exception as e:
+        log.warning(f"FFmpeg preprocessing skipped: {e}")
+        demucs_input = input_path
+    if progress_cb:
+        progress_cb(0.2, f"Running Hybrid Demucs on {DEVICE.upper()}...")
+    cmd = [
+        "python3", "-m", "demucs",
+        "--device", DEVICE,
+        "-n", DEMUCS_MODEL,
+        "-o", str(output_dir),
+        str(demucs_input),
+    ]
+    log.info(f"Demucs command: {' '.join(cmd)}")
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+    if proc.returncode != 0:
+        error_msg = (proc.stderr or proc.stdout or "Unknown error")[-600:]
+        log.error(f"Demucs failed: {error_msg}")
+        raise RuntimeError(f"Demucs separation failed:\n{error_msg}")
+    if progress_cb:
+        progress_cb(0.85, "Collecting output stems...")
+    stems: dict[str, Path] = {}
+    for wav in output_dir.rglob("*.wav"):
+        stems[wav.stem] = wav
+    if not stems:
+        raise RuntimeError("No output files were generated by Demucs.")
+    # Clean up preprocessed file
+    try:
+        preprocessed.unlink(missing_ok=True)
+    except Exception:
+        pass
+    log.info(f"Stems found: {list(stems.keys())}")
+    return stems
+# ─────────────────────────────────────────────
+# ZIP Builder
+# ─────────────────────────────────────────────
+def build_zip(stems: dict[str, Path], job_dir: Path) -> Path:
+    zip_path = job_dir / "stems.zip"
+    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
+        for name, path in stems.items():
+            zf.write(path, f"{name}.wav")
+    return zip_path
+# ─────────────────────────────────────────────
+# Main Processing Function
+# ─────────────────────────────────────────────
+def separate_audio(audio_file, progress=gr.Progress(track_tqdm=True)):
+    if audio_file is None:
+        return (
+            "❌ No file uploaded.",
+            None, None, None, None, None,
+        )
+    input_path = Path(audio_file)
+    ext = input_path.suffix.lower()
+    if ext not in ALLOWED_EXTENSIONS:
+        return (
+            f"❌ Unsupported format '{ext}'. Please upload MP3, WAV, M4A, FLAC, or OGG.",
+            None, None, None, None, None,
+        )
+    file_size = input_path.stat().st_size
+    if file_size > MAX_FILE_SIZE_BYTES:
+        size_mb = file_size / (1024 * 1024)
+        return (
+            f"❌ File too large ({size_mb:.1f} MB). Maximum allowed size is {MAX_FILE_SIZE_MB} MB.",
+            None, None, None, None, None,
+        )
+    job_id = str(uuid.uuid4())[:8]
+    job_dir = OUTPUTS_DIR / job_id
+    job_dir.mkdir(parents=True, exist_ok=True)
+    log.info(f"Job {job_id}: processing '{input_path.name}' ({file_size / 1024:.0f} KB)")
+    try:
+        def update_progress(frac: float, msg: str):
+            progress(frac, desc=msg)
+            log.info(f"Job {job_id}: [{int(frac * 100)}%] {msg}")
+        update_progress(0.05, "Starting AI separation — this may take 1–3 minutes on free servers...")
+        stems = run_demucs(input_path, job_dir, progress_cb=update_progress)
+        update_progress(0.92, "Building download archive...")
+        zip_path = build_zip(stems, job_dir)
+        update_progress(1.0, "✅ Done!")
+        log.info(f"Job {job_id}: complete — {list(stems.keys())}")
+        def stem_path(name: str):
+            return str(stems[name]) if name in stems else None
+        status = f"✅ Separation complete! Stems: {', '.join(stems.keys())}"
+        return (
+            status,
+            stem_path("vocals"),
+            stem_path("drums"),
+            stem_path("bass"),
+            stem_path("other"),
+            str(zip_path),
+        )
+    except Exception as exc:
+        log.exception(f"Job {job_id}: error")
+        try:
+            shutil.rmtree(job_dir, ignore_errors=True)
+        except Exception:
+            pass
+        return (
+            f"❌ Processing failed: {exc}",
+            None, None, None, None, None,
+        )
+# ─────────────────────────────────────────────
+# Gradio Interface
+# ─────────────────────────────────────────────
+css = """
+#title { text-align: center; margin-bottom: 8px; }
+#subtitle { text-align: center; color: #6B7280; margin-bottom: 24px; }
+#status-box { border-radius: 10px; }
+.stem-row { gap: 16px; }
+footer { display: none !important; }
+"""
+with gr.Blocks(
+    title="VocalClean AI — Music Stem Separator",
+    theme=gr.themes.Soft(
+        primary_hue="indigo",
+        secondary_hue="sky",
+        font=gr.themes.GoogleFont("Inter"),
+    ),
+    css=css,
+) as demo:
+    gr.HTML("""
+        <h1 id="title" style="font-size:2rem;font-weight:700;">
+            🎵 VocalClean AI
+        </h1>
+        <p id="subtitle">
+            Separate music into individual stems using Hybrid Demucs AI
+            &nbsp;|&nbsp; Vocals · Drums · Bass · Other
+        </p>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📤 Upload Audio")
+            audio_input = gr.Audio(
+                label="Drop your audio file here",
+                type="filepath",
+                sources=["upload"],
+            )
+            gr.Markdown(
+                "_Supported: MP3, WAV, M4A, FLAC, OGG — up to 100 MB_",
+                elem_classes=["upload-hint"],
+            )
+            run_btn = gr.Button(
+                "🚀 Separate Stems",
+                variant="primary",
+                size="lg",
+            )
+        with gr.Column(scale=1):
+            gr.Markdown("### 📊 Processing Status")
+            status_out = gr.Textbox(
+                label="Status",
+                interactive=False,
+                placeholder="Upload a file and click 'Separate Stems' to begin...",
+                lines=3,
+                elem_id="status-box",
+            )
+            gr.Markdown(
+                "⏱️ _Processing may take **1–3 minutes** on free CPU servers. "
+                "GPU environments run significantly faster._"
+            )
+    gr.Markdown("---")
+    gr.Markdown("### 🎧 Stem Results")
+    with gr.Row(elem_classes=["stem-row"]):
+        with gr.Column():
+            gr.Markdown("#### 🎤 Vocals")
+            vocals_out = gr.Audio(label="Vocals", type="filepath", interactive=False)
+        with gr.Column():
+            gr.Markdown("#### 🥁 Drums")
+            drums_out = gr.Audio(label="Drums", type="filepath", interactive=False)
+    with gr.Row(elem_classes=["stem-row"]):
+        with gr.Column():
+            gr.Markdown("#### 🎸 Bass")
+            bass_out = gr.Audio(label="Bass", type="filepath", interactive=False)
+        with gr.Column():
+            gr.Markdown("#### 🎹 Other / Melody")
+            other_out = gr.Audio(label="Other", type="filepath", interactive=False)
+    gr.Markdown("---")
+    gr.Markdown("### 📦 Download")
+    with gr.Row():
+        with gr.Column(scale=1):
+            zip_out = gr.File(
+                label="Download All Stems (ZIP)",
+                interactive=False,
+            )
+        with gr.Column(scale=1):
+            gr.Markdown(
+                "Each stem is exported as a high-quality **WAV** file. "
+                "The ZIP archive contains all separated tracks."
+            )
+    gr.Markdown("---")
+    gr.Markdown(
+        "<center><small>Powered by "
+        "[Hybrid Demucs](https://github.com/facebookresearch/demucs) "
+        "by Meta Research &nbsp;·&nbsp; "
+        "Built with [Gradio](https://gradio.app)</small></center>"
+    )
+    run_btn.click(
+        fn=separate_audio,
+        inputs=[audio_input],
+        outputs=[status_out, vocals_out, drums_out, bass_out, other_out, zip_out],
+        show_progress="full",
+    )
+# ─────────────────────────────────────────────
+# Launch
+# ─────────────────────────────────────────────
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=int(os.environ.get("PORT", 7860)),
+        share=False,
+        show_error=True,
+    )

packages.txt.txt ADDED Viewed

	@@ -0,0 +1 @@


+ffmpeg

requirements.txtrequirements.txt.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio==5.0.0
+torch==2.6.0
+demucs==4.1.0
+numpy<2.0
+soundfile
+ffmpeg-python