Spaces:

Ram6666
/

VocalCleanAI

Sleeping

App Files Files Community

VocalCleanAI / app.py

Ram6666

Upload 3 files

87f44dc verified 8 days ago

raw

history blame contribute delete

12.6 kB

	import os
	import uuid
	import shutil
	import zipfile
	import subprocess
	import tempfile
	import logging
	from pathlib import Path

	import gradio as gr
	import numpy as np

	# ─────────────────────────────────────────────
	# Logging
	# ─────────────────────────────────────────────

	# Configure the root logger once at import time: INFO and above, with a
	# timestamp and the level name in front of every message.
	logging.basicConfig(
	    format="%(asctime)s [%(levelname)s] %(message)s",
	    level=logging.INFO,
	)

	# Logger shared by every function in this module.
	log = logging.getLogger("vocalclean-gradio")

	# ─────────────────────────────────────────────
	# Directories
	# ─────────────────────────────────────────────

	# Folder that contains this script; the working folders below live beside it.
	BASE_DIR = Path(__file__).parent

	# One sub-folder per job is created under OUTPUTS_DIR by separate_audio().
	OUTPUTS_DIR = BASE_DIR.joinpath("outputs")
	ASSETS_DIR = BASE_DIR.joinpath("assets")

	# Create both folders at import time; an already existing folder is fine.
	OUTPUTS_DIR.mkdir(exist_ok=True)
	ASSETS_DIR.mkdir(exist_ok=True)

	# ─────────────────────────────────────────────
	# Constants
	# ─────────────────────────────────────────────

	# Upload size limit, expressed in megabytes and in bytes.
	MAX_FILE_SIZE_MB = 100
	MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

	# Lower-case file suffixes accepted by separate_audio().
	ALLOWED_EXTENSIONS = {
	    ".mp3",
	    ".wav",
	    ".m4a",
	    ".flac",
	    ".ogg",
	}

	# Model name passed to the Demucs command line via "-n".
	DEMUCS_MODEL = "htdemucs"

	# Display metadata per stem name: human-readable label, colour and icon.
	STEM_META = {
	    stem: {"label": label, "color": color, "icon": icon}
	    for stem, label, color, icon in (
	        ("vocals", "Vocals", "#4F46E5", "🎤"),
	        ("drums", "Drums", "#EF4444", "🥁"),
	        ("bass", "Bass", "#8B5CF6", "🎸"),
	        ("other", "Other / Melody", "#F59E0B", "🎹"),
	    )
	}

	# ─────────────────────────────────────────────
	# GPU Detection
	# ─────────────────────────────────────────────

	def detect_device() -> str:
	    """Pick the compute device name to hand to Demucs.

	    Returns:
	        ``"cuda"`` when PyTorch can be imported and reports an available
	        CUDA device, otherwise ``"cpu"``. This function never raises; any
	        failure while probing falls back to ``"cpu"``.
	    """
	    try:
	        import torch

	        if torch.cuda.is_available():
	            name = torch.cuda.get_device_name(0)
	            log.info(f"GPU detected: {name}")
	            return "cuda"
	    except ImportError:
	        # PyTorch missing is an expected situation, not worth a warning.
	        log.debug("PyTorch is not importable; skipping the CUDA probe")
	    except Exception:
	        # Still best-effort, but keep the traceback so that a broken driver
	        # or CUDA runtime is visible in the logs instead of vanishing.
	        log.warning("CUDA probe failed; falling back to CPU", exc_info=True)
	    log.info("No GPU — running on CPU")
	    return "cpu"

	# Probe once at import time; run_demucs() passes this value to "--device".
	DEVICE = detect_device()

	# ─────────────────────────────────────────────
	# FFmpeg Preprocessing
	# ─────────────────────────────────────────────

	def preprocess_audio(input_path: Path, output_path: Path) -> Path:
	    """Convert ``input_path`` to a stereo, 44.1 kHz, 16-bit WAV file.

	    Args:
	        input_path: Audio file handed to FFmpeg via ``-i``.
	        output_path: Destination file; overwritten if present (``-y``).

	    Returns:
	        ``output_path``, once FFmpeg has exited successfully.

	    Raises:
	        RuntimeError: FFmpeg exited with a non-zero status; the message
	            carries the last 400 characters of its stderr.
	    """
	    ffmpeg_args = ["ffmpeg", "-y", "-i", str(input_path)]
	    # Output format: 2 channels, 44100 Hz, signed 16-bit samples, WAV container.
	    ffmpeg_args += ["-ac", "2", "-ar", "44100", "-sample_fmt", "s16", "-f", "wav"]
	    ffmpeg_args.append(str(output_path))

	    completed = subprocess.run(
	        ffmpeg_args,
	        capture_output=True,
	        text=True,
	        timeout=120,
	    )
	    if completed.returncode == 0:
	        return output_path
	    raise RuntimeError(f"FFmpeg failed: {completed.stderr[-400:]}")

	# ─────────────────────────────────────────────
	# Demucs Separation
	# ─────────────────────────────────────────────

	def run_demucs(input_path: Path, output_dir: Path, progress_cb=None) -> dict[str, Path]:
	    """Separate ``input_path`` into stems with Demucs and return their paths.

	    The input is first normalised with ``preprocess_audio``; if that step
	    fails for any reason the original file is handed to Demucs unchanged.
	    Demucs is then run as a child process and ``output_dir`` is searched
	    recursively for the WAV files it produced.

	    Args:
	        input_path: Audio file to separate.
	        output_dir: Directory passed to Demucs via ``-o``.
	        progress_cb: Optional ``callable(fraction, message)`` invoked at each
	            stage; skipped when falsy.

	    Returns:
	        Mapping of each WAV file's name without extension to its path.

	    Raises:
	        RuntimeError: Demucs exited with a non-zero status, or no WAV file
	            was found under ``output_dir`` afterwards.
	        subprocess.TimeoutExpired: Demucs ran longer than 600 seconds.
	    """
	    # Local import so this function does not depend on a file-level change.
	    import sys

	    def report(frac: float, msg: str) -> None:
	        if progress_cb:
	            progress_cb(frac, msg)

	    report(0.1, "Preprocessing audio...")

	    preprocessed = input_path.parent / f"pre_{input_path.stem}.wav"
	    try:
	        try:
	            preprocess_audio(input_path, preprocessed)
	            demucs_input = preprocessed
	        except Exception as e:
	            # Preprocessing is an optimisation only; fall back to the raw upload.
	            log.warning(f"FFmpeg preprocessing skipped: {e}")
	            demucs_input = input_path

	        report(0.2, f"Running Hybrid Demucs on {DEVICE.upper()}...")

	        cmd = [
	            # Use the interpreter running this app so the demucs module is
	            # resolved from the same environment; "python3" on PATH may be a
	            # different installation.
	            sys.executable or "python3", "-m", "demucs",
	            "--device", DEVICE,
	            "-n", DEMUCS_MODEL,
	            "-o", str(output_dir),
	            str(demucs_input),
	        ]

	        log.info(f"Demucs command: {' '.join(cmd)}")
	        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)

	        if proc.returncode != 0:
	            error_msg = (proc.stderr or proc.stdout or "Unknown error")[-600:]
	            log.error(f"Demucs failed: {error_msg}")
	            raise RuntimeError(f"Demucs separation failed:\n{error_msg}")

	        report(0.85, "Collecting output stems...")

	        stems: dict[str, Path] = {
	            wav.stem: wav for wav in output_dir.rglob("*.wav")
	        }
	        if not stems:
	            raise RuntimeError("No output files were generated by Demucs.")
	    finally:
	        # Remove the temporary WAV on every exit path (success, Demucs
	        # failure, timeout), including a partial file left by a failed FFmpeg.
	        try:
	            preprocessed.unlink(missing_ok=True)
	        except OSError as e:
	            log.warning(f"Could not remove temporary file {preprocessed}: {e}")

	    log.info(f"Stems found: {list(stems.keys())}")
	    return stems

	# ─────────────────────────────────────────────
	# ZIP Builder
	# ─────────────────────────────────────────────

	def build_zip(stems: dict[str, Path], job_dir: Path) -> Path:
	    """Bundle every stem into ``job_dir / "stems.zip"``.

	    Args:
	        stems: Mapping of stem name to the file to archive.
	        job_dir: Directory in which the archive is created.

	    Returns:
	        Path of the written archive. Each entry is stored deflated under
	        the name ``<stem name>.wav``.
	    """
	    archive_path = job_dir / "stems.zip"
	    archive = zipfile.ZipFile(
	        archive_path,
	        mode="w",
	        compression=zipfile.ZIP_DEFLATED,
	    )
	    with archive:
	        for stem_name, wav_file in stems.items():
	            archive.write(wav_file, arcname=f"{stem_name}.wav")
	    return archive_path

	# ─────────────────────────────────────────────
	# Main Processing Function
	# ─────────────────────────────────────────────

	def _failure(message: str) -> tuple:
	    """Return the handler's 6-tuple with ``message`` as status and no outputs."""
	    return (message, None, None, None, None, None)


	def separate_audio(audio_file, progress=gr.Progress(track_tqdm=True)):
	    """Validate an uploaded file, split it into stems and package the result.

	    Args:
	        audio_file: Path of the uploaded file, or ``None`` when nothing was
	            uploaded.
	        progress: Gradio progress tracker; updated at every stage.

	    Returns:
	        A 6-tuple ``(status, vocals, drums, bass, other, zip)``. ``status`` is
	        a human-readable message; the remaining items are file paths as
	        strings, or ``None`` when that stem is missing or the job failed.
	        Validation and processing errors are reported through ``status``
	        rather than raised.
	    """
	    if audio_file is None:
	        return _failure("❌ No file uploaded.")

	    input_path = Path(audio_file)
	    ext = input_path.suffix.lower()

	    if ext not in ALLOWED_EXTENSIONS:
	        return _failure(
	            f"❌ Unsupported format '{ext}'. Please upload MP3, WAV, M4A, FLAC, or OGG."
	        )

	    try:
	        file_size = input_path.stat().st_size
	    except OSError as exc:
	        # The upload may have disappeared or be unreadable; report it in the
	        # status box like every other failure instead of raising.
	        log.error(f"Cannot read uploaded file '{input_path}': {exc}")
	        return _failure(f"❌ Could not read the uploaded file: {exc}")

	    if file_size > MAX_FILE_SIZE_BYTES:
	        size_mb = file_size / (1024 * 1024)
	        return _failure(
	            f"❌ File too large ({size_mb:.1f} MB). Maximum allowed size is {MAX_FILE_SIZE_MB} MB."
	        )

	    # Short random id used both as the output folder name and as a log tag.
	    job_id = str(uuid.uuid4())[:8]
	    job_dir = OUTPUTS_DIR / job_id
	    job_dir.mkdir(parents=True, exist_ok=True)

	    log.info(f"Job {job_id}: processing '{input_path.name}' ({file_size / 1024:.0f} KB)")

	    try:
	        def update_progress(frac: float, msg: str):
	            progress(frac, desc=msg)
	            log.info(f"Job {job_id}: [{int(frac * 100)}%] {msg}")

	        update_progress(0.05, "Starting AI separation — this may take 1–3 minutes on free servers...")

	        stems = run_demucs(input_path, job_dir, progress_cb=update_progress)

	        update_progress(0.92, "Building download archive...")
	        zip_path = build_zip(stems, job_dir)

	        update_progress(1.0, "✅ Done!")
	        log.info(f"Job {job_id}: complete — {list(stems.keys())}")

	        def stem_path(name: str):
	            return str(stems[name]) if name in stems else None

	        status = f"✅ Separation complete! Stems: {', '.join(stems.keys())}"
	        return (
	            status,
	            stem_path("vocals"),
	            stem_path("drums"),
	            stem_path("bass"),
	            stem_path("other"),
	            str(zip_path),
	        )

	    except Exception as exc:
	        # Top-level boundary for the UI: log the traceback, drop the partial
	        # job folder, and surface the error text in the status box.
	        log.exception(f"Job {job_id}: error")
	        shutil.rmtree(job_dir, ignore_errors=True)
	        return _failure(f"❌ Processing failed: {exc}")

	# ─────────────────────────────────────────────
	# Gradio Interface
	# ─────────────────────────────────────────────

	# Custom stylesheet handed to gr.Blocks(css=...): centres the title and
	# subtitle, rounds the status box, spaces the stem rows and hides the footer.
	css = """
	#title { text-align: center; margin-bottom: 8px; }
	#subtitle { text-align: center; color: #6B7280; margin-bottom: 24px; }
	#status-box { border-radius: 10px; }
	.stem-row { gap: 16px; }
	footer { display: none !important; }
	"""

	# Page layout: header, upload + status row, two rows of stem players, the
	# ZIP download row and a credits line. The button handler is registered
	# inside the Blocks context so it is attached to this app.
	with gr.Blocks(
	    title="VocalClean AI — Music Stem Separator",
	    theme=gr.themes.Soft(
	        primary_hue="indigo",
	        secondary_hue="sky",
	        font=gr.themes.GoogleFont("Inter"),
	    ),
	    css=css,
	) as demo:

	    # NOTE: the separator below is a plain "|"; a backslash before it would
	    # be an invalid escape sequence in a non-raw string literal.
	    gr.HTML("""
	<h1 id="title" style="font-size:2rem;font-weight:700;">
	🎵 VocalClean AI
	</h1>
	<p id="subtitle">
	Separate music into individual stems using Hybrid Demucs AI
	|  Vocals · Drums · Bass · Other
	</p>
	""")

	    with gr.Row():
	        # Left column: file upload and the trigger button.
	        with gr.Column(scale=1):
	            gr.Markdown("### 📤 Upload Audio")
	            audio_input = gr.Audio(
	                label="Drop your audio file here",
	                type="filepath",
	                sources=["upload"],
	            )
	            gr.Markdown(
	                "_Supported: MP3, WAV, M4A, FLAC, OGG — up to 100 MB_",
	                elem_classes=["upload-hint"],
	            )
	            run_btn = gr.Button(
	                "🚀 Separate Stems",
	                variant="primary",
	                size="lg",
	            )

	        # Right column: read-only status text filled in by separate_audio().
	        with gr.Column(scale=1):
	            gr.Markdown("### 📊 Processing Status")
	            status_out = gr.Textbox(
	                label="Status",
	                interactive=False,
	                placeholder="Upload a file and click 'Separate Stems' to begin...",
	                lines=3,
	                elem_id="status-box",
	            )
	            gr.Markdown(
	                "⏱️ _Processing may take 1–3 minutes on free CPU servers. "
	                "GPU environments run significantly faster._"
	            )

	    gr.Markdown("---")
	    gr.Markdown("### 🎧 Stem Results")

	    with gr.Row(elem_classes=["stem-row"]):
	        with gr.Column():
	            gr.Markdown("#### 🎤 Vocals")
	            vocals_out = gr.Audio(label="Vocals", type="filepath", interactive=False)

	        with gr.Column():
	            gr.Markdown("#### 🥁 Drums")
	            drums_out = gr.Audio(label="Drums", type="filepath", interactive=False)

	    with gr.Row(elem_classes=["stem-row"]):
	        with gr.Column():
	            gr.Markdown("#### 🎸 Bass")
	            bass_out = gr.Audio(label="Bass", type="filepath", interactive=False)

	        with gr.Column():
	            gr.Markdown("#### 🎹 Other / Melody")
	            other_out = gr.Audio(label="Other", type="filepath", interactive=False)

	    gr.Markdown("---")
	    gr.Markdown("### 📦 Download")

	    with gr.Row():
	        with gr.Column(scale=1):
	            zip_out = gr.File(
	                label="Download All Stems (ZIP)",
	                interactive=False,
	            )
	        with gr.Column(scale=1):
	            gr.Markdown(
	                "Each stem is exported as a high-quality WAV file. "
	                "The ZIP archive contains all separated tracks."
	            )

	    gr.Markdown("---")
	    gr.Markdown(
	        "<center><small>Powered by "
	        "[Hybrid Demucs](https://github.com/facebookresearch/demucs) "
	        "by Meta Research  ·  "
	        "Built with [Gradio](https://gradio.app)</small></center>"
	    )

	    # The output order must match the 6-tuple returned by separate_audio().
	    run_btn.click(
	        fn=separate_audio,
	        inputs=[audio_input],
	        outputs=[status_out, vocals_out, drums_out, bass_out, other_out, zip_out],
	        show_progress="full",
	    )

	# ─────────────────────────────────────────────
	# Launch
	# ─────────────────────────────────────────────

	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
	    listen_port = int(os.environ.get("PORT", 7860))
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=listen_port,
	        share=False,
	        show_error=True,
	    )