Ram6666 committed on
Commit
87f44dc
·
verified ·
1 Parent(s): 66603d5

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +352 -0
  2. packages.txt.txt +1 -0
  3. requirements.txtrequirements.txt.txt +6 -0
app.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import shutil
4
+ import zipfile
5
+ import subprocess
6
+ import tempfile
7
+ import logging
8
+ from pathlib import Path
9
+
10
+ import gradio as gr
11
+ import numpy as np
12
+
13
+ # ─────────────────────────────────────────────
14
+ # Logging
15
+ # ─────────────────────────────────────────────
16
+
17
# Configure the root logger once at import time and create the logger used
# by every function in this module.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger("vocalclean-gradio")

# ─────────────────────────────────────────────
# Directories
# ─────────────────────────────────────────────

# Working directories live next to this file and are created on import so
# later code can write into them without checking for existence.
BASE_DIR = Path(__file__).parent
OUTPUTS_DIR = BASE_DIR / "outputs"
ASSETS_DIR = BASE_DIR / "assets"
OUTPUTS_DIR.mkdir(exist_ok=True)
ASSETS_DIR.mkdir(exist_ok=True)

# ─────────────────────────────────────────────
# Constants
# ─────────────────────────────────────────────

# Upload limit, kept in both units so the UI text (MB) and the size check
# (bytes) are derived from a single number.
MAX_FILE_SIZE_MB = 100
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

# Lower-case file suffixes (including the dot) accepted for upload.
ALLOWED_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg"}

# Model name passed to Demucs via its ``-n`` option.
DEMUCS_MODEL = "htdemucs"

# Display metadata per stem name: human-readable label, accent colour and
# emoji icon.
STEM_META = {
    "vocals": {"label": "Vocals", "color": "#4F46E5", "icon": "🎤"},
    "drums": {"label": "Drums", "color": "#EF4444", "icon": "🥁"},
    "bass": {"label": "Bass", "color": "#8B5CF6", "icon": "🎸"},
    "other": {"label": "Other / Melody", "color": "#F59E0B", "icon": "🎹"},
}
48
+
49
+ # ─────────────────────────────────────────────
50
+ # GPU Detection
51
+ # ─────────────────────────────────────────────
52
+
53
def detect_device() -> str:
    """Return the device name to pass to Demucs: ``"cuda"`` or ``"cpu"``.

    ``"cuda"`` is returned only when PyTorch imports successfully and reports
    an available CUDA device. Any failure while importing or probing PyTorch
    is treated as "no usable GPU": it is logged and ``"cpu"`` is returned, so
    this function never raises.
    """
    try:
        import torch

        if torch.cuda.is_available():
            log.info("GPU detected: %s", torch.cuda.get_device_name(0))
            return "cuda"
    except Exception as exc:
        # Deliberately broad: a missing torch install and a broken driver
        # both mean CPU fallback, but the reason is worth recording.
        log.warning("GPU probe failed, falling back to CPU: %s", exc)
        return "cpu"

    log.info("No GPU — running on CPU")
    return "cpu"


# Probed once at import time and reused for every separation job.
DEVICE = detect_device()
66
+
67
+ # ─────────────────────────────────────────────
68
+ # FFmpeg Preprocessing
69
+ # ─────────────────────────────────────────────
70
+
71
def preprocess_audio(input_path: Path, output_path: Path) -> Path:
    """Convert *input_path* to a 16-bit, stereo, 44.1 kHz WAV at *output_path*.

    The conversion is done by invoking the ``ffmpeg`` command-line tool; an
    existing *output_path* is overwritten (``-y``).

    Args:
        input_path: Audio file to convert.
        output_path: Destination WAV file.

    Returns:
        *output_path*, for convenient chaining.

    Raises:
        RuntimeError: ``ffmpeg`` is not installed, did not finish within the
            timeout, or exited with a non-zero status.
    """
    timeout_seconds = 120
    cmd = [
        "ffmpeg", "-y",
        "-i", str(input_path),
        "-ac", "2",
        "-ar", "44100",
        "-sample_fmt", "s16",
        "-f", "wav",
        str(output_path),
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # ffmpeg echoes file metadata, which is not guaranteed to be
            # valid UTF-8; never let a decode error mask the real outcome.
            errors="replace",
            timeout=timeout_seconds,
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            "FFmpeg failed: the 'ffmpeg' executable was not found on PATH"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"FFmpeg failed: timed out after {timeout_seconds} s"
        ) from exc

    if result.returncode != 0:
        # Only the tail of stderr is kept: that is where ffmpeg prints the
        # actual error, after its banner and stream information.
        raise RuntimeError(f"FFmpeg failed: {result.stderr[-400:]}")
    return output_path
86
+
87
+ # ─────────────────────────────────────────────
88
+ # Demucs Separation
89
+ # ─────────────────────────────────────────────
90
+
91
def run_demucs(input_path: Path, output_dir: Path, progress_cb=None) -> dict[str, Path]:
    """Separate *input_path* with Demucs and return a dict of stem name → WAV path.

    The input is first normalised with ``preprocess_audio``; if that step
    fails, a warning is logged and the original file is handed to Demucs
    unchanged. Demucs runs as a subprocess, and afterwards every ``*.wav``
    found anywhere below *output_dir* is collected, keyed by its file stem.

    Args:
        input_path: Audio file to separate.
        output_dir: Directory passed to Demucs as its output root (``-o``).
        progress_cb: Optional ``callable(fraction, message)`` invoked at the
            main milestones; ignored when falsy.

    Returns:
        Mapping of WAV file stem (e.g. ``"vocals"``) to the file's path.

    Raises:
        RuntimeError: Demucs exited with a non-zero status, or finished
            without producing any WAV file.
        subprocess.TimeoutExpired: Demucs ran longer than 600 seconds.
    """
    import sys  # local import: only needed to locate the running interpreter

    def report(fraction: float, message: str) -> None:
        if progress_cb:
            progress_cb(fraction, message)

    report(0.1, "Preprocessing audio...")

    preprocessed = input_path.parent / f"pre_{input_path.stem}.wav"
    try:
        try:
            preprocess_audio(input_path, preprocessed)
            demucs_input = preprocessed
        except Exception as e:
            # Best effort: Demucs can still try to decode the original file.
            log.warning(f"FFmpeg preprocessing skipped: {e}")
            demucs_input = input_path

        report(0.2, f"Running Hybrid Demucs on {DEVICE.upper()}...")

        cmd = [
            # Run Demucs with the interpreter executing this app so the same
            # environment (and installed packages) is used; fall back to
            # "python3" only if the interpreter path is unavailable.
            sys.executable or "python3", "-m", "demucs",
            "--device", DEVICE,
            "-n", DEMUCS_MODEL,
            "-o", str(output_dir),
            str(demucs_input),
        ]

        log.info(f"Demucs command: {' '.join(cmd)}")
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)

        if proc.returncode != 0:
            # Keep only the tail of the output, where the error itself is.
            error_msg = (proc.stderr or proc.stdout or "Unknown error")[-600:]
            log.error(f"Demucs failed: {error_msg}")
            raise RuntimeError(f"Demucs separation failed:\n{error_msg}")

        report(0.85, "Collecting output stems...")

        stems: dict[str, Path] = {
            wav.stem: wav for wav in output_dir.rglob("*.wav")
        }
        if not stems:
            raise RuntimeError("No output files were generated by Demucs.")
    finally:
        # Remove the intermediate WAV on every exit path, including Demucs
        # failures and timeouts, so failed jobs do not leak temp files.
        try:
            preprocessed.unlink(missing_ok=True)
        except OSError as cleanup_err:
            log.debug(f"Could not remove {preprocessed}: {cleanup_err}")

    log.info(f"Stems found: {list(stems.keys())}")
    return stems
142
+
143
+ # ─────────────────────────────────────────────
144
+ # ZIP Builder
145
+ # ─────────────────────────────────────────────
146
+
147
def build_zip(stems: dict[str, Path], job_dir: Path) -> Path:
    """Bundle all stem files into ``<job_dir>/stems.zip`` and return its path.

    Each file is stored at the archive root as ``<stem name>.wav`` using
    DEFLATE compression. Entries are written in sorted name order so the
    archive layout does not depend on the order in which the stems were
    discovered.

    Args:
        stems: Mapping of stem name to the WAV file to include.
        job_dir: Directory in which ``stems.zip`` is created.

    Returns:
        Path of the written ZIP archive.
    """
    zip_path = job_dir / "stems.zip"
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for name in sorted(stems):
            zf.write(stems[name], f"{name}.wav")
    return zip_path
153
+
154
+ # ─────────────────────────────────────────────
155
+ # Main Processing Function
156
+ # ─────────────────────────────────────────────
157
+
158
def separate_audio(audio_file, progress=gr.Progress(track_tqdm=True)):
    """Validate an uploaded file, run stem separation and package the results.

    This is the Gradio click handler. It never raises: every failure is
    reported through the status string so the UI can display it.

    Args:
        audio_file: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.
        progress: Gradio progress tracker, called with a fraction and a
            description at each milestone.

    Returns:
        A 6-tuple ``(status, vocals, drums, bass, other, zip)``. ``status`` is
        a human-readable message; the remaining items are file paths as
        strings, or ``None`` for a stem that was not produced. On any
        failure all five paths are ``None``.
    """

    def failure(message: str):
        # One status message plus an empty value for each of the five
        # file outputs.
        return (f"❌ {message}", None, None, None, None, None)

    if audio_file is None:
        return failure("No file uploaded.")

    input_path = Path(audio_file)
    ext = input_path.suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        return failure(
            f"Unsupported format '{ext}'. Please upload MP3, WAV, M4A, FLAC, or OGG."
        )

    try:
        file_size = input_path.stat().st_size
    except OSError as exc:
        # The upload may have vanished or be unreadable; report it instead
        # of letting the handler crash.
        log.error("Cannot read uploaded file '%s': %s", input_path, exc)
        return failure(f"Processing failed: {exc}")

    if file_size > MAX_FILE_SIZE_BYTES:
        size_mb = file_size / (1024 * 1024)
        return failure(
            f"File too large ({size_mb:.1f} MB). "
            f"Maximum allowed size is {MAX_FILE_SIZE_MB} MB."
        )

    job_id = str(uuid.uuid4())[:8]
    job_dir = OUTPUTS_DIR / job_id
    job_dir.mkdir(parents=True, exist_ok=True)

    log.info(
        "Job %s: processing '%s' (%.0f KB)", job_id, input_path.name, file_size / 1024
    )

    def update_progress(frac: float, msg: str):
        progress(frac, desc=msg)
        log.info("Job %s: [%d%%] %s", job_id, int(frac * 100), msg)

    try:
        update_progress(
            0.05,
            "Starting AI separation — this may take 1–3 minutes on free servers...",
        )

        stems = run_demucs(input_path, job_dir, progress_cb=update_progress)

        update_progress(0.92, "Building download archive...")
        zip_path = build_zip(stems, job_dir)

        update_progress(1.0, "✅ Done!")
        log.info("Job %s: complete — %s", job_id, list(stems.keys()))

        def stem_path(name: str):
            return str(stems[name]) if name in stems else None

        status = f"✅ Separation complete! Stems: {', '.join(stems.keys())}"
        return (
            status,
            stem_path("vocals"),
            stem_path("drums"),
            stem_path("bass"),
            stem_path("other"),
            str(zip_path),
        )

    except Exception as exc:
        # Top-level boundary for the UI: log the traceback, discard the
        # partial job directory and surface the error as a status message.
        log.exception("Job %s: error", job_id)
        shutil.rmtree(job_dir, ignore_errors=True)
        return failure(f"Processing failed: {exc}")
226
+
227
+ # ─────────────────────────────────────────────
228
+ # Gradio Interface
229
+ # ─────────────────────────────────────────────
230
+
231
# Custom styling for the page; the selectors match the elem_id / elem_classes
# values assigned to components below, and the last rule hides the footer.
css = """
#title { text-align: center; margin-bottom: 8px; }
#subtitle { text-align: center; color: #6B7280; margin-bottom: 24px; }
#status-box { border-radius: 10px; }
.stem-row { gap: 16px; }
footer { display: none !important; }
"""

with gr.Blocks(
    title="VocalClean AI — Music Stem Separator",
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="sky",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=css,
) as demo:

    # Page header.
    gr.HTML("""
    <h1 id="title" style="font-size:2rem;font-weight:700;">
    🎵 VocalClean AI
    </h1>
    <p id="subtitle">
    Separate music into individual stems using Hybrid Demucs AI
    &nbsp;|&nbsp; Vocals · Drums · Bass · Other
    </p>
    """)

    # Top row: upload controls on the left, status on the right.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Audio")
            audio_input = gr.Audio(
                label="Drop your audio file here",
                type="filepath",
                sources=["upload"],
            )
            gr.Markdown(
                # Derived from the constant so the hint cannot drift from
                # the limit enforced in separate_audio().
                f"_Supported: MP3, WAV, M4A, FLAC, OGG — up to {MAX_FILE_SIZE_MB} MB_",
                elem_classes=["upload-hint"],
            )
            run_btn = gr.Button(
                "🚀 Separate Stems",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📊 Processing Status")
            status_out = gr.Textbox(
                label="Status",
                interactive=False,
                placeholder="Upload a file and click 'Separate Stems' to begin...",
                lines=3,
                elem_id="status-box",
            )
            gr.Markdown(
                "⏱️ _Processing may take **1–3 minutes** on free CPU servers. "
                "GPU environments run significantly faster._"
            )

    gr.Markdown("---")
    gr.Markdown("### 🎧 Stem Results")

    # One read-only audio player per stem, laid out as a 2 x 2 grid.
    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎤 Vocals")
            vocals_out = gr.Audio(label="Vocals", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🥁 Drums")
            drums_out = gr.Audio(label="Drums", type="filepath", interactive=False)

    with gr.Row(elem_classes=["stem-row"]):
        with gr.Column():
            gr.Markdown("#### 🎸 Bass")
            bass_out = gr.Audio(label="Bass", type="filepath", interactive=False)

        with gr.Column():
            gr.Markdown("#### 🎹 Other / Melody")
            other_out = gr.Audio(label="Other", type="filepath", interactive=False)

    gr.Markdown("---")
    gr.Markdown("### 📦 Download")

    with gr.Row():
        with gr.Column(scale=1):
            zip_out = gr.File(
                label="Download All Stems (ZIP)",
                interactive=False,
            )
        with gr.Column(scale=1):
            gr.Markdown(
                "Each stem is exported as a high-quality **WAV** file. "
                "The ZIP archive contains all separated tracks."
            )

    gr.Markdown("---")
    gr.Markdown(
        "<center><small>Powered by "
        "[Hybrid Demucs](https://github.com/facebookresearch/demucs) "
        "by Meta Research &nbsp;·&nbsp; "
        "Built with [Gradio](https://gradio.app)</small></center>"
    )

    # The output order must match the 6-tuple returned by separate_audio():
    # status, vocals, drums, bass, other, zip.
    run_btn.click(
        fn=separate_audio,
        inputs=[audio_input],
        outputs=[status_out, vocals_out, drums_out, bass_out, other_out, zip_out],
        show_progress="full",
    )
341
+
342
+ # ─────────────────────────────────────────────
343
+ # Launch
344
+ # ─────────────────────────────────────────────
345
+
346
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable when set, otherwise 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": listen_port,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)
packages.txt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txtrequirements.txt.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==5.0.0
2
+ torch==2.6.0
3
+ demucs==4.1.0
4
+ numpy<2.0
5
+ soundfile
6
+ ffmpeg-python