Update app.py: load audio with soundfile instead of torchaudio
Browse files
app.py
CHANGED
|
@@ -7,13 +7,14 @@ Audio Processing Pipeline: Demucs + Denoise + Normalize + Resample
|
|
| 7 |
import gradio as gr
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
|
|
|
| 10 |
import os
|
| 11 |
import tempfile
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
print("Loading dependencies...")
|
| 15 |
-
|
| 16 |
-
torchaudio.set_audio_backend("soundfile")
|
| 17 |
# Check device
|
| 18 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 19 |
print(f"Device: {DEVICE}")
|
|
@@ -28,8 +29,13 @@ def separate_vocals_demucs(audio_path, device="cpu"):
|
|
| 28 |
model.to(device)
|
| 29 |
model.eval()
|
| 30 |
|
| 31 |
-
# Load audio
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Resample to 44.1kHz if needed
|
| 35 |
if sr != 44100:
|
|
@@ -110,7 +116,12 @@ def process_audio(
|
|
| 110 |
progress(0.2, desc="Separating vocals with Demucs...")
|
| 111 |
audio, sr = separate_vocals_demucs(input_file, DEVICE)
|
| 112 |
else:
|
| 113 |
-
audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
# Step 2: Convert to mono
|
| 116 |
progress(0.5, desc="Converting to mono...")
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
| 10 |
+
import soundfile as sf
|
| 11 |
import os
|
| 12 |
import tempfile
|
| 13 |
from pathlib import Path
|
| 14 |
+
import numpy as np
|
| 15 |
|
| 16 |
print("Loading dependencies...")
|
| 17 |
+
|
|
|
|
| 18 |
# Check device
|
| 19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 20 |
print(f"Device: {DEVICE}")
|
|
|
|
| 29 |
model.to(device)
|
| 30 |
model.eval()
|
| 31 |
|
| 32 |
+
# Load audio using soundfile instead of torchaudio
|
| 33 |
+
audio_data, sr = sf.read(audio_path, dtype='float32')
|
| 34 |
+
# Convert to torch tensor and ensure correct shape [channels, samples]
|
| 35 |
+
if audio_data.ndim == 1:
|
| 36 |
+
wav = torch.from_numpy(audio_data).unsqueeze(0) # Add channel dimension
|
| 37 |
+
else:
|
| 38 |
+
wav = torch.from_numpy(audio_data.T) # Transpose to [channels, samples]
|
| 39 |
|
| 40 |
# Resample to 44.1kHz if needed
|
| 41 |
if sr != 44100:
|
|
|
|
| 116 |
progress(0.2, desc="Separating vocals with Demucs...")
|
| 117 |
audio, sr = separate_vocals_demucs(input_file, DEVICE)
|
| 118 |
else:
|
| 119 |
+
# Load audio using soundfile
|
| 120 |
+
audio_data, sr = sf.read(input_file, dtype='float32')
|
| 121 |
+
if audio_data.ndim == 1:
|
| 122 |
+
audio = torch.from_numpy(audio_data).unsqueeze(0)
|
| 123 |
+
else:
|
| 124 |
+
audio = torch.from_numpy(audio_data.T)
|
| 125 |
|
| 126 |
# Step 2: Convert to mono
|
| 127 |
progress(0.5, desc="Converting to mono...")
|