Spaces:

redsky17
/

extractor

Sleeping

redsky17 committed on Nov 9, 2025

Commit

3499494

verified ·

1 Parent(s): 50f78fd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,13 +7,14 @@ Audio Processing Pipeline: Demucs + Denoise + Normalize + Resample
 import gradio as gr
 import torch
 import torchaudio
 import os
 import tempfile
 from pathlib import Path
 print("Loading dependencies...")
-# Set torchaudio backend to soundfile (more compatible)
-torchaudio.set_audio_backend("soundfile")
 # Check device
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Device: {DEVICE}")
@@ -28,8 +29,13 @@ def separate_vocals_demucs(audio_path, device="cpu"):
     model.to(device)
     model.eval()
-    # Load audio
-    wav, sr = torchaudio.load(audio_path)
     # Resample to 44.1kHz if needed
     if sr != 44100:
@@ -110,7 +116,12 @@ def process_audio(
             progress(0.2, desc="Separating vocals with Demucs...")
             audio, sr = separate_vocals_demucs(input_file, DEVICE)
         else:
-            audio, sr = torchaudio.load(input_file)
         # Step 2: Convert to mono
         progress(0.5, desc="Converting to mono...")

 import gradio as gr
 import torch
 import torchaudio
+import soundfile as sf
 import os
 import tempfile
 from pathlib import Path
+import numpy as np
 print("Loading dependencies...")
 # Check device
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Device: {DEVICE}")
     model.to(device)
     model.eval()
+    # Load audio using soundfile instead of torchaudio
+    audio_data, sr = sf.read(audio_path, dtype='float32')
+    # Convert to torch tensor and ensure correct shape [channels, samples]
+    if audio_data.ndim == 1:
+        wav = torch.from_numpy(audio_data).unsqueeze(0)  # Add channel dimension
+    else:
+        wav = torch.from_numpy(audio_data.T)  # Transpose to [channels, samples]
     # Resample to 44.1kHz if needed
     if sr != 44100:
             progress(0.2, desc="Separating vocals with Demucs...")
             audio, sr = separate_vocals_demucs(input_file, DEVICE)
         else:
+            # Load audio using soundfile
+            audio_data, sr = sf.read(input_file, dtype='float32')
+            if audio_data.ndim == 1:
+                audio = torch.from_numpy(audio_data).unsqueeze(0)
+            else:
+                audio = torch.from_numpy(audio_data.T)
         # Step 2: Convert to mono
         progress(0.5, desc="Converting to mono...")