redsky17 committed on
Commit
3499494
·
verified ·
1 Parent(s): 50f78fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -7,13 +7,14 @@ Audio Processing Pipeline: Demucs + Denoise + Normalize + Resample
7
  import gradio as gr
8
  import torch
9
  import torchaudio
 
10
  import os
11
  import tempfile
12
  from pathlib import Path
 
13
 
14
  print("Loading dependencies...")
15
- # Set torchaudio backend to soundfile (more compatible)
16
- torchaudio.set_audio_backend("soundfile")
17
  # Check device
18
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
19
  print(f"Device: {DEVICE}")
@@ -28,8 +29,13 @@ def separate_vocals_demucs(audio_path, device="cpu"):
28
  model.to(device)
29
  model.eval()
30
 
31
- # Load audio
32
- wav, sr = torchaudio.load(audio_path)
 
 
 
 
 
33
 
34
  # Resample to 44.1kHz if needed
35
  if sr != 44100:
@@ -110,7 +116,12 @@ def process_audio(
110
  progress(0.2, desc="Separating vocals with Demucs...")
111
  audio, sr = separate_vocals_demucs(input_file, DEVICE)
112
  else:
113
- audio, sr = torchaudio.load(input_file)
 
 
 
 
 
114
 
115
  # Step 2: Convert to mono
116
  progress(0.5, desc="Converting to mono...")
 
7
  import gradio as gr
8
  import torch
9
  import torchaudio
10
+ import soundfile as sf
11
  import os
12
  import tempfile
13
  from pathlib import Path
14
+ import numpy as np
15
 
16
  print("Loading dependencies...")
17
+
 
18
  # Check device
19
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
  print(f"Device: {DEVICE}")
 
29
  model.to(device)
30
  model.eval()
31
 
32
+ # Load audio using soundfile instead of torchaudio
33
+ audio_data, sr = sf.read(audio_path, dtype='float32')
34
+ # Convert to torch tensor and ensure correct shape [channels, samples]
35
+ if audio_data.ndim == 1:
36
+ wav = torch.from_numpy(audio_data).unsqueeze(0) # Add channel dimension
37
+ else:
38
+ wav = torch.from_numpy(audio_data.T) # Transpose to [channels, samples]
39
 
40
  # Resample to 44.1kHz if needed
41
  if sr != 44100:
 
116
  progress(0.2, desc="Separating vocals with Demucs...")
117
  audio, sr = separate_vocals_demucs(input_file, DEVICE)
118
  else:
119
+ # Load audio using soundfile
120
+ audio_data, sr = sf.read(input_file, dtype='float32')
121
+ if audio_data.ndim == 1:
122
+ audio = torch.from_numpy(audio_data).unsqueeze(0)
123
+ else:
124
+ audio = torch.from_numpy(audio_data.T)
125
 
126
  # Step 2: Convert to mono
127
  progress(0.5, desc="Converting to mono...")