throgletworld committed on
Commit
47a6dd6
·
verified ·
1 Parent(s): bad0ace

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -16,6 +16,7 @@ import torchaudio
16
  import tempfile
17
  import os
18
  import json
 
19
  from datetime import datetime
20
  from transformers import WavLMModel
21
  import torch.nn as nn
@@ -111,10 +112,20 @@ def load_models():
111
  # ============================================================================
112
 
113
  def preprocess_audio(audio_path):
114
- """Convert audio to 16kHz mono"""
115
- # Use soundfile backend to avoid torchcodec dependency
116
- waveform, sr = torchaudio.load(audio_path, backend="soundfile")
117
 
 
 
 
 
 
 
 
 
 
 
118
  # Convert to mono
119
  if waveform.shape[0] > 1:
120
  waveform = waveform.mean(dim=0, keepdim=True)
 
16
  import tempfile
17
  import os
18
  import json
19
+ import soundfile as sf
20
  from datetime import datetime
21
  from transformers import WavLMModel
22
  import torch.nn as nn
 
112
  # ============================================================================
113
 
114
  def preprocess_audio(audio_path):
115
+ """Convert audio to 16kHz mono using soundfile to avoid torchcodec."""
116
+ # Read audio file with soundfile
117
+ waveform_np, sr = sf.read(audio_path, dtype='float32')
118
 
119
+ # Convert numpy array to torch tensor
120
+ waveform = torch.from_numpy(waveform_np).float()
121
+
122
+ # Add channel dimension if it's mono
123
+ if waveform.dim() == 1:
124
+ waveform = waveform.unsqueeze(0)
125
+ # Transpose if it's (samples, channels)
126
+ elif waveform.shape[1] < waveform.shape[0]:
127
+ waveform = waveform.T
128
+
129
  # Convert to mono
130
  if waveform.shape[0] > 1:
131
  waveform = waveform.mean(dim=0, keepdim=True)