Upload 2 files
Browse files
app.py
CHANGED
|
@@ -16,6 +16,7 @@ import torchaudio
|
|
| 16 |
import tempfile
|
| 17 |
import os
|
| 18 |
import json
|
|
|
|
| 19 |
from datetime import datetime
|
| 20 |
from transformers import WavLMModel
|
| 21 |
import torch.nn as nn
|
|
@@ -111,10 +112,20 @@ def load_models():
|
|
| 111 |
# ============================================================================
|
| 112 |
|
| 113 |
def preprocess_audio(audio_path):
|
| 114 |
-
"""Convert audio to 16kHz mono"""
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# Convert to mono
|
| 119 |
if waveform.shape[0] > 1:
|
| 120 |
waveform = waveform.mean(dim=0, keepdim=True)
|
|
|
|
| 16 |
import tempfile
|
| 17 |
import os
|
| 18 |
import json
|
| 19 |
+
import soundfile as sf
|
| 20 |
from datetime import datetime
|
| 21 |
from transformers import WavLMModel
|
| 22 |
import torch.nn as nn
|
|
|
|
| 112 |
# ============================================================================
|
| 113 |
|
| 114 |
def preprocess_audio(audio_path):
|
| 115 |
+
"""Convert audio to 16kHz mono using soundfile to avoid torchcodec."""
|
| 116 |
+
# Read audio file with soundfile
|
| 117 |
+
waveform_np, sr = sf.read(audio_path, dtype='float32')
|
| 118 |
|
| 119 |
+
# Convert numpy array to torch tensor
|
| 120 |
+
waveform = torch.from_numpy(waveform_np).float()
|
| 121 |
+
|
| 122 |
+
# Add channel dimension if it's mono
|
| 123 |
+
if waveform.dim() == 1:
|
| 124 |
+
waveform = waveform.unsqueeze(0)
|
| 125 |
+
# Transpose if it's (samples, channels)
|
| 126 |
+
elif waveform.shape[1] < waveform.shape[0]:
|
| 127 |
+
waveform = waveform.T
|
| 128 |
+
|
| 129 |
# Convert to mono
|
| 130 |
if waveform.shape[0] > 1:
|
| 131 |
waveform = waveform.mean(dim=0, keepdim=True)
|