Spaces:
Running
Running
Shikhar committed on
Commit ·
1a1b90c
1
Parent(s): c2c9a4c
Use soundfile for audio loading (no torchcodec/ffmpeg needed)
Browse files
- app.py +5 -2
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -7,6 +7,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
| 7 |
import gradio as gr
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
|
|
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
from src.model.xeusphoneme.builders import build_xeus_pr_inference
|
|
@@ -43,10 +44,12 @@ def transcribe(audio_path):
|
|
| 43 |
if inference is None:
|
| 44 |
inference = load_model()
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
| 47 |
if sr != SAMPLE_RATE:
|
| 48 |
waveform = torchaudio.functional.resample(waveform, sr, SAMPLE_RATE)
|
| 49 |
-
waveform = waveform.mean(dim=0) # mono
|
| 50 |
waveform = waveform[: SAMPLE_RATE * MAX_SECONDS]
|
| 51 |
|
| 52 |
if waveform.numel() == 0:
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
| 10 |
+
import soundfile as sf
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
|
| 13 |
from src.model.xeusphoneme.builders import build_xeus_pr_inference
|
|
|
|
| 44 |
if inference is None:
|
| 45 |
inference = load_model()
|
| 46 |
|
| 47 |
+
data, sr = sf.read(audio_path, dtype="float32")
|
| 48 |
+
waveform = torch.from_numpy(data)
|
| 49 |
+
if waveform.dim() == 2:
|
| 50 |
+
waveform = waveform.mean(dim=1)
|
| 51 |
if sr != SAMPLE_RATE:
|
| 52 |
waveform = torchaudio.functional.resample(waveform, sr, SAMPLE_RATE)
|
|
|
|
| 53 |
waveform = waveform[: SAMPLE_RATE * MAX_SECONDS]
|
| 54 |
|
| 55 |
if waveform.numel() == 0:
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
torch
|
| 2 |
torchaudio
|
|
|
|
| 3 |
huggingface_hub
|
| 4 |
pyyaml
|
| 5 |
typeguard
|
|
|
|
| 1 |
torch
|
| 2 |
torchaudio
|
| 3 |
+
soundfile
|
| 4 |
huggingface_hub
|
| 5 |
pyyaml
|
| 6 |
typeguard
|