Spaces:
Sleeping
Sleeping
Luis J Camargo committed on
Commit ·
7c2c8fa
1
Parent(s): 124a2d5
feat: Add audio resampling to 16kHz before processing to ensure consistent input.
Browse files
app.py
CHANGED
|
@@ -98,6 +98,15 @@ def predict_language(audio):
|
|
| 98 |
|
| 99 |
sample_rate, audio_array = audio
|
| 100 |
audio_len_sec = len(audio_array) / sample_rate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
print(f"\n--- [LOG] New Request ---")
|
| 103 |
print(f"[LOG] Start Memory: {start_mem:.2f} MB")
|
|
@@ -107,7 +116,7 @@ def predict_language(audio):
|
|
| 107 |
print("[LOG] Step 3: Extracting features...")
|
| 108 |
inputs = processor(
|
| 109 |
audio_array,
|
| 110 |
-
sampling_rate=
|
| 111 |
do_normalize=True,
|
| 112 |
device="cpu",
|
| 113 |
return_tensors="pt",
|
|
|
|
| 98 |
|
| 99 |
sample_rate, audio_array = audio
|
| 100 |
audio_len_sec = len(audio_array) / sample_rate
|
| 101 |
+
|
| 102 |
+
# Resampling
|
| 103 |
+
if sample_rate != 16000:
|
| 104 |
+
print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
|
| 105 |
+
import librosa
|
| 106 |
+
# Use res_type="kaiser_fast" to save memory/cpu if needed, but default is usually fine
|
| 107 |
+
audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16_000)
|
| 108 |
+
print(f"[LOG] Memory after resampling: {get_mem_usage():.2f} MB")
|
| 109 |
+
|
| 110 |
|
| 111 |
print(f"\n--- [LOG] New Request ---")
|
| 112 |
print(f"[LOG] Start Memory: {start_mem:.2f} MB")
|
|
|
|
| 116 |
print("[LOG] Step 3: Extracting features...")
|
| 117 |
inputs = processor(
|
| 118 |
audio_array,
|
| 119 |
+
sampling_rate=16_000,
|
| 120 |
do_normalize=True,
|
| 121 |
device="cpu",
|
| 122 |
return_tensors="pt",
|