Luis J Camargo committed on
Commit
7c2c8fa
·
1 Parent(s): 124a2d5

feat: Add audio resampling to 16kHz before processing to ensure consistent input.

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -98,6 +98,15 @@ def predict_language(audio):
98
 
99
  sample_rate, audio_array = audio
100
  audio_len_sec = len(audio_array) / sample_rate
 
 
 
 
 
 
 
 
 
101
 
102
  print(f"\n--- [LOG] New Request ---")
103
  print(f"[LOG] Start Memory: {start_mem:.2f} MB")
@@ -107,7 +116,7 @@ def predict_language(audio):
107
  print("[LOG] Step 3: Extracting features...")
108
  inputs = processor(
109
  audio_array,
110
- sampling_rate=sample_rate,
111
  do_normalize=True,
112
  device="cpu",
113
  return_tensors="pt",
 
98
 
99
  sample_rate, audio_array = audio
100
  audio_len_sec = len(audio_array) / sample_rate
101
+
102
+ # Resampling
103
+ if sample_rate != 16000:
104
+ print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
105
+ import librosa
106
+ # Use res_type="kaiser_fast" to save memory/cpu if needed, but default is usually fine
107
+ audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16_000)
108
+ print(f"[LOG] Memory after resampling: {get_mem_usage():.2f} MB")
109
+
110
 
111
  print(f"\n--- [LOG] New Request ---")
112
  print(f"[LOG] Start Memory: {start_mem:.2f} MB")
 
116
  print("[LOG] Step 3: Extracting features...")
117
  inputs = processor(
118
  audio_array,
119
+ sampling_rate=16_000,
120
  do_normalize=True,
121
  device="cpu",
122
  return_tensors="pt",