Spaces:

tachiwin
/

classifier

Sleeping

Luis J Camargo committed 8 days ago

Commit

7c2c8fa

1 Parent(s): 124a2d5

feat: Add audio resampling to 16kHz before processing to ensure consistent input.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -98,6 +98,15 @@ def predict_language(audio):
     sample_rate, audio_array = audio
     audio_len_sec = len(audio_array) / sample_rate
     print(f"\n--- [LOG] New Request ---")
     print(f"[LOG] Start Memory: {start_mem:.2f} MB")
@@ -107,7 +116,7 @@ def predict_language(audio):
     print("[LOG] Step 3: Extracting features...")
     inputs = processor(
         audio_array,
-        sampling_rate=sample_rate,
         do_normalize=True,
         device="cpu",
         return_tensors="pt",

     sample_rate, audio_array = audio
     audio_len_sec = len(audio_array) / sample_rate
+    # Resampling
+    if sample_rate != 16000:
+        print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
+        import librosa
+        # Use res_type="kaiser_fast" to save memory/cpu if needed, but default is usually fine
+        audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16_000)
+        print(f"[LOG] Memory after resampling: {get_mem_usage():.2f} MB")
     print(f"\n--- [LOG] New Request ---")
     print(f"[LOG] Start Memory: {start_mem:.2f} MB")
     print("[LOG] Step 3: Extracting features...")
     inputs = processor(
         audio_array,
+        sampling_rate=16_000,
         do_normalize=True,
         device="cpu",
         return_tensors="pt",