Spaces:

ROSHANNN123
/

voicedetectionapi

Sleeping

ROSHANNN123 committed 13 days ago

Commit

14fb181

verified ·

1 Parent(s): 7146854

Update model_service.py

Files changed (1) hide show

model_service.py CHANGED Viewed

@@ -23,24 +23,28 @@ class ModelService:
             print(f"Error loading model: {e}")
             raise e
-    def preprocess_audio(self, audio_bytes):
         """
-        Load audio bytes, resample to 16000 Hz (required by Wav2Vec2).
         """
         try:
-            # Load audio from bytes
-            # librosa.load supports file-like objects
-            audio_file = io.BytesIO(audio_bytes)
-            # Load and resample to 16k
-            speech, sr = librosa.load(audio_file, sr=16000)
-            # Ensure it's mono (if multi-channel, average them) - librosa.load handles this by default (mono=True)
             return speech
         except Exception as e:
             print(f"Error processing audio: {e}")
-            raise ValueError("Invalid audio format or corrupted file: {str(e)}")
     def predict(self, audio_bytes):
         speech = self.preprocess_audio(audio_bytes)

             print(f"Error loading model: {e}")
             raise e
+        def preprocess_audio(self, audio_bytes):
         """
+        Load audio bytes, resample to 16000 Hz.
         """
+        import tempfile
+        import os
+        # Temp file handles high-quality WAV/MP3 better than memory buffers
+        fd, tmp_path = tempfile.mkstemp(suffix=".audio")
         try:
+            with os.fdopen(fd, 'wb') as tmp:
+                tmp.write(audio_bytes)
+            # Load and resample to 16kHz
+            speech, _ = librosa.load(tmp_path, sr=16000)
             return speech
         except Exception as e:
             print(f"Error processing audio: {e}")
+            raise ValueError(f"Invalid audio format or corrupted file: {str(e)}")
+        finally:
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
     def predict(self, audio_bytes):
         speech = self.preprocess_audio(audio_bytes)