Update handler.py
handler.py  CHANGED  (+20 -41)
@@ -21,7 +21,7 @@ class EndpointHandler():
         emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
         return {
             "emotion": emotion_prediction,
-            "depression":
+            "depression": depression_prediction
         }
 
     def get_mfcc_features(self, features, padding):
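The hunk above completes the response dict, whose "depression" key previously had no value. Illustratively (the label set and the score format are assumptions, not shown in this diff), the endpoint now answers something like:

```python
# Illustrative response only -- actual labels and score shape depend on the models.
response = {
    "emotion": "happy",       # self.emotion_labels[np.argmax(...)]
    "depression": [0.87],     # raw self.depression_model.predict(...)[0]
}
```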
@@ -33,53 +33,32 @@ class EndpointHandler():
         return np.expand_dims(features, axis=0)
 
     def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
-
-
-
-
+        # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
+        audio_bytes = base64.b64decode(base64_string)
+        audio_io = io.BytesIO(audio_bytes)
+        audio = AudioSegment.from_file(audio_io, format="webm")
+
+        byte_io = io.BytesIO()
+        audio.export(byte_io, format="wav")
+        byte_io.seek(0)
 
-
-            try:
-                y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
-            except:
-                # If librosa fails, try using pydub
-                audio_io.seek(0)  # Reset file pointer
-                audio = AudioSegment.from_file(audio_io)
-                audio = audio.set_channels(1)  # Convert to mono
-                audio = audio.set_frame_rate(desired_sr)
-
-                samples = np.array(audio.get_array_of_samples())
-                y = samples.astype(np.float32) / 32768.0  # Normalize
-                sr = desired_sr
+        sample_rate, audio_array = wavfile.read(byte_io)
 
-
-
+        audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
+        start_sample = int(offset * desired_sr)
+        end_sample = start_sample + int(duration * desired_sr)
+        audio_array = audio_array[start_sample:end_sample]
 
-
-
-
-
-            elif mfcc.shape[1] > 216:
-                mfcc = mfcc[:, :216]
-
-            return mfcc
-
-        except Exception as e:
-            print(f"Error in preprocess_audio_data: {str(e)}")
-            raise
+
+        # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
+        X = librosa.util.normalize(audio_array)
+        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
 
     def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
-        emotion_features = features
-        emotion_features = np.expand_dims(emotion_features, axis=-1)  # Add channel dimension
-        emotion_features = np.expand_dims(emotion_features, axis=0)  # Add batch dimension
-
+        emotion_features = self.get_mfcc_features(features, emotion_padding)
         depression_features = self.get_mfcc_features(features, depression_padding)
-
-        print("Emotion model input shape:", self.emotion_model.input_shape)
-        print("Emotion features shape:", emotion_features.shape)
-
         emotion_prediction = self.emotion_model.predict(emotion_features)[0]
         emotion_prediction = self.emotion_labels[np.argmax(emotion_prediction)]
-
         depression_prediction = self.depression_model.predict(depression_features)[0]
+        # depression_prediction = "Depressed" if depression_prediction >= 0.5 else "Not Depressed"
         return emotion_prediction, depression_prediction
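Pulling the added lines together: the rewritten preprocess_audio_data decodes the base64 webm payload with pydub, round-trips it through an in-memory WAV so scipy can read raw PCM, resamples to desired_sr with librosa, slices out the offset/duration window, and returns 30 MFCCs. A self-contained sketch of the new code path (the imports are inferred from usage; they are not shown in this hunk):

```python
import base64
import io

import librosa
from pydub import AudioSegment  # needs ffmpeg installed for webm input
from scipy.io import wavfile


def preprocess_audio_data(base64_string, duration=2.5, desired_sr=22050 * 2, offset=0.5):
    # Decode the payload and let pydub/ffmpeg parse the webm container.
    audio_bytes = base64.b64decode(base64_string)
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="webm")

    # Round-trip through an in-memory WAV so scipy can read PCM samples.
    byte_io = io.BytesIO()
    audio.export(byte_io, format="wav")
    byte_io.seek(0)
    sample_rate, audio_array = wavfile.read(byte_io)

    # wavfile.read yields integer PCM; cast to float before resampling.
    # (Assumes a mono recording; stereo would need audio.set_channels(1) first.)
    audio_array = librosa.resample(audio_array.astype(float),
                                   orig_sr=sample_rate, target_sr=desired_sr)

    # Emulate librosa.load's offset/duration arguments by slicing.
    start_sample = int(offset * desired_sr)
    audio_array = audio_array[start_sample:start_sample + int(duration * desired_sr)]

    # Peak-normalize, then extract 30 MFCC coefficients.
    X = librosa.util.normalize(audio_array)
    return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
```

This replaces the removed librosa.load-with-pydub-fallback logic with a single pydub decode path, presumably because librosa cannot open webm/opus streams directly.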
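Both model branches now share get_mfcc_features, whose body is collapsed in this view (only the final return np.expand_dims(features, axis=0) at line 33 is visible). Judging from the emotion_padding=216 / depression_padding=2584 arguments and the removed mfcc[:, :216] truncation, a plausible reconstruction (an assumption, not the file's actual code) is:

```python
import numpy as np


def get_mfcc_features(features, padding):
    # Hypothetical body: pad or truncate the MFCC matrix along the
    # time axis so every clip yields a fixed (30, padding) input.
    if features.shape[1] < padding:
        features = np.pad(features,
                          ((0, 0), (0, padding - features.shape[1])),
                          mode="constant")
    else:
        features = features[:, :padding]
    # Matches the one visible line: add a leading batch dimension.
    return np.expand_dims(features, axis=0)
```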
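Finally, a hypothetical end-to-end call against the updated handler (the no-arg construction and the "inputs" request key are assumptions based on typical custom-endpoint handlers; neither appears in this diff):

```python
import base64

# Encode a webm recording the way preprocess_audio_data expects.
with open("clip.webm", "rb") as f:
    payload = base64.b64encode(f.read()).decode("ascii")

handler = EndpointHandler()            # assumed constructor
result = handler({"inputs": payload})  # assumed request shape
print(result["emotion"], result["depression"])
```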