Update handler.py
handler.py  CHANGED  (+20 -41)
@@ -21,7 +21,7 @@ class EndpointHandler():
         emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
         return {
             "emotion": emotion_prediction,
-            "depression":
+            "depression": depression_prediction
         }
 
     def get_mfcc_features(self, features, padding):
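The hunk above completes the response dict, whose "depression" key previously had no value. Illustratively (the label set and the score format are assumptions, not shown in this diff), the endpoint now answers something like:

```python
# Illustrative response only -- actual labels and score shape depend on the models.
response = {
    "emotion": "happy",       # self.emotion_labels[np.argmax(...)]
    "depression": [0.87],     # raw self.depression_model.predict(...)[0]
}
```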
@@ -33,53 +33,32 @@ class EndpointHandler():
         return np.expand_dims(features, axis=0)
 
     def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
-
-
-
-
+        # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
+        audio_bytes = base64.b64decode(base64_string)
+        audio_io = io.BytesIO(audio_bytes)
+        audio = AudioSegment.from_file(audio_io, format="webm")
+
+        byte_io = io.BytesIO()
+        audio.export(byte_io, format="wav")
+        byte_io.seek(0)
 
-
-            try:
-                y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
-            except:
-                # If librosa fails, try using pydub
-                audio_io.seek(0)  # Reset file pointer
-                audio = AudioSegment.from_file(audio_io)
-                audio = audio.set_channels(1)  # Convert to mono
-                audio = audio.set_frame_rate(desired_sr)
-
-                samples = np.array(audio.get_array_of_samples())
-                y = samples.astype(np.float32) / 32768.0  # Normalize
-                sr = desired_sr
+        sample_rate, audio_array = wavfile.read(byte_io)
 
-
-
+        audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
+        start_sample = int(offset * desired_sr)
+        end_sample = start_sample + int(duration * desired_sr)
+        audio_array = audio_array[start_sample:end_sample]
 
-
-
-
-
-            elif mfcc.shape[1] > 216:
-                mfcc = mfcc[:, :216]
-
-            return mfcc
-
-        except Exception as e:
-            print(f"Error in preprocess_audio_data: {str(e)}")
-            raise
+
+        # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
+        X = librosa.util.normalize(audio_array)
+        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
 
     def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
-        emotion_features = features
-        emotion_features = np.expand_dims(emotion_features, axis=-1)  # Add channel dimension
-        emotion_features = np.expand_dims(emotion_features, axis=0)  # Add batch dimension
-
+        emotion_features = self.get_mfcc_features(features, emotion_padding)
         depression_features = self.get_mfcc_features(features, depression_padding)
-
-        print("Emotion model input shape:", self.emotion_model.input_shape)
-        print("Emotion features shape:", emotion_features.shape)
-
         emotion_prediction = self.emotion_model.predict(emotion_features)[0]
         emotion_prediction = self.emotion_labels[np.argmax(emotion_prediction)]
-
         depression_prediction = self.depression_model.predict(depression_features)[0]
+        # depression_prediction = "Depressed" if depression_prediction >= 0.5 else "Not Depressed"
         return emotion_prediction, depression_prediction
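Pulling the added lines together: the rewritten preprocess_audio_data decodes the base64 webm payload with pydub, round-trips it through an in-memory WAV so scipy can read raw PCM, resamples to desired_sr with librosa, slices out the offset/duration window, and returns 30 MFCCs. A self-contained sketch of the new code path (the imports are inferred from usage; they are not shown in this hunk):

```python
import base64
import io

import librosa
from pydub import AudioSegment  # needs ffmpeg installed for webm input
from scipy.io import wavfile


def preprocess_audio_data(base64_string, duration=2.5, desired_sr=22050 * 2, offset=0.5):
    # Decode the payload and let pydub/ffmpeg parse the webm container.
    audio_bytes = base64.b64decode(base64_string)
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="webm")

    # Round-trip through an in-memory WAV so scipy can read PCM samples.
    byte_io = io.BytesIO()
    audio.export(byte_io, format="wav")
    byte_io.seek(0)
    sample_rate, audio_array = wavfile.read(byte_io)

    # wavfile.read yields integer PCM; cast to float before resampling.
    # (Assumes a mono recording; stereo would need audio.set_channels(1) first.)
    audio_array = librosa.resample(audio_array.astype(float),
                                   orig_sr=sample_rate, target_sr=desired_sr)

    # Emulate librosa.load's offset/duration arguments by slicing.
    start_sample = int(offset * desired_sr)
    audio_array = audio_array[start_sample:start_sample + int(duration * desired_sr)]

    # Peak-normalize, then extract 30 MFCC coefficients.
    X = librosa.util.normalize(audio_array)
    return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
```

This replaces the removed librosa.load-with-pydub-fallback logic with a single pydub decode path, presumably because librosa cannot open webm/opus streams directly.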
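Both model branches now share get_mfcc_features, whose body is collapsed in this view (only the final return np.expand_dims(features, axis=0) at line 33 is visible). Judging from the emotion_padding=216 / depression_padding=2584 arguments and the removed mfcc[:, :216] truncation, a plausible reconstruction (an assumption, not the file's actual code) is:

```python
import numpy as np


def get_mfcc_features(features, padding):
    # Hypothetical body: pad or truncate the MFCC matrix along the
    # time axis so every clip yields a fixed (30, padding) input.
    if features.shape[1] < padding:
        features = np.pad(features,
                          ((0, 0), (0, padding - features.shape[1])),
                          mode="constant")
    else:
        features = features[:, :padding]
    # Matches the one visible line: add a leading batch dimension.
    return np.expand_dims(features, axis=0)
```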
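Finally, a hypothetical end-to-end call against the updated handler (the no-arg construction and the "inputs" request key are assumptions based on typical custom-endpoint handlers; neither appears in this diff):

```python
import base64

# Encode a webm recording the way preprocess_audio_data expects.
with open("clip.webm", "rb") as f:
    payload = base64.b64encode(f.read()).decode("ascii")

handler = EndpointHandler()            # assumed constructor
result = handler({"inputs": payload})  # assumed request shape
print(result["emotion"], result["depression"])
```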