Create app.py

app.py (ADDED)
'''
Outline:
- Create animation: animate charts (potentially using Streamlit)
'''
import pickle

import librosa
import librosa.display  # Explicit import: waveshow/specshow live in librosa.display, which older librosa versions do not load implicitly
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st

import keras       # Imported so the pickled Keras model can be deserialized
import tensorflow  # Keras backend (not referenced directly)

model_path = "model_simple.sav"  # Path to the pickled model file

# Maps emotions to integers (taken from the data preprocessing step)
emotion_map = {
    'Disgust': 0,
    'Happiness': 1,
    'Sadness': 2,
    'Neutral': 3,
    'Fear': 4,
    'Anger': 5,
    'Surprise': 6
}

# Reverses the emotion mapping so predicted integers can be mapped back to emotion names
reversed_emotion_map = {value: key for key, value in emotion_map.items()}

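# For example, a model output of class 1 maps back to its label via
# reversed_emotion_map[1], which is 'Happiness'.
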
# Uses librosa to load the uploaded audio file as an array of amplitude samples
@st.cache_data
def process_audio(input_file):
    audio_signal, sample_rate = librosa.load(input_file)
    return audio_signal, sample_rate

# Animates the magnitude spectrum of the audio frame by frame using matplotlib
def display_spectrum_animation(audio_signal, sample_rate):
    S = np.abs(librosa.stft(audio_signal))  # Magnitude spectrogram: (frequency bins, frames)
    frequencies = librosa.fft_frequencies(sr=sample_rate)

    fig, ax = plt.subplots()

    def update_spectrum(num, S, ax):
        ax.clear()
        ax.plot(frequencies, S[:, num])
        ax.set_xlabel("Frequency (Hz)")
        ax.set_ylabel("Amplitude")

    ani = animation.FuncAnimation(fig, update_spectrum, frames=S.shape[1], fargs=[S, ax], blit=False)
    ani.save("spectrum_animation.gif", writer="pillow")  # The Pillow writer only needs the Pillow package, not an ImageMagick install
    st.image("spectrum_animation.gif")


# Plots the waveform (amplitude over time) using librosa
@st.cache_data
def display_frequency(audio_signal, sample_rate):
    librosa.display.waveshow(audio_signal, sr=sample_rate)
    st.pyplot(plt.gcf())

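# Note: display_frequency is not currently reachable from the UI; to expose it,
# append "Waveform" to chart_options and display_frequency to functions in
# create_selections below.
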
# Creates and displays an animated mel spectrogram using librosa
@st.cache_data
def display_mel_spectrogram(audio_signal, sample_rate):
    fig, ax = plt.subplots()
    audio_time = audio_signal.shape[0] / sample_rate  # Clip duration in seconds
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio_signal)), ref=np.max)

    amt_to_add = int(D.shape[-1] / audio_time)  # Frames per second of audio, used as an initial offset so the animation does not start empty

    librosa.display.specshow(D, sr=sample_rate, x_axis="time", y_axis="log", ax=ax)  # Initial draw to set up the axes

    def update_spectrogram(num, D, ax, plus):
        ax.clear()
        librosa.display.specshow(D[:, :num + plus], sr=sample_rate, x_axis="time", y_axis="log", ax=ax)

    ani = animation.FuncAnimation(fig, update_spectrogram, frames=np.arange(1, D.shape[1]), fargs=[D, ax, amt_to_add], blit=False)
    ani.save("spectrogram_animation.gif", writer="pillow")  # The Pillow writer only needs the Pillow package, not an ImageMagick install
    st.image("spectrogram_animation.gif")

# Creates the interface allowing users to select which plot they want displayed
def create_selections(audio_signal, sample_rate):
    chart_options = ["Spectrum", "Mel-Spectrogram"]  # Chart titles go here
    functions = [display_spectrum_animation, display_mel_spectrogram]  # Matching plotting functions go here
    chart_selector = st.radio(
        label="Chart type",
        options=chart_options,
        horizontal=True,
        label_visibility="collapsed"  # Hide the label; an empty label string triggers a Streamlit warning
    )
    selection_index = chart_options.index(chart_selector)
    functions[selection_index](audio_signal, sample_rate)

# Helper function to force a given waveform array to a fixed length by truncating or zero-padding
# Currently this length is hard-coded at 66,150 samples (3 seconds at librosa's default 22,050 Hz), though that may change in the future
@st.cache_data
def standardize_waveform_length(waveform):
    audio_length = 66150
    if len(waveform) > audio_length:
        waveform = waveform[:audio_length]
    else:
        waveform = np.pad(waveform, (0, max(0, audio_length - len(waveform))), "constant")
    return waveform

# Takes in a given audio signal and returns its flattened mel-frequency cepstral coefficients (MFCCs)
@st.cache_data
def preprocess_audio_for_prediction(audio_signal, sample_rate):
    waveform = standardize_waveform_length(waveform=audio_signal)
    mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mels=128)
    mfcc = mfcc.reshape(-1)  # Flatten to a 1-D feature vector for the model
    return mfcc

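# Shape check (a sketch, assuming librosa defaults of sr=22050, n_mfcc=20, hop_length=512):
# a 66,150-sample clip yields 1 + 66150 // 512 = 130 MFCC frames, so the flattened
# feature vector has 20 * 130 = 2,600 values. For instance:
#
#     features = preprocess_audio_for_prediction(np.zeros(66150), 22050)
#     assert features.shape == (2600,)
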
# Loads the model given in model_path and returns a Keras Sequential model
@st.cache_resource  # cache_resource is meant for objects like ML models: load once, share across reruns
def load_model(model_path):
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    return model

# Uses the model to predict the speaker's emotion in the given audio clip
@st.cache_data
def get_emotion_prediction(mfcc):
    model = load_model(model_path)
    prediction = model.predict(mfcc[None])  # mfcc[None] adds a batch dimension: the model expects shape (1, n_features)
    predicted_index = np.argmax(prediction)
    emotion = reversed_emotion_map[predicted_index]
    return emotion

# Combines all model functions and displays the model output as a subheader
# (uncached so the subheader renders on every rerun; the heavy steps it calls are cached individually)
def display_prediction(audio_signal, sample_rate):
    mfcc = preprocess_audio_for_prediction(audio_signal, sample_rate)
    prediction = get_emotion_prediction(mfcc)
    st.subheader("Predicted Emotion: " + prediction, divider=True)

# Defines the entire process of inputting audio, displaying the model's prediction, and displaying charts
def run(input_file):
    st.audio(input_file)  # Audio player for the uploaded clip, kept outside the cached loader so it always renders
    audio_signal, sample_rate = process_audio(input_file)
    display_prediction(audio_signal, sample_rate)
    create_selections(audio_signal, sample_rate)

# Creates an input area to upload the file
def main():
    st.header("Upload your file here")
    uploaded_file = st.file_uploader("Audio file (.wav)", type="wav", label_visibility="collapsed")
    if uploaded_file is not None:
        run(uploaded_file)

if __name__ == "__main__":
    main()
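# To try the app locally (assuming the dependencies above are installed and
# model_simple.sav sits next to this file):
#
#     streamlit run app.py
#
# Streamlit serves the interface at http://localhost:8501 by default.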