Update app.py
app.py CHANGED
@@ -1,103 +1,102 @@
 import gradio as gr
-import wave
-import matplotlib.pyplot as plt
-import numpy as np
-from extract_features import *
-import pickle
-import soundfile
 import librosa
+import matplotlib.pyplot as plt
+import plotly.express as px
+from radar_chart import radar_factory
 
-
-
-
-    input_features = extract_feature(input, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
-    rf_prediction = classifier.predict(input_features.reshape(1,-1))
-    if rf_prediction == 'happy':
-        return 'kata-kerja '
-    elif rf_prediction == 'neutral':
-        return 'kata-benda '
-    elif rf_prediction == 'sad':
-        return 'kata-sifat '
-    else:
-        return 'kata-keterangan'
-
-
-def plot_fig(input):
-    wav = wave.open(input, 'r')
-
-    raw = wav.readframes(-1)
-    raw = np.frombuffer(raw, "int16")
-    sampleRate = wav.getframerate()
-
-    Time = np.linspace(0, len(raw)/sampleRate, num=len(raw))
-
-    fig = plt.figure()
-
-    plt.rcParams["figure.figsize"] = (50,15)
-
-    plt.title("Waveform Of the Audio", fontsize=25)
-
-    plt.xticks(fontsize=15)
-
-    plt.yticks(fontsize=15)
+from keras.models import load_model
+import os
+import numpy as np
 
-    plt.ylabel("Amplitude", fontsize=25)
 
-    plt.plot(Time, raw, color='red')
 
-
+model = load_model(os.path.join("model", "Emotion_Voice_Detection_Model_tuned_2.h5"))
 
 
-
-
+def convert_class_to_emotion(pred):
+    """
+    Method to convert the predictions (int) into human-readable strings.
     """
-    # Speech Detected
-    This application classifies inputted audio according to the prediction into four categories:
-    1. kata-benda
-    2. kata-kerja
-    3. kata-sifat
-    4. kata-keterangan
-    """
-    )
-    with gr.Tab("Record Audio"):
-        record_input = gr.Audio(source="microphone", type="filepath")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # label_conversion = {0: 'neutral',
+    #                     1: 'calm',
+    #                     2: 'happy',
+    #                     3: 'sad',
+    #                     4: 'angry',
+    #                     5: 'fearful',
+    #                     6: 'disgust',
+    #                     7: 'surprised'}
+
+    label_conversion = {0: 'very happy',
+                        1: 'happy',
+                        2: 'very happy',
+                        3: 'very unhappy',
+                        4: 'very unhappy',
+                        5: 'unhappy',
+                        6: 'unhappy',
+                        7: 'happy'}
+
+    return label_conversion[int(pred)]
+
+
+def make_predictions(file, micro=None):
     """
-
-
+    Method to process the files and create the features.
+    """
+    if file is not None and micro is None:
+        input_audio = file
+    elif file is None and micro is not None:
+        input_audio = micro
+    else:
+        print("THERE IS A PROBLEM")
+        input_audio = file
+
+    data, sampling_rate = librosa.load(input_audio)
+    print(data)
+    print(f"THE SAMPLING RATE IS {sampling_rate}")
+    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
+    x = np.expand_dims(mfccs, axis=1)
+    x = np.expand_dims(x, axis=0)
+    predictions = np.argmax(model.predict(x), axis=1)
+
+    N = 8
+    theta = radar_factory(N, frame='polygon')
+    spoke_labels = np.array(['neutral',
+                             'calm',
+                             'happy',
+                             'sad',
+                             'angry',
+                             'fearful',
+                             'disgust',
+                             'surprised'])
+    fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
+                                  subplot_kw=dict(projection='radar'))
+    vec = model.predict(x)[0]
+    axs.plot(theta, vec, color="b")
+    axs.fill(theta, vec, alpha=0.3)
+
+    axs.set_varlabels(spoke_labels)
+
+    fig = plt.figure()
+    plt.plot(data, alpha=0.8)
+    plt.xlabel("time")
+    plt.ylabel("amplitude")
+
+
+    return convert_class_to_emotion(predictions), fig, fig_radar
+
+
+
+# Build the Gradio interface
+iface = gr.Interface(
+    fn=make_predictions,
+    title="Identify the emotion of a chunk of audio speech",
+    description="A simple interface to perform emotion recognition from an audio file",
+    article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
+    inputs=[gr.Audio(source="upload", type="filepath", label="File"),
+            gr.Audio(source="microphone", type="filepath", streaming=False,
+                     label="Microphone")],
+    examples=[[os.path.join("examples", filename)] for filename in os.listdir("examples")],
+    outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
 )
-
-    upload_input = gr.Audio(type="filepath")
-
-    with gr.Accordion("Audio Visualization", open=False):
-        gr.Markdown(
-        """
-        ### Visualization will work only after Audio has been submitted
-        """
-        )
-        plot_upload = gr.Button("Display Audio Signal")
-        plot_upload_c = gr.Plot(label='Waveform Of the Audio')
-
-    upload_button = gr.Button("Detect Emotion")
-    upload_output = gr.Text(label = 'Emotion Detected')
-
-    record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
-    upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
-    plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
-    plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)
-
-app.launch()
+iface.launch(debug=True)
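Note on the new inference path: make_predictions collapses each clip to one 40-dimensional MFCC vector (the mean over time, which keeps the input fixed-length regardless of clip duration) and feeds the Keras model a batch of shape (1, 40, 1); radar_factory is presumably the helper from matplotlib's radar-chart gallery example, which also supplies the 'radar' projection and set_varlabels used above. Below is a minimal sketch of that prediction path, assuming the bundled model accepts that input shape and returns 8 class probabilities; predict_class and the clip path are hypothetical names used only for illustration.

import numpy as np
import librosa
from keras.models import load_model

# Same checkpoint the Space loads; assumes the file is available locally.
model = load_model("model/Emotion_Voice_Detection_Model_tuned_2.h5")

def predict_class(path):  # hypothetical helper, not part of the Space
    # librosa.load resamples to 22050 Hz mono by default
    data, sr = librosa.load(path)
    # (n_frames, 40) MFCC matrix averaged over time -> fixed-length (40,) vector
    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
    x = mfccs[np.newaxis, :, np.newaxis]  # batch of one: shape (1, 40, 1)
    return int(np.argmax(model.predict(x), axis=1)[0])

print(predict_class("some_clip.wav"))  # hypothetical example clip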