Spaces:

alvi123
/

demo_apps

Runtime error

App Files Files Community

alvi123 commited on Jan 22, 2023

Commit

51c4e93

1 Parent(s): 883f3cb

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -95

app.py CHANGED Viewed

@@ -1,102 +1,23 @@
 import gradio as gr
-import wave
-import matplotlib.pyplot as plt
-import numpy as np
-from extract_features import *
-import pickle
-import soundfile
-import librosa
-classifier = pickle.load(open('finalized_rf.sav', 'rb'))
-def emotion_predict(input):
-  input_features = extract_feature(input, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
-  rf_prediction = classifier.predict(input_features.reshape(1,-1))
-  if rf_prediction == 'happy':
-    return 'kata-kerja '
-  elif rf_prediction == 'neutral':
-    return 'kata-benda '
-  elif rf_prediction == 'sad':
-    return 'kata-sifat '
-  else:
-    return 'kata-keterangan'
-def plot_fig(input):
-  wav = wave.open(input, 'r')
-  raw = wav.readframes(-1)
-  raw = np.frombuffer(raw, "int16")
-  sampleRate = wav.getframerate()
-  Time = np.linspace(0, len(raw)/sampleRate, num=len(raw))
-  fig = plt.figure()
-  plt.rcParams["figure.figsize"] = (50,15)
-  plt.title("Waveform Of the Audio", fontsize=25)
-  plt.xticks(fontsize=15)
-  plt.yticks(fontsize=15)
-  plt.ylabel("Amplitude", fontsize=25)
-  plt.plot(Time, raw, color='red')
-  return fig
-with gr.Blocks() as app:
-  gr.Markdown(
-        """
-   # Speech Detected 🎵😍
-    This application classifies inputted audio 🔊 according to the prediction into four categories:
-    1. kata-benda 😎
-    2. kata-kerja 😐
-    3. kata-sifat 😢
-    4. kata-keterangan 😤
-    """
-  )
-  with gr.Tab("Record Audio"):
-    record_input = gr.Audio(source="microphone", type="filepath")
-    with gr.Accordion("Audio Visualization", open=False):
-      gr.Markdown(
-          """
-      ### Visualization will work only after Audio has been submitted
-      """
-      )
-      plot_record = gr.Button("Display Audio Signal")
-      plot_record_c = gr.Plot(label='Waveform Of the Audio')
-    record_button = gr.Button("Detect Emotion")
-    record_output = gr.Text(label = 'Emotion Detected')
-  with gr.Tab("Upload Audio File"):
-    gr.Markdown(
-        """
-    ## Uploaded Audio should be of .wav format
-    """
-    )
-    upload_input = gr.Audio(type="filepath")
-    with gr.Accordion("Audio Visualization", open=False):
-      gr.Markdown(
-          """
-      ### Visualization will work only after Audio has been submitted
-      """
-      )
-      plot_upload = gr.Button("Display Audio Signal")
-      plot_upload_c = gr.Plot(label='Waveform Of the Audio')
-    upload_button = gr.Button("Detect Emotion")
-    upload_output = gr.Text(label = 'Emotion Detected')
-  record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
-  upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
-  plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
-  plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)
-app.launch()

 import gradio as gr
+from transformers import pipeline
+model = pipeline(task="automatic-speech-recognition",
+                 model="facebook/s2t-medium-librispeech-asr")
+def predict_speech_to_text(audio):
+    prediction = model(audio)
+    text = prediction['text']
+    return text
+gr.Interface(fn=predict_speech_to_text,
+             title="Automatic Speech Recognition (ASR)",
+             inputs=gr.inputs.Audio(
+                 source="microphone", type="filepath", label="Input"),
+             outputs=gr.outputs.Textbox(label="Output"),
+             description="Using pipeline with Facebook S2T for ASR.",
+             examples=['ljspeech.wav'],
+             allow_flagging='never'
+             ).launch(share=True)