# KeDiVoInRe / app.py
# Author: nikzarifie — "Update app.py" (commit 7eefc16, verified)
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg
from keras.models import load_model
# Load the Keras model
# NOTE(review): 'test.h5' is resolved relative to the working directory, so
# the weights file must ship alongside app.py. Loading at import time makes
# the app fail fast at startup if the model is missing.
model = load_model('test.h5')
# Define classes
# Human-readable labels, index-aligned with the model's output vector
# (classes[np.argmax(prediction)] below relies on this ordering).
# NOTE(review): placeholder names — presumably the five Malay syllable
# classes; confirm against the training label order.
classes = ['Class 1', 'Class 2', 'Class 3', 'Class 4', 'Class 5']
def classify_syllable(voice):
    """Classify a recorded voice clip into one of the five syllable classes.

    Parameters
    ----------
    voice : str
        Path to the recorded audio file (the Gradio Audio input is
        configured with type="file", so a file path is delivered here).

    Returns
    -------
    tuple
        (predicted_class, plot): the predicted label string and an RGB
        uint8 array of the mel-spectrogram for display.
    """
    # BUG FIX: the original fed the matplotlib Figure returned by
    # voice_to_image() straight into model.predict(); a Figure is not
    # indexable as an array, so prediction crashed. Build the model input
    # directly from the audio instead.
    audio_data, sample_rate = librosa.load(voice, sr=None)
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    # Add batch and channel axes: (n_mels, frames) -> (1, n_mels, frames, 1).
    # NOTE(review): assumes the model was trained on dB mel-spectrograms of
    # this shape — confirm the expected input size against training code.
    prediction = model.predict(spectrogram_db[np.newaxis, ..., np.newaxis])[0]
    # Map the highest-scoring output index to its label.
    predicted_class = classes[np.argmax(prediction)]
    # Render a separate figure purely for the UI display.
    plot = plot_spectrogram(voice_to_image(voice))
    return predicted_class, plot
def voice_to_image(voice):
    """Render the mel-spectrogram of an audio file as a matplotlib Figure.

    Parameters
    ----------
    voice : str
        Path to an audio file readable by librosa.

    Returns
    -------
    matplotlib.figure.Figure
        A figure containing the dB-scaled mel-spectrogram with axes hidden,
        suitable for rasterizing via plot_spectrogram().
    """
    # sr=None preserves the file's native sample rate instead of resampling.
    audio_data, sample_rate = librosa.load(voice, sr=None)
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    # Convert power to decibels relative to the peak for a readable plot.
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    fig, ax = plt.subplots()
    ax.imshow(spectrogram_db, aspect='auto', cmap='viridis')
    ax.axis('off')
    fig.tight_layout(pad=0)
    # BUG FIX: unregister the figure from pyplot's global registry so
    # repeated requests in this long-running app don't leak figures. The
    # Figure object itself stays alive and is later rendered by attaching
    # a fresh FigureCanvasAgg in plot_spectrogram().
    plt.close(fig)
    return fig
def plot_spectrogram(img):
    """Rasterize a matplotlib Figure into an RGB uint8 numpy array.

    Parameters
    ----------
    img : matplotlib.figure.Figure
        The figure to render (e.g. the one produced by voice_to_image()).

    Returns
    -------
    np.ndarray
        An (H, W, 3) uint8 RGB image of the rendered figure.
    """
    canvas = FigureCanvasAgg(img)
    canvas.draw()
    # BUG FIX: canvas.tostring_rgb() was deprecated in matplotlib 3.8 and
    # removed in 3.10. buffer_rgba() is the supported accessor; drop the
    # alpha channel and copy so the result outlives the canvas buffer.
    rgba = np.asarray(canvas.buffer_rgba())
    return rgba[..., :3].copy()
# Define Gradio interface (legacy gradio 2.x gr.inputs/gr.outputs API).
# type="file" makes Gradio hand classify_syllable a temporary file path,
# which librosa.load can open directly.
voice_input = gr.inputs.Audio(source="microphone", type="file", label="Record Voice")
output_label = gr.outputs.Label(label="Predicted Class")
output_image = gr.outputs.Image(type="numpy", label="Spectrogram")
# BUG FIX: removed the `gr.inputs.Button` line — the gradio inputs module
# has no Button component (Interface renders its own Submit button), so it
# raised AttributeError at startup; the variable was never used anyway.
gr.Interface(
    fn=classify_syllable,
    inputs=voice_input,
    outputs=[output_label, output_image],
    title="Malay Language Syllable Classification",
    description="Record your voice and classify the Malay language syllable into one of five classes.",
    theme="compact",
    allow_flagging=False,
    allow_screenshot=False,
    allow_browser_cache=False
).launch()