Spaces:

lyimo
/

dove

Runtime error

dove / app.py

Update app.py

4856025 verified over 1 year ago

1.69 kB

	import tempfile

	import gradio as gr
	from fastai.vision.all import *
	import librosa
	import librosa.display
	import numpy as np
	import matplotlib.pyplot as plt
	from pydub import AudioSegment
	import PIL

	# Load the exported fastai Learner from 'model.pkl' (relative path) once at
	# import time so every prediction request reuses the same object.
	# NOTE(review): load_learner deserializes a pickle file — only ship a model.pkl
	# from a trusted source.
	learn = load_learner('model.pkl')
	# Class vocabulary of the learner's DataLoaders; predict() uses these entries
	# as the keys of its returned mapping.
	labels = learn.dls.vocab

	def audio_to_spectrogram(audio_file):
	    """Render a mel spectrogram of an audio file and save it as a PNG image.

	    Args:
	        audio_file: Path to the audio file (or any other object that
	            ``librosa.load`` accepts). String paths ending in ``.mp3``
	            (case-insensitive) are first converted to WAV with pydub.

	    Returns:
	        The file name of the saved image, always ``"spectrogram.png"``
	        (written relative to the current working directory and overwritten
	        on every call).

	    Raises:
	        ValueError: If ``audio_file`` is ``None``.
	    """
	    if audio_file is None:
	        raise ValueError("audio_file must be a path or file-like object, not None")

	    if isinstance(audio_file, str) and audio_file.lower().endswith('.mp3'):
	        # Decode the MP3 through pydub into a temporary WAV; the samples must be
	        # read while the temporary file still exists, i.e. inside the `with`.
	        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
	            AudioSegment.from_mp3(audio_file).export(temp_wav.name, format='wav')
	            y, sr = librosa.load(temp_wav.name, sr=None)
	    else:
	        # sr=None keeps the file's native sampling rate instead of resampling.
	        y, sr = librosa.load(audio_file, sr=None)

	    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
	    S_dB = librosa.power_to_db(S, ref=np.max)

	    spectrogram_file = "spectrogram.png"
	    fig, ax = plt.subplots()
	    try:
	        img = librosa.display.specshow(
	            S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax
	        )
	        fig.colorbar(img, ax=ax, format='%+2.0f dB')
	        ax.set(title='Mel-frequency spectrogram')
	        fig.savefig(spectrogram_file)
	    finally:
	        # Always release the figure, even if plotting or saving fails, so
	        # repeated requests do not accumulate open matplotlib figures.
	        plt.close(fig)
	    return spectrogram_file

	def predict(audio):
	    """Classify an audio clip and return a probability for every label.

	    The clip is rendered to a spectrogram image by ``audio_to_spectrogram``,
	    resized to 512x512 and passed to the module-level ``learn`` model.

	    Args:
	        audio: File path of the uploaded or recorded audio, or ``None`` when
	            no audio is available (e.g. the input component was cleared).

	    Returns:
	        A dict mapping each entry of ``labels`` to its predicted probability
	        as a float, or ``None`` when ``audio`` is ``None``.
	    """
	    if audio is None:
	        # Nothing to classify; returning None avoids pushing None into the
	        # audio loader and lets the output component stay empty.
	        return None

	    spectrogram_file = audio_to_spectrogram(audio)
	    img = PILImage.create(spectrogram_file).resize((512, 512))
	    # learn.predict returns (decoded prediction, class index, probabilities);
	    # only the per-class probabilities are needed here.
	    _, _, probs = learn.predict(img)
	    return {label: float(prob) for label, prob in zip(labels, probs)}

	# Audio input: accepts an uploaded file or a microphone recording and hands
	# predict() a file path.
	audio_input = gr.Audio(
	    sources=["upload", "microphone"],
	    type="filepath",
	    label="Upload or Record audio (WAV or MP3)",
	)
	# Output: label widget limited to the three highest-scoring classes.
	label_output = gr.components.Label(num_top_classes=3)

	# live=True re-runs predict whenever the input changes.
	demo = gr.Interface(
	    fn=predict,
	    inputs=[audio_input],
	    outputs=label_output,
	    live=True,
	)
	demo.launch()