Spaces:

joshieyu
/

AudioClassifier

Sleeping

App Files Files Community

AudioClassifier / app.py

joshieyu

Updated website description

0a1e9ce over 1 year ago

raw

history blame contribute delete

2.28 kB

	import gradio as gr
	from fastai.vision.all import load_learner, PILImage
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	import numpy as np
	import os
	import pathlib

	# NOTE(review): export.pkl presumably contains pickled pathlib.WindowsPath
	# objects (i.e. it was exported on Windows) — confirm. Aliasing WindowsPath
	# to PosixPath lets such a pickle be loaded on a non-Windows host.
	if os.name != 'nt':
	    pathlib.WindowsPath = pathlib.PosixPath

	# Load the exported fastai learner once at import time; the prediction
	# function below reuses this single instance for every call.
	learn_inf = load_learner('export.pkl')


	# Function to save mel spectrogram and run inference
	def save_mel_spectrogram_and_predict(wav_path):
	    """Render an audio file's mel spectrogram to a PNG and classify that image.

	    The audio is loaded through librosa with ``sr=16000``, converted to a
	    128-band mel spectrogram expressed in dB relative to its maximum, and
	    drawn without axes into ``temp_spectrograms/temp_spectrogram.png``. The
	    saved image is then passed to the module-level ``learn_inf`` learner.

	    Args:
	        wav_path: Path of the audio file to analyse.

	    Returns:
	        A ``(image_path, probabilities)`` tuple: the path of the saved
	        spectrogram PNG, and a dict mapping each ``learn_inf.dls.vocab``
	        entry to its predicted probability as a plain ``float``.
	    """
	    output_dir = 'temp_spectrograms'
	    os.makedirs(output_dir, exist_ok=True)
	    # NOTE: the file name is fixed, so each call overwrites the previous image.
	    output_path = os.path.join(output_dir, 'temp_spectrogram.png')

	    # NOTE(review): sample rate, mel-band count and figure size presumably
	    # have to match the preprocessing used when the model was trained — confirm
	    # before changing any of them.
	    y, sr = librosa.load(wav_path, sr=16000)
	    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
	    S_dB = librosa.power_to_db(S, ref=np.max)

	    # Draw on an explicit figure/axes rather than pyplot's implicit "current
	    # figure", and always close the figure so a failure while plotting or
	    # saving does not leave it open.
	    fig, ax = plt.subplots(figsize=(10, 4))
	    try:
	        librosa.display.specshow(
	            S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis', ax=ax
	        )
	        ax.axis('off')
	        fig.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png')
	    finally:
	        plt.close(fig)

	    # Run inference on the saved image; only the probabilities are needed.
	    img = PILImage.create(output_path)
	    _, _, probs = learn_inf.predict(img)

	    vocab = learn_inf.dls.vocab
	    return output_path, {vocab[i]: float(prob) for i, prob in enumerate(probs)}

	# Gradio interface function
	def gradio_interface(audio):
	    """Gradio callback: classify the uploaded audio file.

	    Args:
	        audio: File path of the uploaded audio (the input component is
	            configured with ``type="filepath"``). Presumably ``None`` when
	            the form is submitted without a file — confirm against the
	            Gradio version in use.

	    Returns:
	        The ``(spectrogram_path, predictions)`` pair produced by
	        ``save_mel_spectrogram_and_predict``.

	    Raises:
	        gr.Error: If no audio file was provided.
	    """
	    # Fail with a readable message instead of letting an empty value reach
	    # the audio loader and surface as an opaque error.
	    if not audio:
	        raise gr.Error("Please upload an audio file before submitting.")
	    return save_mel_spectrogram_and_predict(audio)

	# Create the Gradio interface
	# Input: a single uploaded audio file, handed to the callback as a file path.
	audio_input = gr.Audio(sources="upload", type="filepath")

	# Outputs: the rendered spectrogram image and the per-class probabilities.
	spectrogram_output = gr.Image(type="filepath", label="Mel Spectrogram")
	probabilities_output = gr.JSON(label="Class Probabilities")

	interface = gr.Interface(
	    fn=gradio_interface,
	    inputs=audio_input,
	    outputs=[spectrogram_output, probabilities_output],
	    title="Audio Classification with Mel Spectrogram",
	    description=(
	        "Upload an audio file to see its mel spectrogram and classification probabilities. "
	        "Currently supports acoustic guitar, electric guitar, bass, synth lead, and synth pad."
	    ),
	)

	# Start serving the app as soon as the module is executed.
	interface.launch()