Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Sleeping

App Files Files Community

Deep_Audio_Classifier_using_CNN / app.py

AIOmarRehan

Create app.py

7651694 verified 24 days ago

raw

history blame

3.15 kB

	import gradio as gr
	import numpy as np
	import librosa
	from PIL import Image
	import tempfile
	import os
	from app.preprocess import preprocess_audio
	from app.model import predict
	from collections import Counter, defaultdict


	# Process Image Input
	def process_image_input(img):
	    """Classify a single spectrogram image.

	    The image is handed to ``predict`` unchanged; the only post-processing
	    is rounding the returned confidence to three decimal places.

	    Args:
	        img: Spectrogram image in whatever form ``predict`` accepts
	            (the Gradio UI in this file supplies a PIL image).

	    Returns:
	        Tuple ``(label, rounded_confidence, probs)`` where ``label`` and
	        ``probs`` are passed through from ``predict`` untouched.
	    """
	    top_label, score, distribution = predict(img)
	    rounded_score = round(score, 3)
	    return top_label, rounded_score, distribution


	# Process Audio Input
	def process_audio_input(audio_file):
	    """Classify an audio clip by majority vote over its spectrogram chunks.

	    The audio is converted by ``preprocess_audio`` into a sequence of
	    spectrogram images, each image is classified with ``predict``, and the
	    most frequent label wins. Ties on the vote count are broken by the
	    larger summed confidence.

	    Args:
	        audio_file: Either the raw bytes of an audio file, or a path
	            (``str`` / ``os.PathLike``) to an audio file on disk. Bytes are
	            spooled to a temporary ``.wav``-suffixed file that is removed
	            afterwards; a caller-supplied path is read in place and never
	            deleted.

	    Returns:
	        Tuple ``(final_label, final_conf, all_preds, all_confs)``:
	        the winning label, the mean confidence of the chunks that voted
	        for it (rounded to 3 decimals), the per-chunk labels, and the
	        per-chunk confidences (each rounded to 3 decimals).

	    Raises:
	        TypeError: If ``audio_file`` is neither bytes-like nor a path.
	        ValueError: If preprocessing yields no chunks to classify.
	    """
	    if isinstance(audio_file, (str, os.PathLike)):
	        # Already on disk: hand the path straight to the preprocessor.
	        imgs = preprocess_audio(os.fspath(audio_file))
	    elif isinstance(audio_file, (bytes, bytearray, memoryview)):
	        # delete=False so the file can be reopened by name after the handle
	        # is closed; cleanup is done manually below.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	            tmp.write(audio_file)
	            tmp_path = tmp.name
	        try:
	            imgs = preprocess_audio(tmp_path)
	        finally:
	            # Always remove the temp file, even if preprocessing fails.
	            os.remove(tmp_path)
	    else:
	        raise TypeError(
	            "audio_file must be bytes-like or a file path, "
	            f"got {type(audio_file).__name__}"
	        )

	    # Predict on each chunk (the probability vectors are not needed here).
	    all_preds = []
	    all_confs = []
	    for img in imgs:
	        label, conf, _ = predict(img)
	        all_preds.append(label)
	        all_confs.append(conf)

	    if not all_preds:
	        # Without this guard max() below fails with an opaque message.
	        raise ValueError("No spectrogram chunks could be extracted from the audio.")

	    # Majority vote; labels sharing the top count compete on summed confidence.
	    counter = Counter(all_preds)
	    max_count = max(counter.values())
	    candidates = {label for label, count in counter.items() if count == max_count}

	    conf_sums = defaultdict(float)
	    for label, conf in zip(all_preds, all_confs):
	        if label in candidates:
	            conf_sums[label] += conf
	    # With a single candidate this trivially selects it; on an exact tie of
	    # sums the label seen first in the clip wins (dict insertion order).
	    final_label = max(conf_sums, key=conf_sums.get)

	    final_conf = float(
	        np.mean([conf for label, conf in zip(all_preds, all_confs) if label == final_label])
	    )

	    # float() so callers get plain Python numbers even if predict() returns
	    # numpy scalars.
	    return final_label, round(final_conf, 3), all_preds, [round(float(c), 3) for c in all_confs]


	# MAIN GRADIO CLASSIFICATION PIPELINE (AUDIO OR IMAGE)
	def classify(audio, image):
	    """Route a request to the image or the audio classification pipeline.

	    The image takes precedence: when both inputs are supplied the audio is
	    ignored.

	    Args:
	        audio: Uploaded audio, forwarded as-is to ``process_audio_input``,
	            or ``None`` when no audio was provided.
	        image: Uploaded spectrogram image, forwarded as-is to
	            ``process_image_input``, or ``None`` when no image was provided.

	    Returns:
	        dict: For an image, the keys ``"Final Label"``, ``"Confidence"`` and
	        ``"Details"``. For audio, the keys ``"Final Label"``,
	        ``"Confidence"``, ``"All Chunk Labels"`` and
	        ``"All Chunk Confidences"``. When neither input is given, a dict
	        with a single ``"Error"`` key holding a human-readable message.
	    """
	    # If image is provided -> classify image
	    if image is not None:
	        label, conf, probs = process_image_input(image)
	        return {
	            "Final Label": label,
	            "Confidence": conf,
	            "Details": probs,
	        }

	    # If audio is provided -> preprocess audio -> classify
	    if audio is not None:
	        label, conf, all_preds, all_confs = process_audio_input(audio)
	        return {
	            "Final Label": label,
	            "Confidence": conf,
	            "All Chunk Labels": all_preds,
	            "All Chunk Confidences": all_confs,
	        }

	    # Nothing provided. Return a dict like the other branches: the result is
	    # rendered by a gr.JSON output, which treats a str value as JSON text to
	    # parse, so a bare sentence would fail to decode.
	    return {"Error": "Please upload an audio file OR a spectrogram image."}


	# GRADIO UI
	def _classify_upload(audio, image):
	    """Adapt Gradio's file-path audio input to ``classify``.

	    The audio component below is configured with ``type="filepath"``, so
	    Gradio passes a path to the uploaded file. The file is read into raw
	    bytes here because that is the form the audio pipeline is written
	    against; the image argument is forwarded untouched.
	    """
	    if audio is not None:
	        with open(audio, "rb") as fh:
	            audio = fh.read()
	    return classify(audio, image)


	interface = gr.Interface(
	    fn=_classify_upload,
	    inputs=[
	        # gr.Audio supports type="numpy" or type="filepath"; "bytes" is not
	        # a valid value.
	        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
	        gr.Image(type="pil", label="Upload Spectrogram Image"),
	    ],
	    outputs=gr.JSON(label="Prediction Results"),
	    title="General Audio Classifier (Audio + Spectrogram Support)",
	    description=(
	        "Upload a raw audio file OR a spectrogram image.\n"
	        "The app automatically detects the input type:\n"
	        "• If audio → the model preprocesses it into mel spectrogram chunks.\n"
	        "• If spectrogram → the model classifies it directly.\n"
	        "Built using CNN + Mel-Spectrogram + Gradio."
	    ),
	)

	# Start the web server (runs at import time, as in the original script).
	interface.launch()