Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Running

App Files Files Community

Deep_Audio_Classifier_using_CNN / app.py

AIOmarRehan

Update app.py

904154d verified 25 days ago

raw

history blame

2.94 kB

	import gradio as gr
	import numpy as np
	import librosa
	from PIL import Image
	import tempfile
	import os
	from app.preprocess import preprocess_audio
	from app.model import predict
	from collections import Counter, defaultdict


	# Process Image Input
	def process_image_input(img):
	    """Classify a single spectrogram image.

	    The image is handed unchanged to ``predict``.

	    Args:
	        img: The spectrogram image to classify.

	    Returns:
	        A tuple ``(label, confidence, probs)`` as produced by ``predict``,
	        with ``confidence`` rounded to three decimal places.
	    """
	    top_label, score, class_probs = predict(img)
	    rounded_score = round(score, 3)
	    return top_label, rounded_score, class_probs


	# Process Audio Input
	def process_audio_input(audio_path):
	    """Classify an audio file by voting over its spectrogram chunks.

	    The file is split by ``preprocess_audio`` (per the original comments,
	    into mel-spectrogram chunk images) and every chunk is classified with
	    ``predict``. The label predicted for the most chunks wins; if several
	    labels share the top vote count, the one whose chunks have the largest
	    summed confidence is chosen.

	    Args:
	        audio_path: Path to the audio file (Gradio supplies a filepath).

	    Returns:
	        A tuple ``(final_label, final_conf, all_preds, all_confs)``:
	        the winning label, the mean confidence of the chunks that voted
	        for it (rounded to 3 places), the per-chunk labels, and the
	        per-chunk confidences (each rounded to 3 places).

	    Raises:
	        ValueError: If preprocessing produced no chunks, so there is
	            nothing to vote on.
	    """
	    all_preds = []
	    all_confs = []
	    # Per-chunk probability vectors are not part of the result, so they
	    # are discarded instead of being accumulated.
	    for img in preprocess_audio(audio_path):
	        label, conf, _probs = predict(img)
	        all_preds.append(label)
	        all_confs.append(conf)

	    if not all_preds:
	        # Previously this surfaced as "max() arg is an empty sequence".
	        raise ValueError(
	            "No audio chunks were produced from the input; cannot classify."
	        )

	    # Majority vote.
	    counter = Counter(all_preds)
	    max_count = max(counter.values())
	    candidates = [k for k, v in counter.items() if v == max_count]

	    if len(candidates) == 1:
	        final_label = candidates[0]
	    else:
	        # Tie-break: largest total confidence among the tied labels.
	        conf_sums = defaultdict(float)
	        for label, conf in zip(all_preds, all_confs):
	            if label in candidates:
	                conf_sums[label] += conf
	        final_label = max(conf_sums, key=conf_sums.get)

	    winning_confs = [
	        conf for label, conf in zip(all_preds, all_confs) if label == final_label
	    ]
	    final_conf = float(np.mean(winning_confs))

	    return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]


	# Main prediction logic
	def classify(audio_path, image):
	    """Route the Gradio inputs to the matching classifier.

	    An image, when present, takes precedence over audio and is classified
	    directly; otherwise the audio file is preprocessed and classified
	    chunk by chunk.

	    Args:
	        audio_path: Filepath of the uploaded audio, or ``None``.
	        image: Uploaded spectrogram image, or ``None``.

	    Returns:
	        A dict for the JSON output component. For an image it holds
	        "Final Label", "Confidence" and "Details"; for audio it holds
	        "Final Label", "Confidence", "All Chunk Labels" and
	        "All Chunk Confidences"; when neither input is given it holds a
	        single "Error" message.
	    """
	    # If an image is provided, classify it directly.
	    if image is not None:
	        label, conf, probs = process_image_input(image)
	        return {
	            "Final Label": label,
	            "Confidence": conf,
	            "Details": probs,
	        }

	    # If an audio file is provided, preprocess and classify it.
	    if audio_path is not None:
	        label, conf, all_preds, all_confs = process_audio_input(audio_path)
	        return {
	            "Final Label": label,
	            "Confidence": conf,
	            "All Chunk Labels": all_preds,
	            "All Chunk Confidences": all_confs,
	        }

	    # Neither provided. The JSON output treats a str as JSON text to be
	    # parsed, so the message is wrapped in a dict rather than returned as
	    # a bare sentence.
	    return {"Error": "Please upload an audio file OR a spectrogram image."}


	# GRADIO UI
	interface = gr.Interface(
	    fn=classify,
	    # Two inputs, passed to classify() in this order: audio path, then image.
	    inputs=[
	        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
	        gr.Image(type="pil", label="Upload Spectrogram Image"),
	    ],
	    # classify() results are rendered as JSON.
	    outputs=gr.JSON(label="Prediction Results"),
	    title="General Audio Classifier (Audio + Spectrogram Support)",
	    description="\n".join(
	        [
	            "Upload a raw audio file OR a spectrogram image.",
	            "If audio → model preprocesses into mel-spectrogram chunks.",
	            "If image → model classifies the spectrogram directly.",
	            "Built using CNN + Mel-Spectrogram + Gradio.",
	        ]
	    ),
	)

	# Start the Gradio server when this script is executed.
	interface.launch()