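"""Gradio demo: general audio classifier.

Accepts either a raw audio file (preprocessed into mel-spectrogram chunks
and classified chunk by chunk) or a spectrogram image (classified directly).
"""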
import gradio as gr
import numpy as np
from collections import Counter, defaultdict

from app.preprocess import preprocess_audio
from app.model import predict
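# Assumed contracts for the helpers imported above (defined under app/, not
# shown in this file), inferred from how they are used below:
#   preprocess_audio(path) -> list[PIL.Image]  # fixed-length mel-spectrogram chunks
#   predict(img)           -> (label: str, confidence: float, probs: dict)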
# Process Image Input
def process_image_input(img):
    """Classify a spectrogram image directly using model.predict."""
    label, confidence, probs = predict(img)
    return label, round(confidence, 3), probs
# Process Audio Input
def process_audio_input(audio_path):
    """Classify an audio file via chunk-wise prediction and majority voting."""
    # gr.Audio(type="filepath") hands us a path on disk, so no temp file is needed.
    # Preprocess → mel-spectrogram chunks (list of PIL images)
    imgs = preprocess_audio(audio_path)
    if not imgs:
        raise ValueError("Audio produced no spectrogram chunks (file may be too short).")
    # Predict on each chunk
    all_preds = []
    all_confs = []
    all_probs = []
    for img in imgs:
        label, conf, probs = predict(img)
        all_preds.append(label)
        all_confs.append(conf)
        all_probs.append(probs)
    # Majority vote over chunk labels; break ties by summed confidence
    counter = Counter(all_preds)
    max_count = max(counter.values())
    candidates = [k for k, v in counter.items() if v == max_count]
    if len(candidates) == 1:
        final_label = candidates[0]
    else:
        conf_sums = defaultdict(float)
        for i, label in enumerate(all_preds):
            if label in candidates:
                conf_sums[label] += all_confs[i]
        final_label = max(conf_sums, key=conf_sums.get)

    # Report the mean confidence over the chunks that voted for the winner
    final_conf = float(np.mean([all_confs[i] for i, l in enumerate(all_preds) if l == final_label]))
    return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
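# Worked example of the vote above: chunk labels ["dog", "cat", "dog"] with
# confidences [0.9, 0.6, 0.7] give counts {"dog": 2, "cat": 1}, so the final
# label is "dog" with mean confidence (0.9 + 0.7) / 2 = 0.8. (Illustrative
# labels only; the real classes come from app.model.)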
# MAIN GRADIO CLASSIFICATION PIPELINE (AUDIO OR IMAGE)
def classify(audio, image):
    # If an image is provided → classify it directly (image takes precedence)
    if image is not None:
        label, conf, probs = process_image_input(image)
        return {
            "Final Label": label,
            "Confidence": conf,
            "Details": probs,
        }

    # If audio is provided → preprocess into chunks → classify
    if audio is not None:
        label, conf, all_preds, all_confs = process_audio_input(audio)
        return {
            "Final Label": label,
            "Confidence": conf,
            "All Chunk Labels": all_preds,
            "All Chunk Confidences": all_confs,
        }

    # Nothing provided
    return "Please upload an audio file OR a spectrogram image."
# GRADIO UI
interface = gr.Interface(
    fn=classify,
    inputs=[
        # gr.Audio has no type="bytes"; "filepath" passes the uploaded file's
        # path straight through to process_audio_input.
        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
        gr.Image(type="pil", label="Upload Spectrogram Image"),
    ],
    outputs=gr.JSON(label="Prediction Results"),
    title="General Audio Classifier (Audio + Spectrogram Support)",
    description=(
        "Upload a raw audio file OR a spectrogram image.\n"
        "The app automatically detects the input type:\n"
        "• If audio → the model preprocesses it into mel-spectrogram chunks.\n"
        "• If spectrogram → the model classifies it directly.\n"
        "Built using CNN + Mel-Spectrogram + Gradio."
    ),
)

if __name__ == "__main__":
    interface.launch()