# AIOmarRehan's picture
# Update app.py
# 904154d verified
# raw
# history blame
# 2.94 kB
import gradio as gr
import numpy as np
import librosa
from PIL import Image
import tempfile
import os
from app.preprocess import preprocess_audio
from app.model import predict
from collections import Counter, defaultdict
# Process Image Input
def process_image_input(img):
    """Classify a single spectrogram image.

    The image is passed unchanged to ``predict``; the three values it
    returns are forwarded, with the confidence rounded to three decimals.

    Args:
        img: The spectrogram image to classify (the UI supplies a PIL image).

    Returns:
        A ``(label, confidence, probs)`` tuple as produced by ``predict``,
        except that ``confidence`` is rounded to 3 decimal places.
    """
    prediction = predict(img)
    top_label, score, distribution = prediction
    return (top_label, round(score, 3), distribution)
# Process Audio Input
def process_audio_input(audio_path):
    """Classify an audio file by voting over its spectrogram chunks.

    The file is handed to ``preprocess_audio``; every item it yields is
    classified with ``predict`` and the per-chunk labels are combined by
    majority vote. When several labels share the highest vote count, the
    one whose chunks have the largest summed confidence wins.

    Args:
        audio_path: Path of the audio file (Gradio supplies a filepath).

    Returns:
        A tuple ``(final_label, final_confidence, chunk_labels,
        chunk_confidences)``. ``final_confidence`` is the mean confidence
        of the chunks that voted for ``final_label``; it and every entry of
        ``chunk_confidences`` are rounded to 3 decimal places.

    Raises:
        ValueError: If preprocessing produced no chunks, so there is
            nothing to vote on.
    """
    chunk_labels = []
    chunk_confs = []
    for img in preprocess_audio(audio_path):
        label, conf, _probs = predict(img)
        chunk_labels.append(label)
        # Plain floats keep the rounded values JSON-friendly.
        chunk_confs.append(float(conf))

    if not chunk_labels:
        # Without this guard the vote below fails with an opaque
        # "max() arg is an empty sequence".
        raise ValueError(
            "No spectrogram chunks were produced from the audio file; "
            "it may be empty or too short."
        )

    # Majority vote: find every label that reached the top vote count.
    votes = Counter(chunk_labels)
    top_count = max(votes.values())
    leaders = {label for label, count in votes.items() if count == top_count}

    # Sum confidences per leading label in chunk order. With one leader
    # this trivially selects it; with several it breaks the tie, and an
    # exact tie in the sums goes to the label seen first.
    conf_sums = defaultdict(float)
    for label, conf in zip(chunk_labels, chunk_confs):
        if label in leaders:
            conf_sums[label] += conf
    final_label = max(conf_sums, key=conf_sums.get)

    winning_confs = [
        conf
        for label, conf in zip(chunk_labels, chunk_confs)
        if label == final_label
    ]
    final_conf = float(np.mean(winning_confs))

    return (
        final_label,
        round(final_conf, 3),
        chunk_labels,
        [round(conf, 3) for conf in chunk_confs],
    )
# Main prediction logic
def classify(audio_path, image):
    """Route the UI inputs to the matching classifier.

    An image takes precedence: if one is supplied it is classified
    directly and any audio file is ignored. Otherwise the audio file, if
    present, is preprocessed and classified chunk by chunk.

    Args:
        audio_path: Path of the uploaded audio file, or ``None``.
        image: Uploaded spectrogram image, or ``None``.

    Returns:
        A dict for the JSON output component:
        * image input: ``"Final Label"``, ``"Confidence"``, ``"Details"``;
        * audio input: ``"Final Label"``, ``"Confidence"``,
          ``"All Chunk Labels"``, ``"All Chunk Confidences"``;
        * no input: a single ``"Error"`` entry with a prompt for the user.
    """
    # An image is classified directly.
    if image is not None:
        label, conf, probs = process_image_input(image)
        return {
            "Final Label": label,
            "Confidence": conf,
            "Details": probs,
        }

    # An audio file is preprocessed and classified per chunk.
    if audio_path is not None:
        label, conf, all_preds, all_confs = process_audio_input(audio_path)
        return {
            "Final Label": label,
            "Confidence": conf,
            "All Chunk Labels": all_preds,
            "All Chunk Confidences": all_confs,
        }

    # Neither provided. The JSON output component parses ``str`` values as
    # JSON text, so a bare sentence would fail to decode; wrap it in a dict
    # so the message is actually displayed.
    return {"Error": "Please upload an audio file OR a spectrogram image."}
# GRADIO UI
# Build the Gradio UI: two optional inputs (audio file path and PIL image)
# feeding ``classify``, with the result rendered as JSON.
interface = gr.Interface(
    fn=classify,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
        gr.Image(type="pil", label="Upload Spectrogram Image"),
    ],
    outputs=gr.JSON(label="Prediction Results"),
    title="General Audio Classifier (Audio + Spectrogram Support)",
    # The arrows were mis-decoded into "β†’"; restored to the intended "→".
    description=(
        "Upload a raw audio file OR a spectrogram image.\n"
        "If audio → model preprocesses into mel-spectrogram chunks.\n"
        "If image → model classifies the spectrogram directly.\n"
        "Built using CNN + Mel-Spectrogram + Gradio."
    ),
)

# Start the web server for the app.
interface.launch()