Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Running

App Files Files Community

AIOmarRehan committed on Nov 20, 2025

Commit

600df41

verified ·

1 Parent(s): 08a57d6

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -33

app.py CHANGED Viewed

@@ -1,20 +1,35 @@
 import gradio as gr
 import numpy as np
 from PIL import Image
-from app.preprocess import preprocess_audio
-from app.model import predict
-from collections import Counter, defaultdict
 import librosa
 import random
-from datasets import load_dataset
-# Load Hugging Face datasets
-audio_ds = load_dataset("AIOmarRehan/General_Audio_Dataset")
-image_ds = load_dataset("AIOmarRehan/Mel_Spectrogram_Images_for_Audio_Classification")
-# IMAGE HANDLING
 def safe_load_image(img):
     if img is None:
         return None
     if isinstance(img, np.ndarray):
@@ -22,17 +37,13 @@ def safe_load_image(img):
     img = img.convert("RGBA")
     return img
-# PROCESS SPECTROGRAM IMAGE
 def process_image_input(img):
     """Classify a single spectrogram image.

     Passes ``img`` through ``safe_load_image`` and hands the result to
     ``predict``.

     Returns:
         A ``(label, confidence, probs)`` tuple, with ``confidence`` rounded
         to 3 decimal places and ``probs`` returned exactly as ``predict``
         produced it.
     """
     # NOTE(review): safe_load_image returns None for a None input, so
     # predict would receive None here -- confirm callers guard against it.
     img = safe_load_image(img)
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
-# PROCESS RAW AUDIO
 def process_audio_input(audio_path):
-    imgs = preprocess_audio(audio_path)
     all_preds, all_confs, all_probs = [], [], []
     for img in imgs:
@@ -59,19 +70,16 @@ def process_audio_input(audio_path):
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
-# MAIN CLASSIFIER
-def classify(audio_path, image, random_audio, random_image):
-    # Load random audio if selected
-    if random_audio:
-        rand_sample = random.choice(audio_ds)
-        audio_path = rand_sample["audio"]["path"]
-    # Load random image if selected
-    if random_image:
-        rand_sample = random.choice(image_ds)
-        img_bytes = rand_sample["image"]
-        image = Image.open(img_bytes).convert("RGBA")
     # If spectrogram image
     if image is not None:
@@ -80,7 +88,7 @@ def classify(audio_path, image, random_audio, random_image):
             "Final Label": label,
             "Confidence": conf,
             "Details": probs
-        }, label
     # If raw audio
     if audio_path is not None:
@@ -90,12 +98,11 @@ def classify(audio_path, image, random_audio, random_image):
             "Confidence": conf,
             "All Chunk Labels": all_preds,
             "All Chunk Confidences": all_confs
-        }, label
     return "Please upload an audio file OR a spectrogram image.", ""
-# GRADIO UI
 interface = gr.Interface(
     fn=classify,
     inputs=[
@@ -111,7 +118,7 @@ interface = gr.Interface(
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
-        "You can also select random samples from the Hugging Face datasets.\n"
         "The output shows a JSON with all details and a separate field for the final label."
     ),
 )

 import gradio as gr
 import numpy as np
 from PIL import Image
 import librosa
+import matplotlib.pyplot as plt
+import io
+import os
 import random
+from collections import Counter, defaultdict
+from app.model import predict
+from app.preprocess import preprocess_audio
+# Dataset Paths (download manually from Hugging Face)
+AUDIO_DATASET_DIR = "General_Audio_Dataset"
+IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
+# Get file lists
+audio_files = [
+    os.path.join(AUDIO_DATASET_DIR, f)
+    for f in os.listdir(AUDIO_DATASET_DIR)
+    if f.lower().endswith((".wav", ".mp3"))
+]
+image_files = [
+    os.path.join(IMAGE_DATASET_DIR, f)
+    for f in os.listdir(IMAGE_DATASET_DIR)
+    if f.lower().endswith(".png")
+]
+# Helper functions
 def safe_load_image(img):
+    """Ensure input is PIL RGBA image"""
     if img is None:
         return None
     if isinstance(img, np.ndarray):
     img = img.convert("RGBA")
     return img
 def process_image_input(img):
     """Classify a single spectrogram image.

     Passes ``img`` through ``safe_load_image`` and hands the result to
     ``predict``.

     Returns:
         A ``(label, confidence, probs)`` tuple, with ``confidence`` rounded
         to 3 decimal places and ``probs`` returned exactly as ``predict``
         produced it.
     """
     # NOTE(review): safe_load_image returns None for a None input, so
     # predict would receive None here -- confirm callers guard against it.
     img = safe_load_image(img)
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
 def process_audio_input(audio_path):
+    imgs = preprocess_audio(audio_path)  # returns list of PIL RGBA images
     all_preds, all_confs, all_probs = [], [], []
     for img in imgs:
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
+# Main classifier
+def classify(audio_path, image, random_audio=False, random_image=False):
+    # Pick random audio
+    if random_audio and audio_files:
+        audio_path = random.choice(audio_files)
+    # Pick random image
+    if random_image and image_files:
+        img_path = random.choice(image_files)
+        image = Image.open(img_path).convert("RGBA")
     # If spectrogram image
     if image is not None:
             "Final Label": label,
             "Confidence": conf,
             "Details": probs
+        }, label
     # If raw audio
     if audio_path is not None:
             "Confidence": conf,
             "All Chunk Labels": all_preds,
             "All Chunk Confidences": all_confs
+        }, label
     return "Please upload an audio file OR a spectrogram image.", ""
+# Gradio Interface
 interface = gr.Interface(
     fn=classify,
     inputs=[
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
+        "You can also select random samples from the local datasets.\n"
         "The output shows a JSON with all details and a separate field for the final label."
     ),
 )