Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Running

App Files Community

AIOmarRehan committed on Nov 20, 2025

Commit

e0d79c7

verified ·

1 Parent(s): d8c58cd

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -28

app.py CHANGED Viewed

@@ -1,29 +1,18 @@
 import gradio as gr
 import numpy as np
 from PIL import Image
-import os
 import random
 from collections import Counter, defaultdict
 from app.model import predict
 from app.preprocess import preprocess_audio
-# Dataset Paths (download manually from Hugging Face or put in space files)
-AUDIO_DATASET_DIR = "General_Audio_Dataset"
-IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
-# Get file lists safely
-audio_files = [
-    os.path.join(AUDIO_DATASET_DIR, f)
-    for f in os.listdir(AUDIO_DATASET_DIR)
-    if f.lower().endswith((".wav", ".mp3"))
-] if os.path.exists(AUDIO_DATASET_DIR) else []
-image_files = [
-    os.path.join(IMAGE_DATASET_DIR, f)
-    for f in os.listdir(IMAGE_DATASET_DIR)
-    if f.lower().endswith(".png")
-] if os.path.exists(IMAGE_DATASET_DIR) else []
 def safe_load_image(img):
     if img is None:
         return None
@@ -32,13 +21,13 @@ def safe_load_image(img):
     img = img.convert("RGBA")
     return img
-# Process image
 def process_image_input(img):
     img = safe_load_image(img)
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
-# Process audio
 def process_audio_input(audio_path):
     imgs = preprocess_audio(audio_path)
     all_preds, all_confs, all_probs = [], [], []
@@ -68,14 +57,24 @@ def process_audio_input(audio_path):
 # Main classifier
 def classify(audio_path, image, random_audio=False, random_image=False):
-    # Pick random audio if selected
-    if random_audio and audio_files:
-        audio_path = random.choice(audio_files)
-    # Pick random image if selected
-    if random_image and image_files:
-        img_path = random.choice(image_files)
-        image = Image.open(img_path).convert("RGBA")
     # If spectrogram image
     if image is not None:
@@ -114,7 +113,7 @@ interface = gr.Interface(
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
-        "You can also select random samples from the local datasets.\n"
         "The output shows a JSON with all details and a separate field for the final label."
     ),
 )

 import gradio as gr
 import numpy as np
 from PIL import Image
 import random
+import io
 from collections import Counter, defaultdict
+from datasets import load_dataset
 from app.model import predict
 from app.preprocess import preprocess_audio
+# Load Hugging Face datasets directly
+audio_ds = load_dataset("AIOmarRehan/General_Audio_Dataset", split="train")
+image_ds = load_dataset("AIOmarRehan/Mel_Spectrogram_Images_for_Audio_Classification", split="train")
+# Helper function to safely load images
 def safe_load_image(img):
     if img is None:
         return None
     img = img.convert("RGBA")
     return img
+# Process spectrogram image
 def process_image_input(img):
     img = safe_load_image(img)
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
+# Process raw audio
 def process_audio_input(audio_path):
     imgs = preprocess_audio(audio_path)
     all_preds, all_confs, all_probs = [], [], []
 # Main classifier
 def classify(audio_path, image, random_audio=False, random_image=False):
+    # Pick random audio from HF dataset
+    if random_audio and len(audio_ds) > 0:
+        sample = random.choice(audio_ds)
+        # If dataset stores audio as file path or array
+        if isinstance(sample["audio"], dict) and "path" in sample["audio"]:
+            audio_path = sample["audio"]["path"]
+        elif isinstance(sample["audio"], dict) and "array" in sample["audio"]:
+            # Save array temporarily
+            import soundfile as sf
+            audio_path = "/tmp/random_audio.wav"
+            sf.write(audio_path, sample["audio"]["array"], sample["audio"]["sampling_rate"])
+    # Pick random image from HF dataset
+    if random_image and len(image_ds) > 0:
+        sample = random.choice(image_ds)
+        # Handle image bytes
+        img_bytes = sample["image"] if isinstance(sample["image"], bytes) else sample["image"].tobytes()
+        image = Image.open(io.BytesIO(img_bytes)).convert("RGBA")
     # If spectrogram image
     if image is not None:
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
+        "You can also select random samples from your Hugging Face datasets.\n"
         "The output shows a JSON with all details and a separate field for the final label."
     ),
 )