Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Sleeping

App Files Community

AIOmarRehan committed on Nov 20, 2025

Commit

904154d

verified ·

1 Parent(s): 20843b4

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -23

app.py CHANGED Viewed

@@ -11,25 +11,19 @@ from collections import Counter, defaultdict
 # Process Image Input
 def process_image_input(img):
-    """Classify a spectrogram image directly using model.predict"""
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
 # Process Audio Input
-def process_audio_input(audio_file):
-    # Save uploaded audio temporarily
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        tmp.write(audio_file)
-        tmp_path = tmp.name
-    # Preprocess → mel-spectrogram chunks (list of PIL images)
-    imgs = preprocess_audio(tmp_path)
-    os.remove(tmp_path)
-    # Predict on each chunk
     all_preds = []
     all_confs = []
     all_probs = []
@@ -59,10 +53,10 @@ def process_audio_input(audio_file):
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
-#  MAIN GRADIO CLASSIFICATION PIPELINE (AUDIO OR IMAGE)
-def classify(audio, image):
-    # If image is provided → classify image
     if image is not None:
         label, conf, probs = process_image_input(image)
         return {
@@ -71,9 +65,9 @@ def classify(audio, image):
             "Details": probs
         }
-    # If audio is provided → preprocess audio → classify
-    if audio is not None:
-        label, conf, all_preds, all_confs = process_audio_input(audio)
         return {
             "Final Label": label,
@@ -82,7 +76,7 @@ def classify(audio, image):
             "All Chunk Confidences": all_confs
         }
-    # Nothing provided
     return "Please upload an audio file OR a spectrogram image."
@@ -90,16 +84,15 @@ def classify(audio, image):
 interface = gr.Interface(
     fn=classify,
     inputs=[
-        gr.Audio(type="bytes", label="Upload Audio (WAV/MP3)"),
         gr.Image(type="pil", label="Upload Spectrogram Image")
     ],
     outputs=gr.JSON(label="Prediction Results"),
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
-        "The app automatically detects the input type:\n"
-        "• If audio → the model preprocesses it into mel spectrogram chunks.\n"
-        "• If spectrogram → the model classifies it directly.\n"
         "Built using CNN + Mel-Spectrogram + Gradio."
     ),
 )

 # Process Image Input
 def process_image_input(img):
+    # Classify a spectrogram image directly using model.predict
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
 # Process Audio Input
+def process_audio_input(audio_path):
+    # audio_path = filepath from Gradio
+    # Preprocess → mel-spectrogram → predict per chunk
+    # Preprocess to mel-spectrogram chunk images
+    imgs = preprocess_audio(audio_path)
     all_preds = []
     all_confs = []
     all_probs = []
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
+# Main prediction logic
+def classify(audio_path, image):
+    # If an image is provided → classify directly
     if image is not None:
         label, conf, probs = process_image_input(image)
         return {
             "Details": probs
         }
+    # If an audio file is provided → preprocess and classify
+    if audio_path is not None:
+        label, conf, all_preds, all_confs = process_audio_input(audio_path)
         return {
             "Final Label": label,
             "All Chunk Confidences": all_confs
         }
+    # Neither provided
     return "Please upload an audio file OR a spectrogram image."
 interface = gr.Interface(
     fn=classify,
     inputs=[
+        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
         gr.Image(type="pil", label="Upload Spectrogram Image")
     ],
     outputs=gr.JSON(label="Prediction Results"),
     title="General Audio Classifier (Audio + Spectrogram Support)",
     description=(
         "Upload a raw audio file OR a spectrogram image.\n"
+        "If audio → model preprocesses into mel-spectrogram chunks.\n"
+        "If image → model classifies the spectrogram directly.\n"
         "Built using CNN + Mel-Spectrogram + Gradio."
     ),
 )