AIOmarRehan committed on
Commit
600df41
·
verified ·
1 Parent(s): 08a57d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -33
app.py CHANGED
@@ -1,20 +1,35 @@
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
4
- from app.preprocess import preprocess_audio
5
- from app.model import predict
6
- from collections import Counter, defaultdict
7
  import librosa
 
 
 
8
  import random
9
- from datasets import load_dataset
10
-
11
- # Load Hugging Face datasets
12
- audio_ds = load_dataset("AIOmarRehan/General_Audio_Dataset")
13
- image_ds = load_dataset("AIOmarRehan/Mel_Spectrogram_Images_for_Audio_Classification")
14
-
15
-
16
- # IMAGE HANDLING
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def safe_load_image(img):
 
18
  if img is None:
19
  return None
20
  if isinstance(img, np.ndarray):
@@ -22,17 +37,13 @@ def safe_load_image(img):
22
  img = img.convert("RGBA")
23
  return img
24
 
25
-
26
- # PROCESS SPECTROGRAM IMAGE
27
  def process_image_input(img):
28
  img = safe_load_image(img)
29
  label, confidence, probs = predict(img)
30
  return label, round(confidence, 3), probs
31
 
32
-
33
- # PROCESS RAW AUDIO
34
  def process_audio_input(audio_path):
35
- imgs = preprocess_audio(audio_path)
36
  all_preds, all_confs, all_probs = [], [], []
37
 
38
  for img in imgs:
@@ -59,19 +70,16 @@ def process_audio_input(audio_path):
59
 
60
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
61
 
 
 
 
 
 
62
 
63
- # MAIN CLASSIFIER
64
- def classify(audio_path, image, random_audio, random_image):
65
- # Load random audio if selected
66
- if random_audio:
67
- rand_sample = random.choice(audio_ds)
68
- audio_path = rand_sample["audio"]["path"]
69
-
70
- # Load random image if selected
71
- if random_image:
72
- rand_sample = random.choice(image_ds)
73
- img_bytes = rand_sample["image"]
74
- image = Image.open(img_bytes).convert("RGBA")
75
 
76
  # If spectrogram image
77
  if image is not None:
@@ -80,7 +88,7 @@ def classify(audio_path, image, random_audio, random_image):
80
  "Final Label": label,
81
  "Confidence": conf,
82
  "Details": probs
83
- }, label
84
 
85
  # If raw audio
86
  if audio_path is not None:
@@ -90,12 +98,11 @@ def classify(audio_path, image, random_audio, random_image):
90
  "Confidence": conf,
91
  "All Chunk Labels": all_preds,
92
  "All Chunk Confidences": all_confs
93
- }, label
94
 
95
  return "Please upload an audio file OR a spectrogram image.", ""
96
 
97
-
98
- # GRADIO UI
99
  interface = gr.Interface(
100
  fn=classify,
101
  inputs=[
@@ -111,7 +118,7 @@ interface = gr.Interface(
111
  title="General Audio Classifier (Audio + Spectrogram Support)",
112
  description=(
113
  "Upload a raw audio file OR a spectrogram image.\n"
114
- "You can also select random samples from the Hugging Face datasets.\n"
115
  "The output shows a JSON with all details and a separate field for the final label."
116
  ),
117
  )
 
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
 
 
 
4
  import librosa
5
+ import matplotlib.pyplot as plt
6
+ import io
7
+ import os
8
  import random
9
+ from collections import Counter, defaultdict
10
+ from app.model import predict
11
+ from app.preprocess import preprocess_audio
12
+
13
+ # Dataset Paths (download manually from Hugging Face)
14
+ AUDIO_DATASET_DIR = "General_Audio_Dataset"
15
+ IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
16
+
17
+ # Get file lists
18
+ audio_files = [
19
+ os.path.join(AUDIO_DATASET_DIR, f)
20
+ for f in os.listdir(AUDIO_DATASET_DIR)
21
+ if f.lower().endswith((".wav", ".mp3"))
22
+ ]
23
+
24
+ image_files = [
25
+ os.path.join(IMAGE_DATASET_DIR, f)
26
+ for f in os.listdir(IMAGE_DATASET_DIR)
27
+ if f.lower().endswith(".png")
28
+ ]
29
+
30
+ # Helper functions
31
  def safe_load_image(img):
32
+ """Ensure input is PIL RGBA image"""
33
  if img is None:
34
  return None
35
  if isinstance(img, np.ndarray):
 
37
  img = img.convert("RGBA")
38
  return img
39
 
 
 
40
  def process_image_input(img):
41
  img = safe_load_image(img)
42
  label, confidence, probs = predict(img)
43
  return label, round(confidence, 3), probs
44
 
 
 
45
  def process_audio_input(audio_path):
46
+ imgs = preprocess_audio(audio_path) # returns list of PIL RGBA images
47
  all_preds, all_confs, all_probs = [], [], []
48
 
49
  for img in imgs:
 
70
 
71
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
72
 
73
+ # Main classifier
74
+ def classify(audio_path, image, random_audio=False, random_image=False):
75
+ # Pick random audio
76
+ if random_audio and audio_files:
77
+ audio_path = random.choice(audio_files)
78
 
79
+ # Pick random image
80
+ if random_image and image_files:
81
+ img_path = random.choice(image_files)
82
+ image = Image.open(img_path).convert("RGBA")
 
 
 
 
 
 
 
 
83
 
84
  # If spectrogram image
85
  if image is not None:
 
88
  "Final Label": label,
89
  "Confidence": conf,
90
  "Details": probs
91
+ }, label
92
 
93
  # If raw audio
94
  if audio_path is not None:
 
98
  "Confidence": conf,
99
  "All Chunk Labels": all_preds,
100
  "All Chunk Confidences": all_confs
101
+ }, label
102
 
103
  return "Please upload an audio file OR a spectrogram image.", ""
104
 
105
+ # Gradio Interface
 
106
  interface = gr.Interface(
107
  fn=classify,
108
  inputs=[
 
118
  title="General Audio Classifier (Audio + Spectrogram Support)",
119
  description=(
120
  "Upload a raw audio file OR a spectrogram image.\n"
121
+ "You can also select random samples from the local datasets.\n"
122
  "The output shows a JSON with all details and a separate field for the final label."
123
  ),
124
  )