AIOmarRehan committed on
Commit
6fa015b
·
verified ·
1 Parent(s): 17d28b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -28
app.py CHANGED
@@ -5,22 +5,20 @@ from app.preprocess import preprocess_audio
5
  from app.model import predict
6
  from collections import Counter, defaultdict
7
  import librosa
 
 
 
 
 
 
8
 
9
 
10
  # IMAGE HANDLING
11
  def safe_load_image(img):
12
- """
13
- Ensure the input is a valid PIL RGBA image.
14
- Gradio sometimes gives numpy arrays → we convert safely.
15
- """
16
  if img is None:
17
  return None
18
-
19
- # If numpy array → convert to PIL
20
  if isinstance(img, np.ndarray):
21
  img = Image.fromarray(img)
22
-
23
- # Convert to RGBA, to make sure the Alpha channel keep
24
  img = img.convert("RGBA")
25
  return img
26
 
@@ -34,12 +32,8 @@ def process_image_input(img):
34
 
35
  # PROCESS RAW AUDIO
36
  def process_audio_input(audio_path):
37
-
38
- imgs = preprocess_audio(audio_path) # returns list of PIL RGBA images
39
-
40
- all_preds = []
41
- all_confs = []
42
- all_probs = []
43
 
44
  for img in imgs:
45
  label, conf, probs = predict(img)
@@ -56,20 +50,28 @@ def process_audio_input(audio_path):
56
  final_label = candidates[0]
57
  else:
58
  conf_sums = defaultdict(float)
59
- for i, label in enumerate(all_preds):
60
- if label in candidates:
61
- conf_sums[label] += all_confs[i]
62
  final_label = max(conf_sums, key=conf_sums.get)
63
 
64
- final_conf = float(
65
- np.mean([all_confs[i] for i, lbl in enumerate(all_preds) if lbl == final_label])
66
- )
67
 
68
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
69
 
70
 
71
  # MAIN CLASSIFIER
72
- def classify(audio_path, image):
 
 
 
 
 
 
 
 
 
 
73
 
74
  # If spectrogram image
75
  if image is not None:
@@ -78,7 +80,7 @@ def classify(audio_path, image):
78
  "Final Label": label,
79
  "Confidence": conf,
80
  "Details": probs
81
- }
82
 
83
  # If raw audio
84
  if audio_path is not None:
@@ -88,9 +90,9 @@ def classify(audio_path, image):
88
  "Confidence": conf,
89
  "All Chunk Labels": all_preds,
90
  "All Chunk Confidences": all_confs
91
- }
92
 
93
- return "Please upload an audio file OR a spectrogram image."
94
 
95
 
96
  # GRADIO UI
@@ -98,14 +100,19 @@ interface = gr.Interface(
98
  fn=classify,
99
  inputs=[
100
  gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
101
- gr.Image(type="pil", label="Upload Spectrogram Image (PNG RGBA Supported)")
 
 
 
 
 
 
102
  ],
103
- outputs=gr.JSON(label="Prediction Results"),
104
  title="General Audio Classifier (Audio + Spectrogram Support)",
105
  description=(
106
  "Upload a raw audio file OR a spectrogram image.\n"
107
- "If audio → model preprocesses into mel-spectrogram chunks.\n"
108
- "If image → model classifies the spectrogram directly.\n"
109
  ),
110
  )
111
 
 
5
  from app.model import predict
6
  from collections import Counter, defaultdict
7
  import librosa
8
+ import random
9
+ from datasets import load_dataset
10
+
11
+ # Load Hugging Face datasets
12
+ audio_ds = load_dataset("AIOmarRehan/General_Audio_Dataset", split="train")
13
+ image_ds = load_dataset("AIOmarRehan/Mel_Spectrogram_Images_for_Audio_Classification", split="train")
14
 
15
 
16
  # IMAGE HANDLING
17
  def safe_load_image(img):
 
 
 
 
18
  if img is None:
19
  return None
 
 
20
  if isinstance(img, np.ndarray):
21
  img = Image.fromarray(img)
 
 
22
  img = img.convert("RGBA")
23
  return img
24
 
 
32
 
33
  # PROCESS RAW AUDIO
34
  def process_audio_input(audio_path):
35
+ imgs = preprocess_audio(audio_path)
36
+ all_preds, all_confs, all_probs = [], [], []
 
 
 
 
37
 
38
  for img in imgs:
39
  label, conf, probs = predict(img)
 
50
  final_label = candidates[0]
51
  else:
52
  conf_sums = defaultdict(float)
53
+ for i, lbl in enumerate(all_preds):
54
+ if lbl in candidates:
55
+ conf_sums[lbl] += all_confs[i]
56
  final_label = max(conf_sums, key=conf_sums.get)
57
 
58
+ final_conf = float(np.mean([all_confs[i] for i, lbl in enumerate(all_preds) if lbl == final_label]))
 
 
59
 
60
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
61
 
62
 
63
  # MAIN CLASSIFIER
64
+ def classify(audio_path, image, random_audio, random_image):
65
+ # Load random audio if selected
66
+ if random_audio:
67
+ rand_sample = random.choice(audio_ds)
68
+ audio_path = rand_sample["audio"]["path"]
69
+
70
+ # Load random image if selected
71
+ if random_image:
72
+ rand_sample = random.choice(image_ds)
73
+ img_bytes = rand_sample["image"]
74
+ image = Image.open(img_bytes).convert("RGBA")
75
 
76
  # If spectrogram image
77
  if image is not None:
 
80
  "Final Label": label,
81
  "Confidence": conf,
82
  "Details": probs
83
+ }, label
84
 
85
  # If raw audio
86
  if audio_path is not None:
 
90
  "Confidence": conf,
91
  "All Chunk Labels": all_preds,
92
  "All Chunk Confidences": all_confs
93
+ }, label
94
 
95
+ return "Please upload an audio file OR a spectrogram image.", ""
96
 
97
 
98
  # GRADIO UI
 
100
  fn=classify,
101
  inputs=[
102
  gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
103
+ gr.Image(type="pil", label="Upload Spectrogram Image (PNG RGBA Supported)"),
104
+ gr.Checkbox(label="Pick Random Audio from Dataset"),
105
+ gr.Checkbox(label="Pick Random Image from Dataset"),
106
+ ],
107
+ outputs=[
108
+ gr.JSON(label="Prediction Results"),
109
+ gr.Textbox(label="Final Label", interactive=False)
110
  ],
 
111
  title="General Audio Classifier (Audio + Spectrogram Support)",
112
  description=(
113
  "Upload a raw audio file OR a spectrogram image.\n"
114
+ "You can also select random samples from the Hugging Face datasets.\n"
115
+ "The output shows a JSON with all details and a separate field for the final label."
116
  ),
117
  )
118