AIOmarRehan committed on
Commit
d8c58cd
·
verified ·
1 Parent(s): 600df41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -15
app.py CHANGED
@@ -1,35 +1,30 @@
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
4
- import librosa
5
- import matplotlib.pyplot as plt
6
- import io
7
  import os
8
  import random
9
  from collections import Counter, defaultdict
10
- from app.model import predict
11
- from app.preprocess import preprocess_audio
12
 
13
- # Dataset Paths (download manually from Hugging Face)
14
  AUDIO_DATASET_DIR = "General_Audio_Dataset"
15
  IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
16
 
17
- # Get file lists
18
  audio_files = [
19
  os.path.join(AUDIO_DATASET_DIR, f)
20
  for f in os.listdir(AUDIO_DATASET_DIR)
21
  if f.lower().endswith((".wav", ".mp3"))
22
- ]
23
 
24
  image_files = [
25
  os.path.join(IMAGE_DATASET_DIR, f)
26
  for f in os.listdir(IMAGE_DATASET_DIR)
27
  if f.lower().endswith(".png")
28
- ]
29
 
30
- # Helper functions
31
  def safe_load_image(img):
32
- """Ensure input is PIL RGBA image"""
33
  if img is None:
34
  return None
35
  if isinstance(img, np.ndarray):
@@ -37,13 +32,15 @@ def safe_load_image(img):
37
  img = img.convert("RGBA")
38
  return img
39
 
 
40
  def process_image_input(img):
41
  img = safe_load_image(img)
42
  label, confidence, probs = predict(img)
43
  return label, round(confidence, 3), probs
44
 
 
45
  def process_audio_input(audio_path):
46
- imgs = preprocess_audio(audio_path) # returns list of PIL RGBA images
47
  all_preds, all_confs, all_probs = [], [], []
48
 
49
  for img in imgs:
@@ -67,16 +64,15 @@ def process_audio_input(audio_path):
67
  final_label = max(conf_sums, key=conf_sums.get)
68
 
69
  final_conf = float(np.mean([all_confs[i] for i, lbl in enumerate(all_preds) if lbl == final_label]))
70
-
71
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
72
 
73
  # Main classifier
74
  def classify(audio_path, image, random_audio=False, random_image=False):
75
- # Pick random audio
76
  if random_audio and audio_files:
77
  audio_path = random.choice(audio_files)
78
 
79
- # Pick random image
80
  if random_image and image_files:
81
  img_path = random.choice(image_files)
82
  image = Image.open(img_path).convert("RGBA")
 
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
 
 
 
4
  import os
5
  import random
6
  from collections import Counter, defaultdict
7
+ from app.model import predict
8
+ from app.preprocess import preprocess_audio
9
 
10
+ # Dataset Paths (download manually from Hugging Face or put in space files)
11
  AUDIO_DATASET_DIR = "General_Audio_Dataset"
12
  IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
13
 
14
+ # Get file lists safely
15
  audio_files = [
16
  os.path.join(AUDIO_DATASET_DIR, f)
17
  for f in os.listdir(AUDIO_DATASET_DIR)
18
  if f.lower().endswith((".wav", ".mp3"))
19
+ ] if os.path.exists(AUDIO_DATASET_DIR) else []
20
 
21
  image_files = [
22
  os.path.join(IMAGE_DATASET_DIR, f)
23
  for f in os.listdir(IMAGE_DATASET_DIR)
24
  if f.lower().endswith(".png")
25
+ ] if os.path.exists(IMAGE_DATASET_DIR) else []
26
 
 
27
  def safe_load_image(img):
 
28
  if img is None:
29
  return None
30
  if isinstance(img, np.ndarray):
 
32
  img = img.convert("RGBA")
33
  return img
34
 
35
+ # Process image
36
  def process_image_input(img):
37
  img = safe_load_image(img)
38
  label, confidence, probs = predict(img)
39
  return label, round(confidence, 3), probs
40
 
41
+ # Process audio
42
  def process_audio_input(audio_path):
43
+ imgs = preprocess_audio(audio_path)
44
  all_preds, all_confs, all_probs = [], [], []
45
 
46
  for img in imgs:
 
64
  final_label = max(conf_sums, key=conf_sums.get)
65
 
66
  final_conf = float(np.mean([all_confs[i] for i, lbl in enumerate(all_preds) if lbl == final_label]))
 
67
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
68
 
69
  # Main classifier
70
  def classify(audio_path, image, random_audio=False, random_image=False):
71
+ # Pick random audio if selected
72
  if random_audio and audio_files:
73
  audio_path = random.choice(audio_files)
74
 
75
+ # Pick random image if selected
76
  if random_image and image_files:
77
  img_path = random.choice(image_files)
78
  image = Image.open(img_path).convert("RGBA")