AIOmarRehan committed on
Commit
e0d79c7
·
verified ·
1 Parent(s): d8c58cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -28
app.py CHANGED
@@ -1,29 +1,18 @@
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
4
- import os
5
  import random
 
6
  from collections import Counter, defaultdict
 
7
  from app.model import predict
8
  from app.preprocess import preprocess_audio
9
 
10
- # Dataset Paths (download manually from Hugging Face or put in space files)
11
- AUDIO_DATASET_DIR = "General_Audio_Dataset"
12
- IMAGE_DATASET_DIR = "Mel_Spectrogram_Images_for_Audio_Classification"
13
-
14
- # Get file lists safely
15
- audio_files = [
16
- os.path.join(AUDIO_DATASET_DIR, f)
17
- for f in os.listdir(AUDIO_DATASET_DIR)
18
- if f.lower().endswith((".wav", ".mp3"))
19
- ] if os.path.exists(AUDIO_DATASET_DIR) else []
20
-
21
- image_files = [
22
- os.path.join(IMAGE_DATASET_DIR, f)
23
- for f in os.listdir(IMAGE_DATASET_DIR)
24
- if f.lower().endswith(".png")
25
- ] if os.path.exists(IMAGE_DATASET_DIR) else []
26
 
 
27
  def safe_load_image(img):
28
  if img is None:
29
  return None
@@ -32,13 +21,13 @@ def safe_load_image(img):
32
  img = img.convert("RGBA")
33
  return img
34
 
35
- # Process image
36
  def process_image_input(img):
37
  img = safe_load_image(img)
38
  label, confidence, probs = predict(img)
39
  return label, round(confidence, 3), probs
40
 
41
- # Process audio
42
  def process_audio_input(audio_path):
43
  imgs = preprocess_audio(audio_path)
44
  all_preds, all_confs, all_probs = [], [], []
@@ -68,14 +57,24 @@ def process_audio_input(audio_path):
68
 
69
  # Main classifier
70
  def classify(audio_path, image, random_audio=False, random_image=False):
71
- # Pick random audio if selected
72
- if random_audio and audio_files:
73
- audio_path = random.choice(audio_files)
74
-
75
- # Pick random image if selected
76
- if random_image and image_files:
77
- img_path = random.choice(image_files)
78
- image = Image.open(img_path).convert("RGBA")
 
 
 
 
 
 
 
 
 
 
79
 
80
  # If spectrogram image
81
  if image is not None:
@@ -114,7 +113,7 @@ interface = gr.Interface(
114
  title="General Audio Classifier (Audio + Spectrogram Support)",
115
  description=(
116
  "Upload a raw audio file OR a spectrogram image.\n"
117
- "You can also select random samples from the local datasets.\n"
118
  "The output shows a JSON with all details and a separate field for the final label."
119
  ),
120
  )
 
1
  import gradio as gr
2
  import numpy as np
3
  from PIL import Image
 
4
  import random
5
+ import io
6
  from collections import Counter, defaultdict
7
+ from datasets import load_dataset
8
  from app.model import predict
9
  from app.preprocess import preprocess_audio
10
 
11
+ # Load Hugging Face datasets directly
12
+ audio_ds = load_dataset("AIOmarRehan/General_Audio_Dataset", split="train")
13
+ image_ds = load_dataset("AIOmarRehan/Mel_Spectrogram_Images_for_Audio_Classification", split="train")
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Helper function to safely load images
16
  def safe_load_image(img):
17
  if img is None:
18
  return None
 
21
  img = img.convert("RGBA")
22
  return img
23
 
24
+ # Process spectrogram image
25
  def process_image_input(img):
26
  img = safe_load_image(img)
27
  label, confidence, probs = predict(img)
28
  return label, round(confidence, 3), probs
29
 
30
+ # Process raw audio
31
  def process_audio_input(audio_path):
32
  imgs = preprocess_audio(audio_path)
33
  all_preds, all_confs, all_probs = [], [], []
 
57
 
58
  # Main classifier
59
  def classify(audio_path, image, random_audio=False, random_image=False):
60
+ # Pick random audio from HF dataset
61
+ if random_audio and len(audio_ds) > 0:
62
+ sample = random.choice(audio_ds)
63
+ # If dataset stores audio as file path or array
64
+ if isinstance(sample["audio"], dict) and "path" in sample["audio"]:
65
+ audio_path = sample["audio"]["path"]
66
+ elif isinstance(sample["audio"], dict) and "array" in sample["audio"]:
67
+ # Save array temporarily
68
+ import soundfile as sf
69
+ audio_path = "/tmp/random_audio.wav"
70
+ sf.write(audio_path, sample["audio"]["array"], sample["audio"]["sampling_rate"])
71
+
72
+ # Pick random image from HF dataset
73
+ if random_image and len(image_ds) > 0:
74
+ sample = random.choice(image_ds)
75
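+ # Handle image bytes (NOTE(review): if sample["image"] is a PIL image, tobytes() yields raw pixel data rather than an encoded file, so Image.open() on it is expected to fail; confirm and use the decoded image directly)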
76
+ img_bytes = sample["image"] if isinstance(sample["image"], bytes) else sample["image"].tobytes()
77
+ image = Image.open(io.BytesIO(img_bytes)).convert("RGBA")
78
 
79
  # If spectrogram image
80
  if image is not None:
 
113
  title="General Audio Classifier (Audio + Spectrogram Support)",
114
  description=(
115
  "Upload a raw audio file OR a spectrogram image.\n"
116
+ "You can also select random samples from your Hugging Face datasets.\n"
117
  "The output shows a JSON with all details and a separate field for the final label."
118
  ),
119
  )