AIOmarRehan committed on
Commit
904154d
·
verified ·
1 Parent(s): 20843b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -23
app.py CHANGED
@@ -11,25 +11,19 @@ from collections import Counter, defaultdict
11
 
12
  # Process Image Input
13
  def process_image_input(img):
14
- """Classify a spectrogram image directly using model.predict"""
15
  label, confidence, probs = predict(img)
16
  return label, round(confidence, 3), probs
17
 
18
 
19
  # Process Audio Input
20
- def process_audio_input(audio_file):
 
 
21
 
22
- # Save uploaded audio temporarily
23
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
24
- tmp.write(audio_file)
25
- tmp_path = tmp.name
26
 
27
- # Preprocess → mel-spectrogram chunks (list of PIL images)
28
- imgs = preprocess_audio(tmp_path)
29
-
30
- os.remove(tmp_path)
31
-
32
- # Predict on each chunk
33
  all_preds = []
34
  all_confs = []
35
  all_probs = []
@@ -59,10 +53,10 @@ def process_audio_input(audio_file):
59
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
60
 
61
 
62
- # MAIN GRADIO CLASSIFICATION PIPELINE (AUDIO OR IMAGE)
63
- def classify(audio, image):
64
 
65
- # If image is provided → classify image
66
  if image is not None:
67
  label, conf, probs = process_image_input(image)
68
  return {
@@ -71,9 +65,9 @@ def classify(audio, image):
71
  "Details": probs
72
  }
73
 
74
- # If audio is provided → preprocess audio → classify
75
- if audio is not None:
76
- label, conf, all_preds, all_confs = process_audio_input(audio)
77
 
78
  return {
79
  "Final Label": label,
@@ -82,7 +76,7 @@ def classify(audio, image):
82
  "All Chunk Confidences": all_confs
83
  }
84
 
85
- # Nothing provided
86
  return "Please upload an audio file OR a spectrogram image."
87
 
88
 
@@ -90,16 +84,15 @@ def classify(audio, image):
90
  interface = gr.Interface(
91
  fn=classify,
92
  inputs=[
93
- gr.Audio(type="bytes", label="Upload Audio (WAV/MP3)"),
94
  gr.Image(type="pil", label="Upload Spectrogram Image")
95
  ],
96
  outputs=gr.JSON(label="Prediction Results"),
97
  title="General Audio Classifier (Audio + Spectrogram Support)",
98
  description=(
99
  "Upload a raw audio file OR a spectrogram image.\n"
100
- "The app automatically detects the input type:\n"
101
- "• If audio → the model preprocesses it into mel spectrogram chunks.\n"
102
- "• If spectrogram → the model classifies it directly.\n"
103
  "Built using CNN + Mel-Spectrogram + Gradio."
104
  ),
105
  )
 
11
 
12
  # Process Image Input
13
  def process_image_input(img):
14
+ # Classify a spectrogram image directly using model.predict
15
  label, confidence, probs = predict(img)
16
  return label, round(confidence, 3), probs
17
 
18
 
19
  # Process Audio Input
20
+ def process_audio_input(audio_path):
21
+ # audio_path = filepath from Gradio
22
+ # Preprocess → mel-spectrogram → predict per chunk
23
 
24
+ # Preprocess to mel-spectrogram chunk images
25
+ imgs = preprocess_audio(audio_path)
 
 
26
 
 
 
 
 
 
 
27
  all_preds = []
28
  all_confs = []
29
  all_probs = []
 
53
  return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
54
 
55
 
56
+ # Main prediction logic
57
+ def classify(audio_path, image):
58
 
59
+ # If an image is provided → classify directly
60
  if image is not None:
61
  label, conf, probs = process_image_input(image)
62
  return {
 
65
  "Details": probs
66
  }
67
 
68
+ # If an audio file is provided → preprocess and classify
69
+ if audio_path is not None:
70
+ label, conf, all_preds, all_confs = process_audio_input(audio_path)
71
 
72
  return {
73
  "Final Label": label,
 
76
  "All Chunk Confidences": all_confs
77
  }
78
 
79
+ # Neither provided
80
  return "Please upload an audio file OR a spectrogram image."
81
 
82
 
 
84
  interface = gr.Interface(
85
  fn=classify,
86
  inputs=[
87
+ gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
88
  gr.Image(type="pil", label="Upload Spectrogram Image")
89
  ],
90
  outputs=gr.JSON(label="Prediction Results"),
91
  title="General Audio Classifier (Audio + Spectrogram Support)",
92
  description=(
93
  "Upload a raw audio file OR a spectrogram image.\n"
94
+ "If audio → model preprocesses into mel-spectrogram chunks.\n"
95
+ "If image → model classifies the spectrogram directly.\n"
 
96
  "Built using CNN + Mel-Spectrogram + Gradio."
97
  ),
98
  )