AIOmarRehan commited on
Commit
7651694
·
verified ·
1 Parent(s): 05b56c7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ from PIL import Image
5
+ import tempfile
6
+ import os
7
+ from app.preprocess import preprocess_audio
8
+ from app.model import predict
9
+ from collections import Counter, defaultdict
10
+
11
+
12
# Process Image Input
def process_image_input(img):
    """Classify a spectrogram image directly using model.predict"""
    predicted_label, raw_confidence, class_probs = predict(img)
    rounded_confidence = round(raw_confidence, 3)
    return predicted_label, rounded_confidence, class_probs
17
+
18
+
19
# Process Audio Input
def process_audio_input(audio_file):
    """Classify raw audio bytes via per-chunk spectrogram predictions.

    Writes the bytes to a temporary WAV file, preprocesses it into
    mel-spectrogram chunks (PIL images), classifies each chunk, and
    combines the per-chunk predictions by majority vote; ties are broken
    by the largest summed confidence among the tied labels.

    Args:
        audio_file: Raw contents of the uploaded audio file, as bytes.

    Returns:
        Tuple of (final_label, final_confidence, all_chunk_labels,
        all_chunk_confidences), confidences rounded to 3 decimals.

    Raises:
        ValueError: If preprocessing produces no spectrogram chunks.
    """
    # Save uploaded audio temporarily.
    # NOTE(review): suffix is always ".wav" even for MP3 uploads — assumes
    # preprocess_audio sniffs the real format from content, not extension.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(audio_file)
        tmp_path = tmp.name

    try:
        # Preprocess -> mel-spectrogram chunks (list of PIL images)
        imgs = preprocess_audio(tmp_path)
    finally:
        # Remove the temp file even if preprocessing raises (was leaked before)
        os.remove(tmp_path)

    # Guard: an empty chunk list previously crashed below with a cryptic
    # ValueError from max() on an empty sequence.
    if not imgs:
        raise ValueError("Audio preprocessing produced no spectrogram chunks.")

    # Predict on each chunk (per-chunk probability vectors are not returned,
    # so they are not accumulated).
    all_preds = []
    all_confs = []
    for img in imgs:
        label, conf, _probs = predict(img)
        all_preds.append(label)
        all_confs.append(conf)

    # Majority vote over chunk labels
    counter = Counter(all_preds)
    max_count = max(counter.values())
    candidates = [lbl for lbl, cnt in counter.items() if cnt == max_count]

    if len(candidates) == 1:
        final_label = candidates[0]
    else:
        # Tie-break: candidate with the highest total confidence wins
        conf_sums = defaultdict(float)
        for lbl, conf in zip(all_preds, all_confs):
            if lbl in candidates:
                conf_sums[lbl] += conf
        final_label = max(conf_sums, key=conf_sums.get)

    # Final confidence = mean confidence of the chunks that voted for the winner
    final_conf = float(
        np.mean([c for lbl, c in zip(all_preds, all_confs) if lbl == final_label])
    )

    return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
60
+
61
+
62
# MAIN GRADIO CLASSIFICATION PIPELINE (AUDIO OR IMAGE)
def classify(audio, image):
    """Dispatch to image or audio classification based on which input is set.

    An image, when supplied, takes precedence over audio; with neither
    input a prompt string is returned instead of a result dict.
    """
    # Spectrogram image provided -> classify it directly
    if image is not None:
        label, conf, probs = process_image_input(image)
        return {
            "Final Label": label,
            "Confidence": conf,
            "Details": probs,
        }

    # Raw audio provided -> preprocess into chunks, then classify
    if audio is not None:
        label, conf, chunk_labels, chunk_confs = process_audio_input(audio)
        return {
            "Final Label": label,
            "Confidence": conf,
            "All Chunk Labels": chunk_labels,
            "All Chunk Confidences": chunk_confs,
        }

    # Neither input was supplied
    return "Please upload an audio file OR a spectrogram image."
87
+
88
+
89
# GRADIO UI
def _read_audio_bytes(audio_path):
    """Return the raw bytes of the file at *audio_path*, or None if no path."""
    if audio_path is None:
        return None
    with open(audio_path, "rb") as f:
        return f.read()


def _classify_from_ui(audio_path, image):
    """Adapter between the Gradio components and classify().

    BUG FIX: gr.Audio does not accept type="bytes" (valid values are
    "numpy" and "filepath"), so constructing the original interface raised
    at startup. The component now delivers a filepath, which this adapter
    loads into bytes for the byte-oriented classify() pipeline.
    """
    return classify(_read_audio_bytes(audio_path), image)


interface = gr.Interface(
    fn=_classify_from_ui,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
        gr.Image(type="pil", label="Upload Spectrogram Image"),
    ],
    outputs=gr.JSON(label="Prediction Results"),
    title="General Audio Classifier (Audio + Spectrogram Support)",
    # Mojibake in the original ("β€’", "β†’") repaired to the intended
    # "•" and "→" characters.
    description=(
        "Upload a raw audio file OR a spectrogram image.\n"
        "The app automatically detects the input type:\n"
        "• If audio → the model preprocesses it into mel spectrogram chunks.\n"
        "• If spectrogram → the model classifies it directly.\n"
        "Built using CNN + Mel-Spectrogram + Gradio."
    ),
)

interface.launch()