Aynursusuz committed on
Commit
8b61ed4
·
1 Parent(s): 0676533

Ultra simple Interface version

Browse files
Files changed (1) hide show
  1. app.py +95 -191
app.py CHANGED
@@ -6,205 +6,109 @@ import io
6
  from PIL import Image
7
  from scipy.stats import kurtosis, skew
8
 
9
# Canonical catalogue of every metric the analyzer can report; the order here
# is the order offered in the metric-selection checkbox group.
ALL_METRICS = [
    "SNR (dB)",
    "RMS Energy",
    "Peak Level (dB)",
    "Crest Factor",
    "Dynamic Range (dB)",
    "Zero Crossing Rate",
    "Spectral Centroid (Hz)",
    "Spectral Rolloff (Hz)",
    "Spectral Bandwidth (Hz)",
    "Spectral Flatness",
    "Spectral Contrast (Mean)",
    "Harmonic-to-Noise Ratio (dB)",
    "THD (%)",
    "Tempo (BPM)",
    "Onset Strength (Mean)",
    "Kurtosis",
    "Skewness",
    "Fundamental Frequency (Hz)",
    "Loudness (approx LUFS)",
    "Silence Ratio",
    "Clipping Ratio (%)",
    "MFCC Mean",
    "MFCC Std Dev",
    "Chroma Mean",
    "Chroma Std Dev",
]
18
-
19
def calculate_metrics(y, sr):
    """Compute a dictionary of audio-quality metrics for a mono signal.

    Args:
        y: 1-D numpy array of audio samples (mono).
        sr: sample rate in Hz.

    Returns:
        dict mapping metric names (matching ALL_METRICS) to rounded values,
        plus an aggregate 'Overall Quality Score' in [0, 100].
    """
    metrics = {}

    # --- Amplitude & Energy ---
    # 5th percentile of |y| serves as a crude noise-floor estimate.
    noise_floor = np.percentile(np.abs(y), 5)
    signal_power = np.mean(y ** 2)
    noise_power = noise_floor ** 2
    snr = 10 * np.log10(signal_power / (noise_power + 1e-10))
    metrics['SNR (dB)'] = round(snr, 2)

    rms = np.sqrt(np.mean(y ** 2))
    metrics['RMS Energy'] = round(rms, 4)
    metrics['Peak Level (dB)'] = round(20 * np.log10(np.max(np.abs(y)) + 1e-10), 2)
    metrics['Crest Factor'] = round(np.max(np.abs(y)) / (rms + 1e-10), 2)

    # Fix: the original called np.min on y[y != 0], which raises on an
    # all-zero (digitally silent) signal. Guard that case explicitly.
    nonzero_mag = np.abs(y[y != 0])
    if nonzero_mag.size:
        dynamic_range = 20 * np.log10(np.max(np.abs(y)) / (np.min(nonzero_mag) + 1e-10))
    else:
        dynamic_range = 0.0
    metrics['Dynamic Range (dB)'] = round(dynamic_range, 2)

    # --- Spectral ---
    metrics['Zero Crossing Rate'] = round(np.mean(librosa.feature.zero_crossing_rate(y)), 4)
    metrics['Spectral Centroid (Hz)'] = round(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0]), 2)
    metrics['Spectral Rolloff (Hz)'] = round(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)[0]), 2)
    metrics['Spectral Bandwidth (Hz)'] = round(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]), 2)
    metrics['Spectral Flatness'] = round(np.mean(librosa.feature.spectral_flatness(y=y)[0]), 4)
    metrics['Spectral Contrast (Mean)'] = round(np.mean(librosa.feature.spectral_contrast(y=y, sr=sr)), 2)

    # --- Harmonic ---
    # HNR approximated as harmonic-vs-percussive power ratio after HPSS.
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    harmonic_power = np.mean(y_harmonic ** 2)
    percussive_power = np.mean(y_percussive ** 2)
    hnr = 10 * np.log10((harmonic_power + 1e-10) / (percussive_power + 1e-10))
    metrics['Harmonic-to-Noise Ratio (dB)'] = round(hnr, 2)

    # THD: power at harmonics 2f..5f relative to the strongest FFT bin.
    fft = np.abs(np.fft.rfft(y))
    if len(fft) > 1:
        fundamental_idx = np.argmax(fft[1:]) + 1
        fundamental_power = fft[fundamental_idx] ** 2
        harmonic_indices = [i * fundamental_idx for i in range(2, 6) if i * fundamental_idx < len(fft)]
        harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
        thd = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
        metrics['THD (%)'] = round(min(thd, 100), 2)  # cap: >100% is meaningless
    else:
        metrics['THD (%)'] = 0.0

    # --- Temporal ---
    # beat_track can fail on very short / atonal input; fall back to 0 BPM.
    try:
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        metrics['Tempo (BPM)'] = round(float(tempo), 1)
    except Exception:  # fix: was a bare except, which also traps SystemExit
        metrics['Tempo (BPM)'] = 0.0

    metrics['Onset Strength (Mean)'] = round(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), 4)

    # --- Statistical ---
    metrics['Kurtosis'] = round(kurtosis(y), 2)
    metrics['Skewness'] = round(skew(y), 2)

    # --- Frequency ---
    # YIN pitch tracking over a vocal-ish range; NaN mean means "no pitch".
    try:
        f0 = librosa.yin(y, fmin=50, fmax=400, sr=sr)
        f0_mean = np.nanmean(f0[f0 > 0])
        metrics['Fundamental Frequency (Hz)'] = round(f0_mean, 2) if not np.isnan(f0_mean) else 0.0
    except Exception:  # fix: was a bare except
        metrics['Fundamental Frequency (Hz)'] = 0.0

    # --- Perceptual ---
    metrics['Loudness (approx LUFS)'] = round(20 * np.log10(rms + 1e-10), 2)
    silence_threshold = 0.01
    metrics['Silence Ratio'] = round(np.sum(np.abs(y) < silence_threshold) / len(y), 4)
    clipping_threshold = 0.99
    metrics['Clipping Ratio (%)'] = round(np.sum(np.abs(y) > clipping_threshold) / len(y) * 100, 4)

    # --- MFCC & Chroma ---
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    metrics['MFCC Mean'] = round(np.mean(mfcc), 4)
    metrics['MFCC Std Dev'] = round(np.std(mfcc), 4)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Chroma Mean'] = round(np.mean(chroma), 4)
    metrics['Chroma Std Dev'] = round(np.std(chroma), 4)

    # --- Quality Score ---
    # Weighted blend (weights sum to 100): SNR 20, THD 15, dynamic range 15,
    # RMS 10, spectral flatness 10, clipping 15, HNR 15.
    quality_score = (
        min(max(snr, 0), 40) / 40 * 20 +
        (1 - min(metrics['THD (%)'], 10) / 10) * 15 +
        min(max(dynamic_range, 0), 60) / 60 * 15 +
        min(rms, 0.7) / 0.7 * 10 +
        (1 - metrics['Spectral Flatness']) * 10 +
        (1 - min(metrics['Clipping Ratio (%)'], 100) / 100) * 15 +
        min(max(hnr, -10), 20) / 30 * 15
    )
    metrics['Overall Quality Score'] = round(quality_score, 1)

    return metrics
112
-
113
def create_viz(y, sr, selected):
    """Render the visualization panel for the selected metrics.

    Args:
        y: 1-D numpy array of audio samples (mono).
        sr: sample rate in Hz.
        selected: list of metric names chosen by the user; controls which
            plots (waveform / spectrogram / mel / MFCC) are drawn.

    Returns:
        PIL.Image with the rendered figure.
    """
    show_wave = any(m in selected for m in ["SNR (dB)", "RMS Energy", "Peak Level (dB)"])
    show_spec = "Spectral Centroid (Hz)" in selected
    show_mel = "Spectral Flatness" in selected
    show_mfcc = "MFCC Mean" in selected

    plots = [show_wave, show_spec, show_mel, show_mfcc]
    num = sum(plots) or 2  # always lay out at least a 1x2 grid

    rows = (num + 1) // 2
    fig, axes = plt.subplots(rows, 2, figsize=(12, 4 * rows))
    # Fix: subplots(1, 2) returns a 1-D array of 2 axes even when num == 1;
    # the original wrapped that array in a list, so axes[0].plot crashed with
    # AttributeError whenever exactly one plot was selected. Flattening via
    # atleast_1d handles every grid shape uniformly.
    axes = np.atleast_1d(axes).flatten()

    idx = 0

    if show_wave:
        axes[idx].plot(np.linspace(0, len(y) / sr, len(y)), y, linewidth=0.5)
        axes[idx].set_title('Waveform')
        axes[idx].set_xlabel('Time (s)')
        axes[idx].grid(True, alpha=0.3)
        idx += 1

    if show_spec:
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[idx])
        axes[idx].set_title('Spectrogram')
        idx += 1

    if show_mel:
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=axes[idx])
        axes[idx].set_title('Mel Spectrogram')
        idx += 1

    if show_mfcc:
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=axes[idx])
        axes[idx].set_title('MFCC')
        idx += 1

    # Blank out any unused grid cells.
    for i in range(idx, len(axes)):
        axes[i].axis('off')

    plt.tight_layout()
    buf = io.BytesIO()
    plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
    buf.seek(0)
    img = Image.open(buf)
    plt.close(fig)  # fix: close this figure explicitly, not just "the current" one
    return img
166
-
167
def analyze(audio, selected):
    """Entry point wired to the UI: load audio, compute metrics, build report.

    Args:
        audio: filepath of the uploaded audio, or None/empty.
        selected: list of metric names the user ticked.

    Returns:
        (image, markdown) on success, or (None, message) on bad input / error.
    """
    if not audio or not selected:
        return None, "Please upload audio and select metrics"

    try:
        y, sr = librosa.load(audio, sr=None, mono=True)
        if len(y) < sr * 0.1:
            return None, "Audio too short"

        results = calculate_metrics(y, sr)
        figure = create_viz(y, sr, selected)

        score = results['Overall Quality Score']
        if score >= 80:
            status = "Excellent"
        elif score >= 60:
            status = "Good"
        elif score >= 40:
            status = "Fair"
        else:
            status = "Poor"

        report = f"## Quality Score: {score}/100 ({status})\n\n"
        for name, value in results.items():
            # Show only what the user asked for, plus the aggregate score.
            if name in selected or name == "Overall Quality Score":
                report += f"**{name}:** {value}\n"

        return figure, report
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        return None, f"Error: {str(e)}"
190
 
191
# Two-column Blocks layout: inputs on the left, results on the right.
with gr.Blocks(title="Audio Quality Assessment") as demo:
    gr.Markdown("# Audio Quality Assessment\n### Professional audio analysis")

    with gr.Row():
        with gr.Column():
            audio_in = gr.Audio(label="Upload Audio", type="filepath")
            metric_choices = gr.CheckboxGroup(
                choices=ALL_METRICS,
                value=["SNR (dB)", "RMS Energy", "THD (%)"],
                label="Select Metrics",
            )
            run_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            viz_out = gr.Image(label="Visualization")
            report_out = gr.Textbox(label="Metrics", lines=15)

    run_btn.click(analyze, inputs=[audio_in, metric_choices], outputs=[viz_out, report_out])

demo.launch(share=True)
 
 
6
  from PIL import Image
7
  from scipy.stats import kurtosis, skew
8
 
9
def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Analyze an uploaded audio file and return a figure plus a markdown report.

    Args:
        audio_file: path to the uploaded file (gr.Audio type="filepath"); None if absent.
        snr, rms, peak, thd, spectral, mfcc_check: boolean checkbox flags
            selecting which metrics appear in the report.

    Returns:
        (PIL.Image, str) with waveform+spectrogram and the metric list,
        or (None, message) on bad input / error.
    """
    if audio_file is None:
        return None, "Please upload an audio file"

    try:
        # Load as mono at the file's native sample rate.
        y, sr = librosa.load(audio_file, sr=None, mono=True)

        if len(y) < sr * 0.1:  # reject clips shorter than 100 ms
            return None, "Audio file too short"

        output = "## Audio Quality Metrics\n\n"

        # --- Cheap amplitude metrics ---
        if snr:
            # 5th percentile of |y| serves as a crude noise-floor estimate.
            noise_floor = np.percentile(np.abs(y), 5)
            signal_power = np.mean(y ** 2)
            noise_power = noise_floor ** 2
            snr_val = 10 * np.log10(signal_power / (noise_power + 1e-10))
            output += f"**SNR:** {snr_val:.2f} dB\n"
        if rms:
            rms_val = np.sqrt(np.mean(y ** 2))
            output += f"**RMS Energy:** {rms_val:.4f}\n"
        if peak:
            peak_val = 20 * np.log10(np.max(np.abs(y)) + 1e-10)
            output += f"**Peak Level:** {peak_val:.2f} dB\n"

        # --- THD: harmonic (2f..5f) power relative to the strongest FFT bin ---
        # Perf fix: only run the FFT when the THD box is actually ticked.
        if thd:
            fft = np.abs(np.fft.rfft(y))
            if len(fft) > 1:
                fundamental_idx = np.argmax(fft[1:]) + 1
                fundamental_power = fft[fundamental_idx] ** 2
                harmonic_indices = [i * fundamental_idx for i in range(2, 6) if i * fundamental_idx < len(fft)]
                harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
                thd_val = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
            else:
                thd_val = 0.0
            # Fix: clamp at 100 — a distortion percentage above 100% is
            # meaningless (the previous revision of this app clamped too).
            thd_val = min(thd_val, 100.0)
            output += f"**THD:** {thd_val:.2f}%\n"

        # --- Expensive librosa features, computed only on demand (perf fix:
        # the original always computed these even when unchecked) ---
        if spectral:
            spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0])
            output += f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n"
        if mfcc_check:
            mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            mfcc_mean = np.mean(mfcc_feat)
            output += f"**MFCC Mean:** {mfcc_mean:.4f}\n"

        # --- Visualization: waveform on top, spectrogram below ---
        fig, axes = plt.subplots(2, 1, figsize=(10, 6))

        time = np.linspace(0, len(y) / sr, len(y))
        axes[0].plot(time, y, linewidth=0.5)
        axes[0].set_title('Waveform')
        axes[0].set_xlabel('Time (s)')
        axes[0].grid(True, alpha=0.3)

        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
        axes[1].set_title('Spectrogram')

        plt.tight_layout()

        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        buf.seek(0)
        img = Image.open(buf)
        plt.close(fig)  # fix: close this figure explicitly so repeated calls don't leak

        return img, output

    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        return None, f"Error: {str(e)}"
93
 
94
# Build the interface: one audio input followed by one checkbox per metric,
# in the same order as analyze_audio's parameters.
metric_checkboxes = [
    gr.Checkbox(label="SNR (Signal-to-Noise Ratio)", value=True),
    gr.Checkbox(label="RMS Energy", value=True),
    gr.Checkbox(label="Peak Level", value=True),
    gr.Checkbox(label="THD (Total Harmonic Distortion)", value=True),
    gr.Checkbox(label="Spectral Centroid", value=False),
    gr.Checkbox(label="MFCC", value=False),
]

iface = gr.Interface(
    fn=analyze_audio,
    inputs=[gr.Audio(label="Upload Audio File", type="filepath")] + metric_checkboxes,
    outputs=[
        gr.Image(label="Visualization", type="pil"),
        gr.Textbox(label="Metrics", lines=10),
    ],
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze",
)

iface.launch(share=True)