Spaces:
Build error
Build error
Commit ·
8b61ed4
1
Parent(s): 0676533
Ultra simple Interface version
Browse files
app.py
CHANGED
|
@@ -6,205 +6,109 @@ import io
|
|
| 6 |
from PIL import Image
|
| 7 |
from scipy.stats import kurtosis, skew
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
"Spectral Bandwidth (Hz)", "Spectral Flatness", "Spectral Contrast (Mean)",
|
| 13 |
-
"Harmonic-to-Noise Ratio (dB)", "THD (%)", "Tempo (BPM)", "Onset Strength (Mean)",
|
| 14 |
-
"Kurtosis", "Skewness", "Fundamental Frequency (Hz)", "Loudness (approx LUFS)",
|
| 15 |
-
"Silence Ratio", "Clipping Ratio (%)", "MFCC Mean", "MFCC Std Dev",
|
| 16 |
-
"Chroma Mean", "Chroma Std Dev"
|
| 17 |
-
]
|
| 18 |
-
|
| 19 |
-
def calculate_metrics(y, sr):
    """Compute a dictionary of audio-quality metrics for one signal.

    Args:
        y: 1-D NumPy array of audio samples (presumably the mono float
            signal produced by ``librosa.load`` -- confirm against caller).
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict mapping display names (for example ``'SNR (dB)'``) to rounded
        values.  The final entry, ``'Overall Quality Score'``, is a weighted
        sum of seven normalised terms whose weights add up to 100.
    """
    eps = 1e-10  # keeps logarithms and divisions finite on silent input
    metrics = {}
    abs_y = np.abs(y)
    peak = np.max(abs_y)

    # Amplitude & Energy
    # The noise floor is approximated by the 5th percentile of |y|.
    noise_floor = np.percentile(abs_y, 5)
    signal_power = np.mean(y ** 2)
    noise_power = noise_floor ** 2
    snr = 10 * np.log10((signal_power + eps) / (noise_power + eps))
    metrics['SNR (dB)'] = round(snr, 2)

    rms = np.sqrt(signal_power)
    metrics['RMS Energy'] = round(rms, 4)
    metrics['Peak Level (dB)'] = round(20 * np.log10(peak + eps), 2)
    metrics['Crest Factor'] = round(peak / (rms + eps), 2)

    # Ratio of the loudest to the quietest non-zero sample.  An all-zero
    # signal has no such sample, so report 0 dB instead of raising.
    nonzero = abs_y[y != 0]
    if nonzero.size:
        dynamic_range = 20 * np.log10(peak / (np.min(nonzero) + eps))
    else:
        dynamic_range = 0.0
    metrics['Dynamic Range (dB)'] = round(dynamic_range, 2)

    # Spectral
    metrics['Zero Crossing Rate'] = round(np.mean(librosa.feature.zero_crossing_rate(y)), 4)
    metrics['Spectral Centroid (Hz)'] = round(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0]), 2)
    metrics['Spectral Rolloff (Hz)'] = round(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)[0]), 2)
    metrics['Spectral Bandwidth (Hz)'] = round(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]), 2)
    metrics['Spectral Flatness'] = round(np.mean(librosa.feature.spectral_flatness(y=y)[0]), 4)
    metrics['Spectral Contrast (Mean)'] = round(np.mean(librosa.feature.spectral_contrast(y=y, sr=sr)), 2)

    # Harmonic
    # "Harmonic-to-noise" here is the power ratio of the HPSS harmonic
    # component to the HPSS percussive component.
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    harmonic_power = np.mean(y_harmonic ** 2)
    percussive_power = np.mean(y_percussive ** 2)
    hnr = 10 * np.log10((harmonic_power + eps) / (percussive_power + eps))
    metrics['Harmonic-to-Noise Ratio (dB)'] = round(hnr, 2)

    # THD: the strongest non-DC FFT bin is taken as the fundamental and the
    # bins at 2x..5x its index as the harmonics.
    spectrum = np.abs(np.fft.rfft(y))
    if len(spectrum) > 1:
        fundamental_idx = np.argmax(spectrum[1:]) + 1
        fundamental_power = spectrum[fundamental_idx] ** 2
        overtone_power = sum(
            spectrum[k * fundamental_idx] ** 2
            for k in range(2, 6)
            if k * fundamental_idx < len(spectrum)
        )
        thd = np.sqrt(overtone_power / (fundamental_power + eps)) * 100
        metrics['THD (%)'] = round(min(thd, 100), 2)
    else:
        metrics['THD (%)'] = 0.0

    # Temporal
    try:
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # Depending on the librosa version the tempo is a scalar or a
        # one-element array; normalise before converting to float.
        metrics['Tempo (BPM)'] = round(float(np.atleast_1d(tempo)[0]), 1)
    except Exception:
        # Best effort: a failed beat tracker must not abort the other metrics.
        metrics['Tempo (BPM)'] = 0.0

    metrics['Onset Strength (Mean)'] = round(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), 4)

    # Statistical
    metrics['Kurtosis'] = round(kurtosis(y), 2)
    metrics['Skewness'] = round(skew(y), 2)

    # Frequency
    try:
        # NOTE(review): the statements that originally produced ``f0_mean``
        # are not visible in this view of the file.  YIN over the C2..C7
        # range is a stand-in estimator -- confirm against the intended one.
        f0 = librosa.yin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr,
        )
        f0_mean = np.mean(f0)
        metrics['Fundamental Frequency (Hz)'] = round(f0_mean, 2) if not np.isnan(f0_mean) else 0.0
    except Exception:
        # Best effort, same policy as the tempo estimate.
        metrics['Fundamental Frequency (Hz)'] = 0.0

    # Perceptual
    # This "LUFS" figure is simply the RMS level in dB; no loudness
    # weighting or gating is applied.
    metrics['Loudness (approx LUFS)'] = round(20 * np.log10(rms + eps), 2)
    silence_threshold = 0.01
    metrics['Silence Ratio'] = round(np.sum(abs_y < silence_threshold) / len(y), 4)
    clipping_threshold = 0.99
    metrics['Clipping Ratio (%)'] = round(np.sum(abs_y > clipping_threshold) / len(y) * 100, 4)

    # MFCC & Chroma
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    metrics['MFCC Mean'] = round(np.mean(mfcc), 4)
    metrics['MFCC Std Dev'] = round(np.std(mfcc), 4)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Chroma Mean'] = round(np.mean(chroma), 4)
    metrics['Chroma Std Dev'] = round(np.std(chroma), 4)

    # Quality Score
    # Each term is normalised to [0, 1] and multiplied by its weight
    # (20 + 15 + 15 + 10 + 10 + 15 + 15 = 100).
    quality_score = (
        min(max(snr, 0), 40) / 40 * 20
        + (1 - min(metrics['THD (%)'], 10) / 10) * 15
        + min(max(dynamic_range, 0), 60) / 60 * 15
        + min(rms, 0.7) / 0.7 * 10
        + (1 - metrics['Spectral Flatness']) * 10
        + (1 - min(metrics['Clipping Ratio (%)'], 100) / 100) * 15
        # HNR is clamped to [-10, 20] dB; shift by 10 so the 30 dB span maps
        # onto [0, 1] rather than [-1/3, 2/3].
        + (min(max(hnr, -10), 20) + 10) / 30 * 15
    )
    metrics['Overall Quality Score'] = round(quality_score, 1)

    return metrics
|
| 112 |
-
|
| 113 |
-
def create_viz(y, sr, selected):
    """Render the plots implied by the selected metric names as one image.

    A waveform is drawn when any of ``"SNR (dB)"``, ``"RMS Energy"`` or
    ``"Peak Level (dB)"`` is selected, a spectrogram for
    ``"Spectral Centroid (Hz)"``, a mel spectrogram for
    ``"Spectral Flatness"`` and an MFCC plot for ``"MFCC Mean"``.  Plots are
    laid out on a two-column grid and unused cells are hidden.

    Args:
        y: 1-D NumPy array of audio samples.
        sr: Sampling rate of ``y`` in Hz.
        selected: Container of metric display names chosen by the user.

    Returns:
        A PIL image holding the PNG-rendered figure.
    """
    show_wave = any(m in selected for m in ["SNR (dB)", "RMS Energy", "Peak Level (dB)"])
    show_spec = "Spectral Centroid (Hz)" in selected
    show_mel = "Spectral Flatness" in selected
    show_mfcc = "MFCC Mean" in selected

    # With nothing plottable selected, still produce a one-row (blank) grid.
    num = sum([show_wave, show_spec, show_mel, show_mfcc]) or 2
    rows = (num + 1) // 2

    # squeeze=False always yields a 2-D array of Axes, so flattening gives a
    # flat sequence of Axes even when only one plot is requested.
    fig, grid = plt.subplots(rows, 2, figsize=(12, 4 * rows), squeeze=False)
    axes = grid.flatten()

    try:
        idx = 0

        if show_wave:
            axes[idx].plot(np.linspace(0, len(y) / sr, len(y)), y, linewidth=0.5)
            axes[idx].set_title('Waveform')
            axes[idx].set_xlabel('Time (s)')
            axes[idx].grid(True, alpha=0.3)
            idx += 1

        if show_spec:
            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[idx])
            axes[idx].set_title('Spectrogram')
            idx += 1

        if show_mel:
            S = librosa.feature.melspectrogram(y=y, sr=sr)
            S_dB = librosa.power_to_db(S, ref=np.max)
            librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=axes[idx])
            axes[idx].set_title('Mel Spectrogram')
            idx += 1

        if show_mfcc:
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=axes[idx])
            axes[idx].set_title('MFCC')
            idx += 1

        # Hide the grid cells that received no plot.
        for unused in axes[idx:]:
            unused.axis('off')

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
    finally:
        # Release the figure even when a plotting call raises.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
|
| 166 |
-
|
| 167 |
-
def analyze(audio, selected):
    """Load an audio file and report the selected metrics with a plot.

    Args:
        audio: Path (or other source accepted by ``librosa.load``) of the
            audio to analyse; a falsy value means nothing was uploaded.
        selected: Container of metric display names chosen by the user.

    Returns:
        ``(image, text)``.  ``image`` is the result of ``create_viz`` and
        ``text`` has one ``**name:** value`` line per selected metric plus
        the overall quality score.  On a missing input, too-short audio or
        any processing error, ``image`` is ``None`` and ``text`` is the
        message to show.
    """
    if not audio or not selected:
        return None, "Please upload audio and select metrics"

    try:
        y, sr = librosa.load(audio, sr=None, mono=True)
        # Reject clips shorter than a tenth of a second.
        if len(y) < sr * 0.1:
            return None, "Audio too short"

        # NOTE(review): the statements that originally assigned
        # ``all_metrics`` and the initial ``output`` text are not visible in
        # this view of the file.  The metrics are taken from
        # ``calculate_metrics`` and the report starts empty -- confirm
        # against the intended header text.
        all_metrics = calculate_metrics(y, sr)
        viz = create_viz(y, sr, selected)

        output = "".join(
            f"**{k}:** {v}\n"
            for k, v in all_metrics.items()
            if k in selected or k == "Overall Quality Score"
        )
        return viz, output
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return None, f"Error: {str(e)}"
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
| 6 |
from PIL import Image
|
| 7 |
from scipy.stats import kurtosis, skew
|
| 8 |
|
| 9 |
+
def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Analyse an audio file and report the metrics that were switched on.

    Args:
        audio_file: Path of the audio file to load; a falsy value (``None``
            or an empty string) means nothing was uploaded.
        snr: Truthy to report the signal-to-noise ratio in dB.
        rms: Truthy to report the RMS energy.
        peak: Truthy to report the peak level in dB.
        thd: Truthy to report total harmonic distortion in percent.
        spectral: Truthy to report the mean spectral centroid in Hz.
        mfcc_check: Truthy to report the mean over 13 MFCC coefficients.

    Returns:
        ``(image, text)``.  ``image`` is a PIL image showing the waveform
        above a spectrogram and ``text`` is the metric report.  On a missing
        input, too-short audio or any processing error, ``image`` is ``None``
        and ``text`` is the message to show.
    """
    if not audio_file:
        return None, "Please upload an audio file"

    try:
        # Load audio at its native sampling rate, mixed down to mono.
        y, sr = librosa.load(audio_file, sr=None, mono=True)

        # Reject clips shorter than a tenth of a second.
        if len(y) < sr * 0.1:
            return None, "Audio file too short"

        eps = 1e-10  # keeps logarithms and divisions finite on silent input
        abs_y = np.abs(y)

        # Build output; each metric is only computed when it was requested.
        output = "## Audio Quality Metrics\n\n"

        if snr:
            # The noise floor is approximated by the 5th percentile of |y|.
            noise_power = np.percentile(abs_y, 5) ** 2
            signal_power = np.mean(y ** 2)
            snr_val = 10 * np.log10((signal_power + eps) / (noise_power + eps))
            output += f"**SNR:** {snr_val:.2f} dB\n"

        if rms:
            rms_val = np.sqrt(np.mean(y ** 2))
            output += f"**RMS Energy:** {rms_val:.4f}\n"

        if peak:
            peak_val = 20 * np.log10(np.max(abs_y) + eps)
            output += f"**Peak Level:** {peak_val:.2f} dB\n"

        if thd:
            # The strongest non-DC FFT bin is taken as the fundamental and
            # the bins at 2x..5x its index as the harmonics.
            spectrum = np.abs(np.fft.rfft(y))
            if len(spectrum) > 1:
                fundamental_idx = np.argmax(spectrum[1:]) + 1
                fundamental_power = spectrum[fundamental_idx] ** 2
                overtone_power = sum(
                    spectrum[k * fundamental_idx] ** 2
                    for k in range(2, 6)
                    if k * fundamental_idx < len(spectrum)
                )
                thd_val = np.sqrt(overtone_power / (fundamental_power + eps)) * 100
            else:
                thd_val = 0.0
            output += f"**THD:** {thd_val:.2f}%\n"

        if spectral:
            spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0])
            output += f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n"

        if mfcc_check:
            mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            output += f"**MFCC Mean:** {np.mean(mfcc_feat):.4f}\n"

        # Create visualization: waveform on top, spectrogram below.
        fig, axes = plt.subplots(2, 1, figsize=(10, 6))
        try:
            time = np.linspace(0, len(y) / sr, len(y))
            axes[0].plot(time, y, linewidth=0.5)
            axes[0].set_title('Waveform')
            axes[0].set_xlabel('Time (s)')
            axes[0].grid(True, alpha=0.3)

            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
            axes[1].set_title('Spectrogram')

            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        finally:
            # Release the figure even when a plotting call raises.
            plt.close(fig)

        buf.seek(0)
        img = Image.open(buf)

        return img, output

    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return None, f"Error: {str(e)}"
|
| 93 |
|
| 94 |
+
# Build the Gradio UI: one audio upload followed by an on/off toggle per
# metric.  The toggle order matches the positional flag parameters of
# analyze_audio (snr, rms, peak, thd, spectral, mfcc_check).
_toggle_specs = (
    ("SNR (Signal-to-Noise Ratio)", True),
    ("RMS Energy", True),
    ("Peak Level", True),
    ("THD (Total Harmonic Distortion)", True),
    ("Spectral Centroid", False),
    ("MFCC", False),
)

_input_components = [gr.Audio(label="Upload Audio File", type="filepath")]
for _label, _default in _toggle_specs:
    _input_components.append(gr.Checkbox(label=_label, value=_default))

# The function returns (image, text), shown as a picture and a text box.
_output_components = [
    gr.Image(label="Visualization", type="pil"),
    gr.Textbox(label="Metrics", lines=10),
]

iface = gr.Interface(
    fn=analyze_audio,
    inputs=_input_components,
    outputs=_output_components,
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze",
)

iface.launch(share=True)
|