# Author: Aynursusuz — "Ultra simple Interface version" (commit 8b61ed4)
import io

import gradio as gr
import librosa
import librosa.display  # explicit import: librosa.display.specshow is used below
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy.stats import kurtosis, skew
def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Compute selected quality metrics and a visualization for an audio file.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``).
    snr, rms, peak, thd, spectral, mfcc_check : bool
        Checkbox flags selecting which metrics appear in the report.

    Returns
    -------
    tuple
        (PIL image with waveform + spectrogram, markdown metrics report),
        or (None, error message) when input is missing or analysis fails.
    """
    if audio_file is None:
        return None, "Please upload an audio file"
    try:
        # Load at native sample rate, downmixed to mono.
        y, sr = librosa.load(audio_file, sr=None, mono=True)
        # Reject clips shorter than 100 ms — too little data for the metrics.
        if len(y) < sr * 0.1:
            return None, "Audio file too short"

        # --- Basic level metrics -------------------------------------------
        # Noise floor estimated as the 5th percentile of |y|; the 1e-10
        # epsilon guards against divide-by-zero / log(0) on digital silence.
        noise_floor = np.percentile(np.abs(y), 5)
        signal_power = np.mean(y ** 2)
        noise_power = noise_floor ** 2
        snr_val = 10 * np.log10(signal_power / (noise_power + 1e-10))
        rms_val = np.sqrt(signal_power)  # same as sqrt(mean(y**2)), reuses power
        peak_val = 20 * np.log10(np.max(np.abs(y)) + 1e-10)

        # --- THD (simplified single-tone estimate) -------------------------
        # Strongest non-DC FFT bin is taken as the fundamental; harmonic
        # power is summed at integer multiples (2nd through 5th harmonic).
        fft = np.abs(np.fft.rfft(y))
        if len(fft) > 1:
            fundamental_idx = np.argmax(fft[1:]) + 1
            fundamental_power = fft[fundamental_idx] ** 2
            harmonic_indices = [
                k * fundamental_idx
                for k in range(2, 6)
                if k * fundamental_idx < len(fft)
            ]
            harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
            thd_val = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
        else:
            thd_val = 0.0

        # --- Spectral / cepstral features ----------------------------------
        spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)[0])
        mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_mean = np.mean(mfcc_feat)

        # --- Markdown report: only the metrics the user ticked -------------
        output = "## Audio Quality Metrics\n\n"
        if snr:
            output += f"**SNR:** {snr_val:.2f} dB\n"
        if rms:
            output += f"**RMS Energy:** {rms_val:.4f}\n"
        if peak:
            output += f"**Peak Level:** {peak_val:.2f} dB\n"
        if thd:
            output += f"**THD:** {thd_val:.2f}%\n"
        if spectral:
            output += f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n"
        if mfcc_check:
            output += f"**MFCC Mean:** {mfcc_mean:.4f}\n"

        # --- Visualization: waveform on top, spectrogram below -------------
        fig, axes = plt.subplots(2, 1, figsize=(10, 6))
        try:
            time = np.linspace(0, len(y) / sr, len(y))
            axes[0].plot(time, y, linewidth=0.5)
            axes[0].set_title('Waveform')
            axes[0].set_xlabel('Time (s)')
            axes[0].grid(True, alpha=0.3)

            # Requires librosa.display (imported at module level).
            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
            axes[1].set_title('Spectrogram')
            fig.tight_layout()

            # Render via an in-memory PNG so Gradio gets a plain PIL image.
            buf = io.BytesIO()
            fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
            buf.seek(0)
            img = Image.open(buf)
        finally:
            # Close THIS figure even if plotting fails, so repeated requests
            # don't leak matplotlib figures (plt.close() only closed the
            # "current" figure and was skipped entirely on error).
            plt.close(fig)
        return img, output
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing
        # the Gradio worker.
        return None, f"Error: {str(e)}"
# Build the Gradio UI: one audio-file input plus one checkbox per metric;
# outputs are the rendered figure and the markdown metrics report.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Audio(label="Upload Audio File", type="filepath"),
        gr.Checkbox(label="SNR (Signal-to-Noise Ratio)", value=True),
        gr.Checkbox(label="RMS Energy", value=True),
        gr.Checkbox(label="Peak Level", value=True),
        gr.Checkbox(label="THD (Total Harmonic Distortion)", value=True),
        gr.Checkbox(label="Spectral Centroid", value=False),
        gr.Checkbox(label="MFCC", value=False),
    ],
    outputs=[
        gr.Image(label="Visualization", type="pil"),
        gr.Textbox(label="Metrics", lines=10)
    ],
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze"
)

if __name__ == "__main__":
    # Guard the launch so importing this module doesn't start a server.
    # share=True publishes a temporary public gradio.live URL.
    iface.launch(share=True)