# Hugging Face Spaces app: audio quality assessment demo (Gradio).
import io

import gradio as gr
import librosa
import librosa.display  # explicit import required: librosa does not auto-load the display submodule
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy.stats import kurtosis, skew
def analyze_audio(audio_file, snr, rms, peak, thd, spectral, mfcc_check):
    """Compute selected quality metrics for an audio file and render plots.

    Parameters
    ----------
    audio_file : str or None
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``).
    snr, rms, peak, thd, spectral, mfcc_check : bool
        Checkbox toggles selecting which metrics appear in the report.

    Returns
    -------
    tuple
        ``(PIL.Image, markdown_report)`` on success, ``(None, message)`` on
        missing/short input or any processing error.
    """
    if audio_file is None:
        return None, "Please upload an audio file"
    try:
        y, sr = librosa.load(audio_file, sr=None, mono=True)
        # Reject clips shorter than 100 ms — too little data for stable metrics.
        if len(y) < sr * 0.1:
            return None, "Audio file too short"

        # --- Level metrics -------------------------------------------------
        # 5th percentile of |y| approximates the noise floor; 1e-10 guards
        # the log/division against digital silence.
        noise_floor = np.percentile(np.abs(y), 5)
        signal_power = np.mean(y ** 2)
        noise_power = noise_floor ** 2
        snr_val = 10 * np.log10(signal_power / (noise_power + 1e-10))
        rms_val = np.sqrt(signal_power)  # reuse mean-square instead of recomputing
        peak_val = 20 * np.log10(np.max(np.abs(y)) + 1e-10)

        # --- THD: energy of harmonics 2-5 relative to the strongest bin ----
        fft = np.abs(np.fft.rfft(y))
        if len(fft) > 1:
            # Skip bin 0 (DC) when locating the fundamental.
            fundamental_idx = np.argmax(fft[1:]) + 1
            fundamental_power = fft[fundamental_idx] ** 2
            harmonic_indices = [
                i * fundamental_idx
                for i in range(2, 6)
                if i * fundamental_idx < len(fft)
            ]
            harmonic_power = np.sum([fft[idx] ** 2 for idx in harmonic_indices])
            thd_val = np.sqrt(harmonic_power / (fundamental_power + 1e-10)) * 100
        else:
            thd_val = 0.0

        # --- Markdown report (compute optional features only if requested) -
        output = "## Audio Quality Metrics\n\n"
        if snr:
            output += f"**SNR:** {snr_val:.2f} dB\n"
        if rms:
            output += f"**RMS Energy:** {rms_val:.4f}\n"
        if peak:
            output += f"**Peak Level:** {peak_val:.2f} dB\n"
        if thd:
            output += f"**THD:** {thd_val:.2f}%\n"
        if spectral:
            spec_centroid = np.mean(
                librosa.feature.spectral_centroid(y=y, sr=sr)[0]
            )
            output += f"**Spectral Centroid:** {spec_centroid:.2f} Hz\n"
        if mfcc_check:
            mfcc_mean = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))
            output += f"**MFCC Mean:** {mfcc_mean:.4f}\n"

        # --- Waveform + spectrogram figure ---------------------------------
        fig, axes = plt.subplots(2, 1, figsize=(10, 6))
        try:
            time = np.linspace(0, len(y) / sr, len(y))
            axes[0].plot(time, y, linewidth=0.5)
            axes[0].set_title('Waveform')
            axes[0].set_xlabel('Time (s)')
            axes[0].grid(True, alpha=0.3)
            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            # Requires `import librosa.display` at module level.
            librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=axes[1])
            axes[1].set_title('Spectrogram')
            fig.tight_layout()
            buf = io.BytesIO()
            fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
            buf.seek(0)
            img = Image.open(buf)
            img.load()  # materialize now so the buffer is no longer needed
        finally:
            # Close this specific figure even if plotting raises — the bare
            # plt.close() in the original leaked figures on error paths.
            plt.close(fig)
        return img, output
    except Exception as e:
        return None, f"Error: {str(e)}"
# Wire the analyzer into a Gradio UI: one audio input plus six metric
# toggles in, a plot image plus a text report out.
metric_toggles = [
    gr.Checkbox(label="SNR (Signal-to-Noise Ratio)", value=True),
    gr.Checkbox(label="RMS Energy", value=True),
    gr.Checkbox(label="Peak Level", value=True),
    gr.Checkbox(label="THD (Total Harmonic Distortion)", value=True),
    gr.Checkbox(label="Spectral Centroid", value=False),
    gr.Checkbox(label="MFCC", value=False),
]
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[gr.Audio(label="Upload Audio File", type="filepath"), *metric_toggles],
    outputs=[
        gr.Image(label="Visualization", type="pil"),
        gr.Textbox(label="Metrics", lines=10),
    ],
    title="Audio Quality Assessment",
    description="Upload an audio file and select metrics to analyze",
)
iface.launch(share=True)